| author | MohamedBassem <me@mbassem.com> | 2024-04-19 22:32:57 +0100 |
|---|---|---|
| committer | Mohamed Bassem <me@mbassem.com> | 2024-04-20 00:05:31 +0100 |
| commit | 4c589d4c89f0fab97a14f02095e75335f08cc38e | |
| tree | 659ceffb892733df47fc849f6f894eb6c0d8aa02 | |
| parent | 4402e6f04170cbb0613d35fe94471162253e91b2 | |
feature: Allow recrawling bookmarks without running inference jobs
| file | lines changed |
|---|---|
| apps/web/app/dashboard/admin/page.tsx | 16 |
| apps/workers/crawlerWorker.ts | 36 |
| packages/shared/queues.ts | 1 |
| packages/trpc/routers/admin.ts | 2 |

4 files changed, 46 insertions(+), 9 deletions(-)
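In brief, the change threads a new `runInference` flag end to end: the admin dashboard gains a third button, "Recrawl All Links (Without Inference)"; the `recrawlLinks` tRPC mutation now requires a `runInference` boolean; the crawl queue payload carries it as an optional field; and the crawler enqueues the OpenAI inference job only when the flag is not `false`. Recrawling also now deletes the bookmark's previous screenshot and image assets. The sketch below illustrates the backward-compatibility behavior: `zCrawlLinkRequestSchema` is copied from the diff, while `shouldRunInference` is a hypothetical helper added here for illustration (the real check is inlined in `runCrawler`).

```ts
import { z } from "zod";

// Taken verbatim from the diff: `runInference` is optional in the queue payload.
const zCrawlLinkRequestSchema = z.object({
  bookmarkId: z.string(),
  runInference: z.boolean().optional(),
});
type ZCrawlLinkRequest = z.infer<typeof zCrawlLinkRequestSchema>;

// Hypothetical helper (not in the diff) capturing the crawler's check.
// `!== false` treats both `true` and `undefined` (jobs enqueued before this
// change shipped) as "run inference" — the backward-compatibility trick.
function shouldRunInference(data: ZCrawlLinkRequest): boolean {
  return data.runInference !== false;
}

console.log(shouldRunInference({ bookmarkId: "b1" })); // true: old-style payload
console.log(shouldRunInference({ bookmarkId: "b1", runInference: true })); // true
console.log(shouldRunInference({ bookmarkId: "b1", runInference: false })); // false
```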
```diff
diff --git a/apps/web/app/dashboard/admin/page.tsx b/apps/web/app/dashboard/admin/page.tsx
index 65ac44e1..43c48b44 100644
--- a/apps/web/app/dashboard/admin/page.tsx
+++ b/apps/web/app/dashboard/admin/page.tsx
@@ -103,7 +103,9 @@ function ActionsSection() {
           className="lg:w-1/2"
           variant="destructive"
           loading={isRecrawlPending}
-          onClick={() => recrawlLinks({ crawlStatus: "failure" })}
+          onClick={() =>
+            recrawlLinks({ crawlStatus: "failure", runInference: true })
+          }
         >
           Recrawl Failed Links Only
         </ActionButton>
@@ -111,13 +113,23 @@
           className="lg:w-1/2"
           variant="destructive"
           loading={isRecrawlPending}
-          onClick={() => recrawlLinks({ crawlStatus: "all" })}
+          onClick={() => recrawlLinks({ crawlStatus: "all", runInference: true })}
         >
           Recrawl All Links
         </ActionButton>
         <ActionButton
           className="lg:w-1/2"
           variant="destructive"
+          loading={isRecrawlPending}
+          onClick={() =>
+            recrawlLinks({ crawlStatus: "all", runInference: false })
+          }
+        >
+          Recrawl All Links (Without Inference)
+        </ActionButton>
+        <ActionButton
+          className="lg:w-1/2"
+          variant="destructive"
           loading={isReindexPending}
           onClick={() => reindexBookmarks()}
         >
diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts
index 27e9e14c..890127c6 100644
--- a/apps/workers/crawlerWorker.ts
+++ b/apps/workers/crawlerWorker.ts
@@ -25,7 +25,7 @@ import { withTimeout } from "utils";
 import type { ZCrawlLinkRequest } from "@hoarder/shared/queues";
 import { db } from "@hoarder/db";
 import { bookmarkLinks, bookmarks } from "@hoarder/db/schema";
-import { newAssetId, saveAsset } from "@hoarder/shared/assetdb";
+import { deleteAsset, newAssetId, saveAsset } from "@hoarder/shared/assetdb";
 import serverConfig from "@hoarder/shared/config";
 import logger from "@hoarder/shared/logger";
 import {
@@ -165,7 +165,12 @@ async function getBookmarkDetails(bookmarkId: string) {
   if (!bookmark || !bookmark.link) {
     throw new Error("The bookmark either doesn't exist or not a link");
   }
-  return { url: bookmark.link.url, userId: bookmark.userId };
+  return {
+    url: bookmark.link.url,
+    userId: bookmark.userId,
+    screenshotAssetId: bookmark.link.screenshotAssetId,
+    imageAssetId: bookmark.link.imageAssetId,
+  };
 }
 
 /**
@@ -332,7 +337,12 @@ async function runCrawler(job: Job<ZCrawlLinkRequest, void>) {
   }
 
   const { bookmarkId } = request.data;
-  const { url, userId } = await getBookmarkDetails(bookmarkId);
+  const {
+    url,
+    userId,
+    screenshotAssetId: oldScreenshotAssetId,
+    imageAssetId: oldImageAssetId,
+  } = await getBookmarkDetails(bookmarkId);
 
   logger.info(
     `[Crawler][${jobId}] Will crawl "${url}" for link with id "${bookmarkId}"`,
@@ -371,10 +381,22 @@
     })
     .where(eq(bookmarkLinks.id, bookmarkId));
 
-  // Enqueue openai job
-  OpenAIQueue.add("openai", {
-    bookmarkId,
-  });
+  // Delete the old assets if any
+  await Promise.all([
+    oldScreenshotAssetId
+      ? deleteAsset({ userId, assetId: oldScreenshotAssetId }).catch(() => ({}))
+      : {},
+    oldImageAssetId
+      ? deleteAsset({ userId, assetId: oldImageAssetId }).catch(() => ({}))
+      : {},
+  ]);
+
+  // Enqueue openai job (if not set, assume it's true for backward compatibility)
+  if (job.data.runInference !== false) {
+    OpenAIQueue.add("openai", {
+      bookmarkId,
+    });
+  }
 
   // Update the search index
   SearchIndexingQueue.add("search_indexing", {
diff --git a/packages/shared/queues.ts b/packages/shared/queues.ts
index 6d5fdd5f..6ea89f5e 100644
--- a/packages/shared/queues.ts
+++ b/packages/shared/queues.ts
@@ -12,6 +12,7 @@ export const queueConnectionDetails = {
 
 // Link Crawler
 export const zCrawlLinkRequestSchema = z.object({
   bookmarkId: z.string(),
+  runInference: z.boolean().optional(),
 });
 export type ZCrawlLinkRequest = z.infer<typeof zCrawlLinkRequestSchema>;
diff --git a/packages/trpc/routers/admin.ts b/packages/trpc/routers/admin.ts
index 8792f7ed..0a0af173 100644
--- a/packages/trpc/routers/admin.ts
+++ b/packages/trpc/routers/admin.ts
@@ -100,6 +100,7 @@ export const adminAppRouter = router({
     .input(
       z.object({
         crawlStatus: z.enum(["success", "failure", "all"]),
+        runInference: z.boolean(),
       }),
     )
     .mutation(async ({ ctx, input }) => {
@@ -116,6 +117,7 @@ export const adminAppRouter = router({
       bookmarkIds.map((b) =>
         LinkCrawlerQueue.add("crawl", {
           bookmarkId: b.id,
+          runInference: input.runInference,
         }),
       ),
     );
```
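Two design details are easy to miss. At the tRPC boundary `runInference` is required (`z.boolean()`), forcing the admin UI to be explicit, while the queue schema marks it optional so jobs enqueued before this change still validate and, per the `!== false` check, still run inference. And the old-asset cleanup is best-effort by design: each `deleteAsset` call carries `.catch(() => ({}))`, so a missing or already-deleted asset can't fail the recrawl. A minimal, self-contained sketch of that cleanup pattern, with `deleteAsset` stubbed since its implementation lives in `@hoarder/shared/assetdb` and isn't part of this diff:

```ts
// Stub standing in for @hoarder/shared/assetdb's deleteAsset; the argument
// shape ({ userId, assetId }) is taken from the diff, the body is assumed.
async function deleteAsset(args: { userId: string; assetId: string }): Promise<void> {
  console.log(`deleting asset ${args.assetId} for user ${args.userId}`);
}

// Best-effort parallel cleanup mirroring the crawler change: failures are
// swallowed per asset, and absent asset ids fall through to a no-op value
// (Promise.all happily resolves non-promise entries).
async function deleteOldAssets(
  userId: string,
  oldScreenshotAssetId: string | null,
  oldImageAssetId: string | null,
): Promise<void> {
  await Promise.all([
    oldScreenshotAssetId
      ? deleteAsset({ userId, assetId: oldScreenshotAssetId }).catch(() => ({}))
      : {},
    oldImageAssetId
      ? deleteAsset({ userId, assetId: oldImageAssetId }).catch(() => ({}))
      : {},
  ]);
}
```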
