From 4c589d4c89f0fab97a14f02095e75335f08cc38e Mon Sep 17 00:00:00 2001
From: MohamedBassem
Date: Fri, 19 Apr 2024 22:32:57 +0100
Subject: feature: Allow recrawling bookmarks without running inference jobs

---
 apps/web/app/dashboard/admin/page.tsx | 16 ++++++++++++++--
 apps/workers/crawlerWorker.ts         | 36 +++++++++++++++++++++++++++------
 packages/shared/queues.ts             |  1 +
 packages/trpc/routers/admin.ts        |  2 ++
 4 files changed, 46 insertions(+), 9 deletions(-)

diff --git a/apps/web/app/dashboard/admin/page.tsx b/apps/web/app/dashboard/admin/page.tsx
index 65ac44e1..43c48b44 100644
--- a/apps/web/app/dashboard/admin/page.tsx
+++ b/apps/web/app/dashboard/admin/page.tsx
@@ -103,7 +103,9 @@ function ActionsSection() {
         className="lg:w-1/2"
         variant="destructive"
         loading={isRecrawlPending}
-        onClick={() => recrawlLinks({ crawlStatus: "failure" })}
+        onClick={() =>
+          recrawlLinks({ crawlStatus: "failure", runInference: true })
+        }
       >
         Recrawl Failed Links Only
       </ActionButton>
@@ -111,10 +113,20 @@ function ActionsSection() {
         className="lg:w-1/2"
         variant="destructive"
         loading={isRecrawlPending}
-        onClick={() => recrawlLinks({ crawlStatus: "all" })}
+        onClick={() => recrawlLinks({ crawlStatus: "all", runInference: true })}
       >
         Recrawl All Links
       </ActionButton>
+      <ActionButton
+        className="lg:w-1/2"
+        variant="destructive"
+        loading={isRecrawlPending}
+        onClick={() =>
+          recrawlLinks({ crawlStatus: "all", runInference: false })
+        }
+      >
+        Recrawl All Links (Without Inference)
+      </ActionButton>
diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts
--- a/apps/workers/crawlerWorker.ts
+++ b/apps/workers/crawlerWorker.ts
@@ ... @@
   const { bookmarkId } = request.data;
-  const { url, userId } = await getBookmarkDetails(bookmarkId);
+  const {
+    url,
+    userId,
+    screenshotAssetId: oldScreenshotAssetId,
+    imageAssetId: oldImageAssetId,
+  } = await getBookmarkDetails(bookmarkId);
 
   logger.info(
     `[Crawler][${jobId}] Will crawl "${url}" for link with id "${bookmarkId}"`,
@@ -371,10 +381,22 @@ async function runCrawler(job: Job) {
     })
     .where(eq(bookmarkLinks.id, bookmarkId));
 
-  // Enqueue openai job
-  OpenAIQueue.add("openai", {
-    bookmarkId,
-  });
+  // Delete the old assets if any
+  await Promise.all([
+    oldScreenshotAssetId
+      ? deleteAsset({ userId, assetId: oldScreenshotAssetId }).catch(() => ({}))
+      : {},
+    oldImageAssetId
+      ? deleteAsset({ userId, assetId: oldImageAssetId }).catch(() => ({}))
+      : {},
+  ]);
+
+  // Enqueue openai job (if not set, assume it's true for backward compatibility)
+  if (job.data.runInference !== false) {
+    OpenAIQueue.add("openai", {
+      bookmarkId,
+    });
+  }
 
   // Update the search index
   SearchIndexingQueue.add("search_indexing", {
diff --git a/packages/shared/queues.ts b/packages/shared/queues.ts
index 6d5fdd5f..6ea89f5e 100644
--- a/packages/shared/queues.ts
+++ b/packages/shared/queues.ts
@@ -12,6 +12,7 @@ export const queueConnectionDetails = {
 
 // Link Crawler
 export const zCrawlLinkRequestSchema = z.object({
   bookmarkId: z.string(),
+  runInference: z.boolean().optional(),
 });
 export type ZCrawlLinkRequest = z.infer<typeof zCrawlLinkRequestSchema>;
diff --git a/packages/trpc/routers/admin.ts b/packages/trpc/routers/admin.ts
index 8792f7ed..0a0af173 100644
--- a/packages/trpc/routers/admin.ts
+++ b/packages/trpc/routers/admin.ts
@@ -100,6 +100,7 @@ export const adminAppRouter = router({
     .input(
       z.object({
         crawlStatus: z.enum(["success", "failure", "all"]),
+        runInference: z.boolean(),
       }),
     )
     .mutation(async ({ ctx, input }) => {
@@ -116,6 +117,7 @@ export const adminAppRouter = router({
       bookmarkIds.map((b) =>
        LinkCrawlerQueue.add("crawl", {
          bookmarkId: b.id,
+         runInference: input.runInference,
        }),
      ),
    );
-- 
cgit v1.2.3-70-g09d2
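
A note on how the new flag behaves, since it is easy to miss in the diff: `runInference` is declared `.optional()` in `zCrawlLinkRequestSchema`, and the worker only skips the OpenAI job when the flag is explicitly `false`, so crawl jobs enqueued by callers that predate this change (where the field is `undefined`) keep running inference as before. Below is a minimal sketch of the two enqueue variants; the import specifier and the literal bookmark id are illustrative assumptions, while `LinkCrawlerQueue.add("crawl", ...)` itself is the call used in `packages/trpc/routers/admin.ts` above.

  import { LinkCrawlerQueue } from "@hoarder/shared/queues"; // assumed import path

  // Recrawl and re-run inference afterwards. Omitting runInference entirely
  // is treated the same way by the worker (backward-compatible default).
  await LinkCrawlerQueue.add("crawl", {
    bookmarkId: "example-bookmark-id", // hypothetical id
    runInference: true,
  });

  // Recrawl only: content, screenshots and the search index are refreshed,
  // but nothing is pushed onto the OpenAI queue.
  await LinkCrawlerQueue.add("crawl", {
    bookmarkId: "example-bookmark-id", // hypothetical id
    runInference: false,
  });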