From 9a950e1068a7309d0cb36ffd33ecd2cd0af5c004 Mon Sep 17 00:00:00 2001 From: Mohamed Bassem Date: Wed, 25 Dec 2024 23:53:46 +0000 Subject: refactor: Move asset preprocessing to its own worker out of the inference worker --- apps/workers/crawlerWorker.ts | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) (limited to 'apps/workers/crawlerWorker.ts') diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts index 208de44b..252da3b2 100644 --- a/apps/workers/crawlerWorker.ts +++ b/apps/workers/crawlerWorker.ts @@ -49,6 +49,7 @@ import { import serverConfig from "@hoarder/shared/config"; import logger from "@hoarder/shared/logger"; import { + AssetPreprocessingQueue, LinkCrawlerQueue, OpenAIQueue, triggerSearchReindex, @@ -568,6 +569,9 @@ async function handleAsAssetBookmark( .where(eq(bookmarks.id, bookmarkId)); await trx.delete(bookmarkLinks).where(eq(bookmarkLinks.id, bookmarkId)); }); + await AssetPreprocessingQueue.enqueue({ + bookmarkId, + }); } async function crawlAndParseUrl( @@ -709,9 +713,6 @@ async function runCrawler(job: DequeuedJob) { // Link bookmarks get transformed into asset bookmarks if they point to a supported asset instead of a webpage const isPdf = contentType === ASSET_TYPES.APPLICATION_PDF; - let archivalLogic: () => Promise<void> = () => { - return Promise.resolve(); - }; if (isPdf) { await handleAsAssetBookmark(url, "pdf", userId, jobId, bookmarkId); } else if ( @@ -721,7 +722,7 @@ async function runCrawler(job: DequeuedJob) { ) { await handleAsAssetBookmark(url, "image", userId, jobId, bookmarkId); } else { - archivalLogic = await crawlAndParseUrl( + const archivalLogic = await crawlAndParseUrl( url, userId, jobId, @@ -731,21 +732,21 @@ async function runCrawler(job: DequeuedJob) { oldFullPageArchiveAssetId, archiveFullPage, ); - } - // Enqueue openai job (if not set, assume it's true for backward compatibility) - if (job.data.runInference !== false) { - await OpenAIQueue.enqueue({ 
bookmarkId, - }); - } + // Enqueue openai job (if not set, assume it's true for backward compatibility) + if (job.data.runInference !== false) { + await OpenAIQueue.enqueue({ + bookmarkId, + }); + } - // Update the search index - await triggerSearchReindex(bookmarkId); + // Update the search index + await triggerSearchReindex(bookmarkId); - // Trigger a potential download of a video from the URL - await triggerVideoWorker(bookmarkId, url); + // Trigger a potential download of a video from the URL + await triggerVideoWorker(bookmarkId, url); - // Do the archival as a separate last step as it has the potential for failure - await archivalLogic(); + // Do the archival as a separate last step as it has the potential for failure + await archivalLogic(); + } } -- cgit v1.2.3-70-g09d2