| author | MohamedBassem <me@mbassem.com> | 2024-04-19 22:32:57 +0100 |
|---|---|---|
| committer | Mohamed Bassem <me@mbassem.com> | 2024-04-20 00:05:31 +0100 |
| commit | 4c589d4c89f0fab97a14f02095e75335f08cc38e (patch) | |
| tree | 659ceffb892733df47fc849f6f894eb6c0d8aa02 /apps/workers/crawlerWorker.ts | |
| parent | 4402e6f04170cbb0613d35fe94471162253e91b2 (diff) | |
| download | karakeep-4c589d4c89f0fab97a14f02095e75335f08cc38e.tar.zst | |
feature: Allow recrawling bookmarks without running inference jobs
Diffstat (limited to 'apps/workers/crawlerWorker.ts')
| -rw-r--r-- | apps/workers/crawlerWorker.ts | 36 |
1 file changed, 29 insertions, 7 deletions
diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts
index 27e9e14c..890127c6 100644
--- a/apps/workers/crawlerWorker.ts
+++ b/apps/workers/crawlerWorker.ts
@@ -25,7 +25,7 @@ import { withTimeout } from "utils";
 import type { ZCrawlLinkRequest } from "@hoarder/shared/queues";
 import { db } from "@hoarder/db";
 import { bookmarkLinks, bookmarks } from "@hoarder/db/schema";
-import { newAssetId, saveAsset } from "@hoarder/shared/assetdb";
+import { deleteAsset, newAssetId, saveAsset } from "@hoarder/shared/assetdb";
 import serverConfig from "@hoarder/shared/config";
 import logger from "@hoarder/shared/logger";
 import {
@@ -165,7 +165,12 @@ async function getBookmarkDetails(bookmarkId: string) {
   if (!bookmark || !bookmark.link) {
     throw new Error("The bookmark either doesn't exist or not a link");
   }
-  return { url: bookmark.link.url, userId: bookmark.userId };
+  return {
+    url: bookmark.link.url,
+    userId: bookmark.userId,
+    screenshotAssetId: bookmark.link.screenshotAssetId,
+    imageAssetId: bookmark.link.imageAssetId,
+  };
 }
 
 /**
@@ -332,7 +337,12 @@ async function runCrawler(job: Job<ZCrawlLinkRequest, void>) {
   }
 
   const { bookmarkId } = request.data;
-  const { url, userId } = await getBookmarkDetails(bookmarkId);
+  const {
+    url,
+    userId,
+    screenshotAssetId: oldScreenshotAssetId,
+    imageAssetId: oldImageAssetId,
+  } = await getBookmarkDetails(bookmarkId);
 
   logger.info(
     `[Crawler][${jobId}] Will crawl "${url}" for link with id "${bookmarkId}"`,
@@ -371,10 +381,22 @@ async function runCrawler(job: Job<ZCrawlLinkRequest, void>) {
     })
     .where(eq(bookmarkLinks.id, bookmarkId));
 
-  // Enqueue openai job
-  OpenAIQueue.add("openai", {
-    bookmarkId,
-  });
+  // Delete the old assets if any
+  await Promise.all([
+    oldScreenshotAssetId
+      ? deleteAsset({ userId, assetId: oldScreenshotAssetId }).catch(() => ({}))
+      : {},
+    oldImageAssetId
+      ? deleteAsset({ userId, assetId: oldImageAssetId }).catch(() => ({}))
+      : {},
+  ]);
+
+  // Enqueue openai job (if not set, assume it's true for backward compatibility)
+  if (job.data.runInference !== false) {
+    OpenAIQueue.add("openai", {
+      bookmarkId,
+    });
+  }
 
   // Update the search index
   SearchIndexingQueue.add("search_indexing", {
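For context, a minimal producer-side sketch of how this flag might be used is shown below. It is not taken from the repository: it assumes a BullMQ-style queue (suggested by the worker's `Job<ZCrawlLinkRequest, void>` signature), and the queue name `link_crawler_queue`, job name `crawl`, and `LinkCrawlerQueue` variable are illustrative assumptions; only `bookmarkId` and `runInference` mirror the fields the worker reads above.

```typescript
// Hypothetical producer sketch, assuming BullMQ. Queue/job names are
// illustrative, not the repository's actual identifiers; Redis connection
// options are omitted and fall back to BullMQ defaults.
import { Queue } from "bullmq";

interface CrawlLinkRequest {
  bookmarkId: string;
  // Optional so that existing producers that never set it keep the old
  // behaviour: the worker treats anything other than an explicit `false`
  // (including `undefined`) as "run inference".
  runInference?: boolean;
}

const LinkCrawlerQueue = new Queue<CrawlLinkRequest>("link_crawler_queue");

export async function recrawlWithoutInference(bookmarkId: string) {
  // Recrawl the bookmark but skip the OpenAI inference job.
  await LinkCrawlerQueue.add("crawl", { bookmarkId, runInference: false });
}

export async function crawlWithInference(bookmarkId: string) {
  // Omitting the flag is equivalent to `runInference: true` for the worker,
  // since it checks `job.data.runInference !== false`.
  await LinkCrawlerQueue.add("crawl", { bookmarkId });
}
```

Checking `!== false` rather than a truthy test is what keeps the change backward compatible: jobs enqueued by older code carry no `runInference` field and still trigger the inference job as before. The old screenshot and image assets are deleted best-effort (`.catch(() => ({}))`), so a missing or already-removed asset does not fail the recrawl.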
