From 4c589d4c89f0fab97a14f02095e75335f08cc38e Mon Sep 17 00:00:00 2001 From: MohamedBassem Date: Fri, 19 Apr 2024 22:32:57 +0100 Subject: feature: Allow recrawling bookmarks without running inference jobs --- apps/workers/crawlerWorker.ts | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) (limited to 'apps/workers/crawlerWorker.ts') diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts index 27e9e14c..890127c6 100644 --- a/apps/workers/crawlerWorker.ts +++ b/apps/workers/crawlerWorker.ts @@ -25,7 +25,7 @@ import { withTimeout } from "utils"; import type { ZCrawlLinkRequest } from "@hoarder/shared/queues"; import { db } from "@hoarder/db"; import { bookmarkLinks, bookmarks } from "@hoarder/db/schema"; -import { newAssetId, saveAsset } from "@hoarder/shared/assetdb"; +import { deleteAsset, newAssetId, saveAsset } from "@hoarder/shared/assetdb"; import serverConfig from "@hoarder/shared/config"; import logger from "@hoarder/shared/logger"; import { @@ -165,7 +165,12 @@ async function getBookmarkDetails(bookmarkId: string) { if (!bookmark || !bookmark.link) { throw new Error("The bookmark either doesn't exist or not a link"); } - return { url: bookmark.link.url, userId: bookmark.userId }; + return { + url: bookmark.link.url, + userId: bookmark.userId, + screenshotAssetId: bookmark.link.screenshotAssetId, + imageAssetId: bookmark.link.imageAssetId, + }; } /** @@ -332,7 +337,12 @@ async function runCrawler(job: Job) { } const { bookmarkId } = request.data; - const { url, userId } = await getBookmarkDetails(bookmarkId); + const { + url, + userId, + screenshotAssetId: oldScreenshotAssetId, + imageAssetId: oldImageAssetId, + } = await getBookmarkDetails(bookmarkId); logger.info( `[Crawler][${jobId}] Will crawl "${url}" for link with id "${bookmarkId}"`, @@ -371,10 +381,22 @@ async function runCrawler(job: Job) { }) .where(eq(bookmarkLinks.id, bookmarkId)); - // Enqueue openai job - OpenAIQueue.add("openai", { - bookmarkId, - }); + // Delete the old assets if any + await Promise.all([ + oldScreenshotAssetId + ? deleteAsset({ userId, assetId: oldScreenshotAssetId }).catch(() => ({})) + : {}, + oldImageAssetId + ? deleteAsset({ userId, assetId: oldImageAssetId }).catch(() => ({})) + : {}, + ]); + + // Enqueue openai job (if not set, assume it's true for backward compatibility) + if (job.data.runInference !== false) { + OpenAIQueue.add("openai", { + bookmarkId, + }); + } // Update the search index SearchIndexingQueue.add("search_indexing", { -- cgit v1.2.3-70-g09d2