about summary refs log tree commit diff stats
path: root/apps/workers/crawlerWorker.ts
diff options
context:
space:
mode:
author: MohamedBassem <me@mbassem.com> 2024-04-19 22:32:57 +0100
committer: Mohamed Bassem <me@mbassem.com> 2024-04-20 00:05:31 +0100
commit: 4c589d4c89f0fab97a14f02095e75335f08cc38e (patch)
tree: 659ceffb892733df47fc849f6f894eb6c0d8aa02 /apps/workers/crawlerWorker.ts
parent: 4402e6f04170cbb0613d35fe94471162253e91b2 (diff)
download: karakeep-4c589d4c89f0fab97a14f02095e75335f08cc38e.tar.zst
feature: Allow recrawling bookmarks without running inference jobs
Diffstat (limited to 'apps/workers/crawlerWorker.ts')
-rw-r--r-- apps/workers/crawlerWorker.ts | 36
1 file changed, 29 insertions(+), 7 deletions(-)
diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts
index 27e9e14c..890127c6 100644
--- a/apps/workers/crawlerWorker.ts
+++ b/apps/workers/crawlerWorker.ts
@@ -25,7 +25,7 @@ import { withTimeout } from "utils";
import type { ZCrawlLinkRequest } from "@hoarder/shared/queues";
import { db } from "@hoarder/db";
import { bookmarkLinks, bookmarks } from "@hoarder/db/schema";
-import { newAssetId, saveAsset } from "@hoarder/shared/assetdb";
+import { deleteAsset, newAssetId, saveAsset } from "@hoarder/shared/assetdb";
import serverConfig from "@hoarder/shared/config";
import logger from "@hoarder/shared/logger";
import {
@@ -165,7 +165,12 @@ async function getBookmarkDetails(bookmarkId: string) {
if (!bookmark || !bookmark.link) {
throw new Error("The bookmark either doesn't exist or not a link");
}
- return { url: bookmark.link.url, userId: bookmark.userId };
+ return {
+ url: bookmark.link.url,
+ userId: bookmark.userId,
+ screenshotAssetId: bookmark.link.screenshotAssetId,
+ imageAssetId: bookmark.link.imageAssetId,
+ };
}
/**
@@ -332,7 +337,12 @@ async function runCrawler(job: Job<ZCrawlLinkRequest, void>) {
}
const { bookmarkId } = request.data;
- const { url, userId } = await getBookmarkDetails(bookmarkId);
+ const {
+ url,
+ userId,
+ screenshotAssetId: oldScreenshotAssetId,
+ imageAssetId: oldImageAssetId,
+ } = await getBookmarkDetails(bookmarkId);
logger.info(
`[Crawler][${jobId}] Will crawl "${url}" for link with id "${bookmarkId}"`,
@@ -371,10 +381,22 @@ async function runCrawler(job: Job<ZCrawlLinkRequest, void>) {
})
.where(eq(bookmarkLinks.id, bookmarkId));
- // Enqueue openai job
- OpenAIQueue.add("openai", {
- bookmarkId,
- });
+ // Delete the old assets if any
+ await Promise.all([
+ oldScreenshotAssetId
+ ? deleteAsset({ userId, assetId: oldScreenshotAssetId }).catch(() => ({}))
+ : {},
+ oldImageAssetId
+ ? deleteAsset({ userId, assetId: oldImageAssetId }).catch(() => ({}))
+ : {},
+ ]);
+
+ // Enqueue openai job (if not set, assume it's true for backward compatibility)
+ if (job.data.runInference !== false) {
+ OpenAIQueue.add("openai", {
+ bookmarkId,
+ });
+ }
// Update the search index
SearchIndexingQueue.add("search_indexing", {