cgit

/ karakeep

Diff 4c589d4c89f0

Back to commit

context:
File + - Graph
M apps/web/app/dashboard/admin/page.tsx +14 -2
M apps/workers/crawlerWorker.ts +29 -7
M packages/shared/queues.ts +1 -0
M packages/trpc/routers/admin.ts +2 -0
4 file(s) changed, 46 insertions(+), 9 deletions(-)

apps/web/app/dashboard/admin/page.tsx

diff --git a/apps/web/app/dashboard/admin/page.tsx b/apps/web/app/dashboard/admin/page.tsx
index 65ac44e1..43c48b44 100644
--- a/apps/web/app/dashboard/admin/page.tsx
+++ b/apps/web/app/dashboard/admin/page.tsx
@@ -103,7 +103,9 @@ function ActionsSection() {
         className="lg:w-1/2"
         variant="destructive"
         loading={isRecrawlPending}
-        onClick={() => recrawlLinks({ crawlStatus: "failure" })}
+        onClick={() =>
+          recrawlLinks({ crawlStatus: "failure", runInference: true })
+        }
       >
         Recrawl Failed Links Only
       </ActionButton>
@@ -111,10 +113,20 @@ function ActionsSection() {
         className="lg:w-1/2"
         variant="destructive"
         loading={isRecrawlPending}
-        onClick={() => recrawlLinks({ crawlStatus: "all" })}
+        onClick={() => recrawlLinks({ crawlStatus: "all", runInference: true })}
       >
         Recrawl All Links
       </ActionButton>
+      <ActionButton
+        className="lg:w-1/2"
+        variant="destructive"
+        loading={isRecrawlPending}
+        onClick={() =>
+          recrawlLinks({ crawlStatus: "all", runInference: false })
+        }
+      >
+        Recrawl All Links (Without Inference)
+      </ActionButton>
       <ActionButton
         className="lg:w-1/2"
         variant="destructive"

apps/workers/crawlerWorker.ts

diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts
index 27e9e14c..890127c6 100644
--- a/apps/workers/crawlerWorker.ts
+++ b/apps/workers/crawlerWorker.ts
@@ -25,7 +25,7 @@ import { withTimeout } from "utils";
 import type { ZCrawlLinkRequest } from "@hoarder/shared/queues";
 import { db } from "@hoarder/db";
 import { bookmarkLinks, bookmarks } from "@hoarder/db/schema";
-import { newAssetId, saveAsset } from "@hoarder/shared/assetdb";
+import { deleteAsset, newAssetId, saveAsset } from "@hoarder/shared/assetdb";
 import serverConfig from "@hoarder/shared/config";
 import logger from "@hoarder/shared/logger";
 import {
@@ -165,7 +165,12 @@ async function getBookmarkDetails(bookmarkId: string) {
   if (!bookmark || !bookmark.link) {
     throw new Error("The bookmark either doesn't exist or not a link");
   }
-  return { url: bookmark.link.url, userId: bookmark.userId };
+  return {
+    url: bookmark.link.url,
+    userId: bookmark.userId,
+    screenshotAssetId: bookmark.link.screenshotAssetId,
+    imageAssetId: bookmark.link.imageAssetId,
+  };
 }
 
 /**
@@ -332,7 +337,12 @@ async function runCrawler(job: Job<ZCrawlLinkRequest, void>) {
   }
 
   const { bookmarkId } = request.data;
-  const { url, userId } = await getBookmarkDetails(bookmarkId);
+  const {
+    url,
+    userId,
+    screenshotAssetId: oldScreenshotAssetId,
+    imageAssetId: oldImageAssetId,
+  } = await getBookmarkDetails(bookmarkId);
 
   logger.info(
     `[Crawler][${jobId}] Will crawl "${url}" for link with id "${bookmarkId}"`,
@@ -371,10 +381,22 @@ async function runCrawler(job: Job<ZCrawlLinkRequest, void>) {
     })
     .where(eq(bookmarkLinks.id, bookmarkId));
 
-  // Enqueue openai job
-  OpenAIQueue.add("openai", {
-    bookmarkId,
-  });
+  // Delete the old assets if any
+  await Promise.all([
+    oldScreenshotAssetId
+      ? deleteAsset({ userId, assetId: oldScreenshotAssetId }).catch(() => ({}))
+      : {},
+    oldImageAssetId
+      ? deleteAsset({ userId, assetId: oldImageAssetId }).catch(() => ({}))
+      : {},
+  ]);
+
+  // Enqueue openai job (if not set, assume it's true for backward compatibility)
+  if (job.data.runInference !== false) {
+    OpenAIQueue.add("openai", {
+      bookmarkId,
+    });
+  }
 
   // Update the search index
   SearchIndexingQueue.add("search_indexing", {

packages/shared/queues.ts

diff --git a/packages/shared/queues.ts b/packages/shared/queues.ts
index 6d5fdd5f..6ea89f5e 100644
--- a/packages/shared/queues.ts
+++ b/packages/shared/queues.ts
@@ -12,6 +12,7 @@ export const queueConnectionDetails = {
 // Link Crawler
 export const zCrawlLinkRequestSchema = z.object({
   bookmarkId: z.string(),
+  runInference: z.boolean().optional(),
 });
 export type ZCrawlLinkRequest = z.infer<typeof zCrawlLinkRequestSchema>;
 

packages/trpc/routers/admin.ts

diff --git a/packages/trpc/routers/admin.ts b/packages/trpc/routers/admin.ts
index 8792f7ed..0a0af173 100644
--- a/packages/trpc/routers/admin.ts
+++ b/packages/trpc/routers/admin.ts
@@ -100,6 +100,7 @@ export const adminAppRouter = router({
     .input(
       z.object({
         crawlStatus: z.enum(["success", "failure", "all"]),
+        runInference: z.boolean(),
       }),
     )
     .mutation(async ({ ctx, input }) => {
@@ -116,6 +117,7 @@ export const adminAppRouter = router({
         bookmarkIds.map((b) =>
           LinkCrawlerQueue.add("crawl", {
             bookmarkId: b.id,
+            runInference: input.runInference,
           }),
         ),
       );