aboutsummaryrefslogtreecommitdiffstats
path: root/apps/workers
diff options
context:
space:
mode:
author: Mohamed Bassem <me@mbassem.com> 2025-12-29 13:26:38 +0200
committer: GitHub <noreply@github.com> 2025-12-29 11:26:38 +0000
commit: f7920bdc94d97a6a94477f49e145432607b94951 (patch)
tree: 32003337bea676bfccb89f8ab349663e9b3c5750 /apps/workers
parent: 1082076133ff185980ba3d6b5a989939ed431e14 (diff)
download: karakeep-f7920bdc94d97a6a94477f49e145432607b94951.tar.zst
fix: reset tagging status on crawl failure (#2316)
* feat: add the ability to specify a different changelog version
* fix: reset tagging status on crawl failure
* fix missing crawlStatus in loadMulti
Diffstat (limited to 'apps/workers')
-rw-r--r-- apps/workers/workers/crawlerWorker.ts 52
1 files changed, 37 insertions, 15 deletions
diff --git a/apps/workers/workers/crawlerWorker.ts b/apps/workers/workers/crawlerWorker.ts
index 95c91002..411f615a 100644
--- a/apps/workers/workers/crawlerWorker.ts
+++ b/apps/workers/workers/crawlerWorker.ts
@@ -9,7 +9,7 @@ import { PlaywrightBlocker } from "@ghostery/adblocker-playwright";
import { Readability } from "@mozilla/readability";
import { Mutex } from "async-mutex";
import DOMPurify from "dompurify";
-import { eq } from "drizzle-orm";
+import { and, eq } from "drizzle-orm";
import { execa } from "execa";
import { exitAbortController } from "exit";
import { HttpProxyAgent } from "http-proxy-agent";
@@ -332,7 +332,12 @@ export class CrawlerWorker {
logger.info(`[Crawler][${jobId}] Completed successfully`);
const bookmarkId = job.data.bookmarkId;
if (bookmarkId) {
- await changeBookmarkStatus(bookmarkId, "success");
+ await db
+ .update(bookmarkLinks)
+ .set({
+ crawlStatus: "success",
+ })
+ .where(eq(bookmarkLinks.id, bookmarkId));
}
},
onError: async (job) => {
@@ -346,7 +351,36 @@ export class CrawlerWorker {
);
const bookmarkId = job.data?.bookmarkId;
if (bookmarkId && job.numRetriesLeft == 0) {
- await changeBookmarkStatus(bookmarkId, "failure");
+ await db.transaction(async (tx) => {
+ await tx
+ .update(bookmarkLinks)
+ .set({
+ crawlStatus: "failure",
+ })
+ .where(eq(bookmarkLinks.id, bookmarkId));
+ await tx
+ .update(bookmarks)
+ .set({
+ taggingStatus: null,
+ })
+ .where(
+ and(
+ eq(bookmarks.id, bookmarkId),
+ eq(bookmarks.taggingStatus, "pending"),
+ ),
+ );
+ await tx
+ .update(bookmarks)
+ .set({
+ summarizationStatus: null,
+ })
+ .where(
+ and(
+ eq(bookmarks.id, bookmarkId),
+ eq(bookmarks.summarizationStatus, "pending"),
+ ),
+ );
+ });
}
},
},
@@ -388,18 +422,6 @@ async function loadCookiesFromFile(): Promise<void> {
type DBAssetType = typeof assets.$inferInsert;
-async function changeBookmarkStatus(
- bookmarkId: string,
- crawlStatus: "success" | "failure",
-) {
- await db
- .update(bookmarkLinks)
- .set({
- crawlStatus,
- })
- .where(eq(bookmarkLinks.id, bookmarkId));
-}
-
async function browserlessCrawlPage(
jobId: string,
url: string,