From f7920bdc94d97a6a94477f49e145432607b94951 Mon Sep 17 00:00:00 2001 From: Mohamed Bassem Date: Mon, 29 Dec 2025 13:26:38 +0200 Subject: fix: reset tagging status on crawl failure (#2316) * feat: add the ability to specify a different changelog version * fix: reset tagging status on crawl failure * fix missing crawlStatus in loadMulti --- apps/workers/workers/crawlerWorker.ts | 52 +++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 15 deletions(-) (limited to 'apps/workers') diff --git a/apps/workers/workers/crawlerWorker.ts b/apps/workers/workers/crawlerWorker.ts index 95c91002..411f615a 100644 --- a/apps/workers/workers/crawlerWorker.ts +++ b/apps/workers/workers/crawlerWorker.ts @@ -9,7 +9,7 @@ import { PlaywrightBlocker } from "@ghostery/adblocker-playwright"; import { Readability } from "@mozilla/readability"; import { Mutex } from "async-mutex"; import DOMPurify from "dompurify"; -import { eq } from "drizzle-orm"; +import { and, eq } from "drizzle-orm"; import { execa } from "execa"; import { exitAbortController } from "exit"; import { HttpProxyAgent } from "http-proxy-agent"; @@ -332,7 +332,12 @@ export class CrawlerWorker { logger.info(`[Crawler][${jobId}] Completed successfully`); const bookmarkId = job.data.bookmarkId; if (bookmarkId) { - await changeBookmarkStatus(bookmarkId, "success"); + await db + .update(bookmarkLinks) + .set({ + crawlStatus: "success", + }) + .where(eq(bookmarkLinks.id, bookmarkId)); } }, onError: async (job) => { @@ -346,7 +351,36 @@ export class CrawlerWorker { ); const bookmarkId = job.data?.bookmarkId; if (bookmarkId && job.numRetriesLeft == 0) { - await changeBookmarkStatus(bookmarkId, "failure"); + await db.transaction(async (tx) => { + await tx + .update(bookmarkLinks) + .set({ + crawlStatus: "failure", + }) + .where(eq(bookmarkLinks.id, bookmarkId)); + await tx + .update(bookmarks) + .set({ + taggingStatus: null, + }) + .where( + and( + eq(bookmarks.id, bookmarkId), + eq(bookmarks.taggingStatus, "pending"), + ), + ); + await tx + .update(bookmarks) + .set({ + summarizationStatus: null, + }) + .where( + and( + eq(bookmarks.id, bookmarkId), + eq(bookmarks.summarizationStatus, "pending"), + ), + ); + }); } }, }, @@ -388,18 +422,6 @@ async function loadCookiesFromFile(): Promise { type DBAssetType = typeof assets.$inferInsert; -async function changeBookmarkStatus( - bookmarkId: string, - crawlStatus: "success" | "failure", -) { - await db - .update(bookmarkLinks) - .set({ - crawlStatus, - }) - .where(eq(bookmarkLinks.id, bookmarkId)); -} - async function browserlessCrawlPage( jobId: string, url: string, -- cgit v1.2.3-70-g09d2