// Provenance: commit 79321f83293bc37d37af4b0a0b2bd324f5bafe1a
// Author: MohamedBassem — Tue, 2 Apr 2024 12:58:33 +0100
// Subject: fix(workers): Add a timeout to the crawling job to prevent it from
//          getting stuck. Fixes #63
//
// This is the content of apps/workers/utils.ts introduced by that commit.
// In the same commit, apps/workers/crawlerWorker.ts adds
//   import { withTimeout } from "utils";
// and passes `withTimeout(runCrawler, /* timeoutSec */ 30)` instead of
// `runCrawler` as the processor given to `new Worker(LinkCrawlerQueue.name, ...)`.

/**
 * Wraps an async, single-argument function so that every call is raced
 * against a timer.
 *
 * The returned function settles with whatever `func(param)` settles with, as
 * long as that happens within `timeoutSec` seconds; otherwise it rejects with
 * `Error("Timed-out after <timeoutSec> secs")`.
 *
 * Note: the timeout only rejects the wrapper's promise. The underlying
 * `func(param)` call is not cancelled and keeps running in the background;
 * its eventual result or rejection is ignored.
 *
 * @typeParam T - Type of the single argument forwarded to `func`.
 * @typeParam R - Type the promise returned by `func` resolves to.
 * @param func - The async function to guard.
 * @param timeoutSec - Maximum time to wait, in seconds.
 * @returns A function with the same call signature as `func`.
 */
export function withTimeout<T, R>(
  func: (param: T) => Promise<R>,
  timeoutSec: number,
): (param: T) => Promise<R> {
  return async (param: T): Promise<R> => {
    let timer: ReturnType<typeof setTimeout> | undefined;
    try {
      return await Promise.race([
        func(param),
        new Promise<never>((_resolve, reject) => {
          timer = setTimeout(
            () => reject(new Error(`Timed-out after ${timeoutSec} secs`)),
            timeoutSec * 1000,
          );
        }),
      ]);
    } finally {
      // Always release the timer once the race has settled. Otherwise every
      // completed call leaves a pending timer behind for the full timeout,
      // which keeps the event loop alive and retains the closure.
      clearTimeout(timer);
    }
  };
}