about | summary | refs | log | tree | commit | diff | stats
path: root/apps/workers/crawlerWorker.ts
diff options
context:
space:
mode:
author: MohamedBassem <me@mbassem.com> 2024-03-21 03:57:32 +0000
committer: MohamedBassem <me@mbassem.com> 2024-03-21 03:57:32 +0000
commit: 5ea1f48e2359c70844c74708a85a2047fd2984b9 (patch)
tree: 20ffd3de619c0d12b135ef2c42f942168f2e4009 /apps/workers/crawlerWorker.ts
parent: 13ed0720e8157897f3085ad7f7f721ff6e27017b (diff)
download: karakeep-5ea1f48e2359c70844c74708a85a2047fd2984b9.tar.zst
fix(workers): Fix the leaky browser instances in workers during development
Diffstat (limited to 'apps/workers/crawlerWorker.ts')
-rw-r--r-- apps/workers/crawlerWorker.ts 58
1 files changed, 30 insertions, 28 deletions
diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts
index ecd8d146..eb4a0697 100644
--- a/apps/workers/crawlerWorker.ts
+++ b/apps/workers/crawlerWorker.ts
@@ -1,40 +1,36 @@
-import logger from "@hoarder/shared/logger";
-import {
- LinkCrawlerQueue,
- OpenAIQueue,
- SearchIndexingQueue,
- ZCrawlLinkRequest,
- queueConnectionDetails,
- zCrawlLinkRequestSchema,
-} from "@hoarder/shared/queues";
+import assert from "assert";
+import { Readability } from "@mozilla/readability";
+import { Mutex } from "async-mutex";
+import { Job, Worker } from "bullmq";
import DOMPurify from "dompurify";
+import { eq } from "drizzle-orm";
+import { isShuttingDown, shutdownPromise } from "exit";
import { JSDOM } from "jsdom";
-
-import { Worker } from "bullmq";
-import { Job } from "bullmq";
-
-import { db } from "@hoarder/db";
-
-import { Browser } from "puppeteer";
-import puppeteer from "puppeteer-extra";
-import StealthPlugin from "puppeteer-extra-plugin-stealth";
-import AdblockerPlugin from "puppeteer-extra-plugin-adblocker";
-
import metascraper from "metascraper";
-
import metascraperDescription from "metascraper-description";
import metascraperImage from "metascraper-image";
import metascraperLogo from "metascraper-logo-favicon";
+import metascraperReadability from "metascraper-readability";
import metascraperTitle from "metascraper-title";
-import metascraperUrl from "metascraper-url";
import metascraperTwitter from "metascraper-twitter";
-import metascraperReadability from "metascraper-readability";
-import { Mutex } from "async-mutex";
-import assert from "assert";
-import serverConfig from "@hoarder/shared/config";
+import metascraperUrl from "metascraper-url";
+import { Browser } from "puppeteer";
+import puppeteer from "puppeteer-extra";
+import AdblockerPlugin from "puppeteer-extra-plugin-adblocker";
+import StealthPlugin from "puppeteer-extra-plugin-stealth";
+
+import { db } from "@hoarder/db";
import { bookmarkLinks } from "@hoarder/db/schema";
-import { eq } from "drizzle-orm";
-import { Readability } from "@mozilla/readability";
+import serverConfig from "@hoarder/shared/config";
+import logger from "@hoarder/shared/logger";
+import {
+ LinkCrawlerQueue,
+ OpenAIQueue,
+ queueConnectionDetails,
+ SearchIndexingQueue,
+ ZCrawlLinkRequest,
+ zCrawlLinkRequestSchema,
+} from "@hoarder/shared/queues";
const metascraperParser = metascraper([
metascraperReadability(),
@@ -60,6 +56,12 @@ async function launchBrowser() {
userDataDir: serverConfig.crawler.browserUserDataDir,
});
browser.on("disconnected", async () => {
+ if (isShuttingDown) {
+ logger.info(
+ "The puppeteer browser got disconnected. But we're shutting down so won't restart it.",
+ );
+ return;
+ }
logger.info(
"The puppeteer browser got disconnected. Will attempt to launch it again.",
);