From 4e9544b0c753b7fa01c56710a0d151e3a9f977e0 Mon Sep 17 00:00:00 2001
From: Mohamed Bassem
Date: Sat, 19 Jul 2025 11:01:18 +0000
Subject: feat: Allow setting browserless crawling per user

---
 apps/workers/workers/crawlerWorker.ts | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

(limited to 'apps')

diff --git a/apps/workers/workers/crawlerWorker.ts b/apps/workers/workers/crawlerWorker.ts
index 05bce103..140a3701 100644
--- a/apps/workers/workers/crawlerWorker.ts
+++ b/apps/workers/workers/crawlerWorker.ts
@@ -39,6 +39,7 @@ import {
   bookmarkAssets,
   bookmarkLinks,
   bookmarks,
+  users,
 } from "@karakeep/db/schema";
 import {
   ASSET_TYPES,
@@ -350,6 +351,7 @@ async function browserlessCrawlPage(
 async function crawlPage(
   jobId: string,
   url: string,
+  userId: string,
   abortSignal: AbortSignal,
 ): Promise<{
   htmlContent: string;
@@ -357,6 +359,22 @@ async function crawlPage(
   statusCode: number;
   url: string;
 }> {
+  // Check user's browser crawling setting
+  const userData = await db.query.users.findFirst({
+    where: eq(users.id, userId),
+    columns: { browserCrawlingEnabled: true },
+  });
+  if (!userData) {
+    logger.error(`[Crawler][${jobId}] User ${userId} not found`);
+    throw new Error(`User ${userId} not found`);
+  }
+
+  const browserCrawlingEnabled = userData.browserCrawlingEnabled;
+
+  if (browserCrawlingEnabled !== null && !browserCrawlingEnabled) {
+    return browserlessCrawlPage(jobId, url, abortSignal);
+  }
+
   let browser: Browser | undefined;
   if (serverConfig.crawler.browserConnectOnDemand) {
     browser = await startBrowserInstance();
@@ -876,7 +894,7 @@ async function crawlAndParseUrl(
       url,
     };
   } else {
-    result = await crawlPage(jobId, url, abortSignal);
+    result = await crawlPage(jobId, url, userId, abortSignal);
   }
   abortSignal.throwIfAborted();
 
-- 
cgit v1.2.3-70-g09d2