From a91aff23eaa3616aec331ef0267863ed0b0e594b Mon Sep 17 00:00:00 2001
From: MohamedBassem
Date: Fri, 26 Apr 2024 11:06:54 +0100
Subject: feature(crawler): Allow increasing crawler concurrency and configure storing images and screenshots

---
 apps/workers/crawlerWorker.ts | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'apps/workers/crawlerWorker.ts')

diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts
index b583864d..6b4d39f0 100644
--- a/apps/workers/crawlerWorker.ts
+++ b/apps/workers/crawlerWorker.ts
@@ -123,6 +123,7 @@ export class CrawlerWorker {
         /* timeoutSec */ serverConfig.crawler.jobTimeoutSec,
       ),
       {
+        concurrency: serverConfig.crawler.numWorkers,
         connection: queueConnectionDetails,
         autorun: false,
       },
@@ -282,6 +283,12 @@ async function storeScreenshot(
   userId: string,
   jobId: string,
 ) {
+  if (!serverConfig.crawler.storeScreenshot) {
+    logger.info(
+      `[Crawler][${jobId}] Skipping storing the screenshot as per the config.`,
+    );
+    return null;
+  }
   const assetId = newAssetId();
   await saveAsset({
     userId,
@@ -300,6 +307,12 @@ async function downloadAndStoreImage(
   userId: string,
   jobId: string,
 ) {
+  if (!serverConfig.crawler.downloadBannerImage) {
+    logger.info(
+      `[Crawler][${jobId}] Skipping downloading the image as per the config.`,
+    );
+    return null;
+  }
   try {
     logger.info(`[Crawler][${jobId}] Downloading image from "${url}"`);
     const response = await fetch(url);
-- 
cgit v1.2.3-70-g09d2