From a91aff23eaa3616aec331ef0267863ed0b0e594b Mon Sep 17 00:00:00 2001
From: MohamedBassem <me@mbassem.com>
Date: Fri, 26 Apr 2024 11:06:54 +0100
Subject: feature(crawler): Allow increasing crawler concurrency and configure
 storing images and screenshots

---
 apps/workers/crawlerWorker.ts | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'apps/workers/crawlerWorker.ts')

diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts
index b583864d..6b4d39f0 100644
--- a/apps/workers/crawlerWorker.ts
+++ b/apps/workers/crawlerWorker.ts
@@ -123,6 +123,7 @@ export class CrawlerWorker {
         /* timeoutSec */ serverConfig.crawler.jobTimeoutSec,
       ),
       {
+        concurrency: serverConfig.crawler.numWorkers,
         connection: queueConnectionDetails,
         autorun: false,
       },
@@ -282,6 +283,12 @@ async function storeScreenshot(
   userId: string,
   jobId: string,
 ) {
+  if (!serverConfig.crawler.storeScreenshot) {
+    logger.info(
+      `[Crawler][${jobId}] Skipping storing the screenshot as per the config.`,
+    );
+    return null;
+  }
   const assetId = newAssetId();
   await saveAsset({
     userId,
@@ -300,6 +307,12 @@ async function downloadAndStoreImage(
   userId: string,
   jobId: string,
 ) {
+  if (!serverConfig.crawler.downloadBannerImage) {
+    logger.info(
+      `[Crawler][${jobId}] Skipping downloading the image as per the config.`,
+    );
+    return null;
+  }
   try {
     logger.info(`[Crawler][${jobId}] Downloading image from "${url}"`);
     const response = await fetch(url);
-- 
cgit v1.2.3-70-g09d2