about summary refs log tree commit diff stats
path: root/apps/workers
diff options
context:
space:
mode:
author Mohamed Bassem <me@mbassem.com> 2024-11-23 14:20:17 +0000
committer Mohamed Bassem <me@mbassem.com> 2024-11-23 14:20:17 +0000
commit 6ccdbe59d70057b18b0752f6becb371a14e39424 (patch)
tree 2999d0238ae05e72425ce36f5ffc94ed04d60169 /apps/workers
parent 378ad9bc157fb7741e09cdb687a97c82c2851578 (diff)
download karakeep-6ccdbe59d70057b18b0752f6becb371a14e39424.tar.zst
fix(workers): Set a timeout on the screenshot call and completely skip it if screenshotting is disabled
Diffstat (limited to 'apps/workers')
-rw-r--r-- apps/workers/crawlerWorker.ts 45
1 file changed, 32 insertions, 13 deletions
diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts
index 2dad98b7..376e50ea 100644
--- a/apps/workers/crawlerWorker.ts
+++ b/apps/workers/crawlerWorker.ts
@@ -277,18 +277,31 @@ async function crawlPage(jobId: string, url: string) {
logger.info(`[Crawler][${jobId}] Finished waiting for the page to load.`);
- const [htmlContent, screenshot] = await Promise.all([
- page.content(),
- page.screenshot({
- // If you change this, you need to change the asset type in the store function.
- type: "png",
- encoding: "binary",
- fullPage: serverConfig.crawler.fullPageScreenshot,
- }),
- ]);
- logger.info(
- `[Crawler][${jobId}] Finished capturing page content and a screenshot. FullPageScreenshot: ${serverConfig.crawler.fullPageScreenshot}`,
- );
+ const htmlContent = await page.content();
+ logger.info(`[Crawler][${jobId}] Successfully fetched the page content.`);
+
+ let screenshot: Buffer | undefined = undefined;
+ if (serverConfig.crawler.storeScreenshot) {
+ screenshot = await Promise.race<Buffer | undefined>([
+ page
+ .screenshot({
+ // If you change this, you need to change the asset type in the store function.
+ type: "png",
+ encoding: "binary",
+ fullPage: serverConfig.crawler.fullPageScreenshot,
+ })
+ .catch(() => undefined),
+ new Promise((f) => setTimeout(f, 5000)),
+ ]);
+ if (!screenshot) {
+ logger.warn(`[Crawler][${jobId}] Failed to capture the screenshot.`);
+ } else {
+ logger.info(
+ `[Crawler][${jobId}] Finished capturing page content and a screenshot. FullPageScreenshot: ${serverConfig.crawler.fullPageScreenshot}`,
+ );
+ }
+ }
+
return {
htmlContent,
screenshot,
@@ -336,7 +349,7 @@ function extractReadableContent(
}
async function storeScreenshot(
- screenshot: Buffer,
+ screenshot: Buffer | undefined,
userId: string,
jobId: string,
) {
@@ -346,6 +359,12 @@ async function storeScreenshot(
);
return null;
}
+ if (!screenshot) {
+ logger.info(
+ `[Crawler][${jobId}] Skipping storing the screenshot as it's empty.`,
+ );
+ return null;
+ }
const assetId = newAssetId();
const contentType = "image/png";
const fileName = "screenshot.png";