aboutsummaryrefslogtreecommitdiffstats
path: root/packages/workers
diff options
context:
space:
mode:
authorMohamedBassem <me@mbassem.com>2024-02-17 13:35:16 +0000
committerMohamedBassem <me@mbassem.com>2024-02-17 13:35:16 +0000
commite247b141a98ab7b55d54ca5c7be8347fd076bda2 (patch)
treec531b93ad3451943ad288452cc72ef65190b13c2 /packages/workers
parent561bef94d9229b3125228d7a6110847a4899591d (diff)
downloadkarakeep-e247b141a98ab7b55d54ca5c7be8347fd076bda2.tar.zst
fix: Let the crawler wait a bit more for page load
Diffstat (limited to 'packages/workers')
-rw-r--r--packages/workers/crawler.ts14
1 files changed, 12 insertions, 2 deletions
diff --git a/packages/workers/crawler.ts b/packages/workers/crawler.ts
index 353f9056..4ba6aedc 100644
--- a/packages/workers/crawler.ts
+++ b/packages/workers/crawler.ts
@@ -27,6 +27,7 @@ import metascraperTwitter from "metascraper-twitter";
import metascraperReadability from "metascraper-readability";
import { Mutex } from "async-mutex";
import assert from "assert";
+import serverConfig from "@remember/shared/config";
const metascraperParser = metascraper([
metascraperReadability(),
@@ -46,7 +47,7 @@ const browserMutex = new Mutex();
async function launchBrowser() {
browser = undefined;
await browserMutex.runExclusive(async () => {
- browser = await puppeteer.launch({ headless: true });
+ browser = await puppeteer.launch({ headless: serverConfig.crawler.headlessBrowser });
browser.on("disconnected", async () => {
logger.info(
"The puppeteer browser got disconnected. Will attempt to launch it again.",
@@ -105,9 +106,18 @@ async function crawlPage(url: string) {
await page.goto(url, {
timeout: 10000, // 10 seconds
- waitUntil: "networkidle2",
});
+ // Wait until there are at most two in-flight network connections for 1 second,
+ // but give up waiting after at most 5 seconds
+ await Promise.race([
+ page.waitForNetworkIdle({
+ idleTime: 1000, // 1 sec
+ concurrency: 2,
+ }),
+ new Promise((f) => setTimeout(f, 5000)),
+ ]);
+
const htmlContent = await page.content();
return htmlContent;
} finally {