aboutsummaryrefslogtreecommitdiffstats
path: root/packages/workers/crawler.ts
diff options
context:
space:
mode:
Diffstat (limited to 'packages/workers/crawler.ts')
-rw-r--r-- packages/workers/crawler.ts 14
1 files changed, 12 insertions, 2 deletions
diff --git a/packages/workers/crawler.ts b/packages/workers/crawler.ts
index 353f9056..4ba6aedc 100644
--- a/packages/workers/crawler.ts
+++ b/packages/workers/crawler.ts
@@ -27,6 +27,7 @@ import metascraperTwitter from "metascraper-twitter";
import metascraperReadability from "metascraper-readability";
import { Mutex } from "async-mutex";
import assert from "assert";
+import serverConfig from "@remember/shared/config";
const metascraperParser = metascraper([
metascraperReadability(),
@@ -46,7 +47,7 @@ const browserMutex = new Mutex();
async function launchBrowser() {
browser = undefined;
await browserMutex.runExclusive(async () => {
- browser = await puppeteer.launch({ headless: true });
+ browser = await puppeteer.launch({ headless: serverConfig.crawler.headlessBrowser });
browser.on("disconnected", async () => {
logger.info(
"The puppeteer browser got disconnected. Will attempt to launch it again.",
@@ -105,9 +106,18 @@ async function crawlPage(url: string) {
await page.goto(url, {
timeout: 10000, // 10 seconds
- waitUntil: "networkidle2",
});
+ // Wait until there have been at most two in-flight network connections for 1 second,
+ // but stop waiting after 5 seconds and proceed with whatever has loaded so far.
+ await Promise.race([
+ page.waitForNetworkIdle({
+ idleTime: 1000, // 1 sec
+ concurrency: 2,
+ }),
+ new Promise((f) => setTimeout(f, 5000)),
+ ]);
+
const htmlContent = await page.content();
return htmlContent;
} finally {