about | summary | refs | log | tree | commit | diff | stats
path: root/apps
diff options
context:
space:
mode:
author: MohamedBassem <me@mbassem.com> 2024-04-11 15:03:31 +0300
committer: MohamedBassem <me@mbassem.com> 2024-04-11 15:03:31 +0300
commit: 238c2967b269ca0f66d8e759c6a0234107e1fd1e (patch)
tree: 1e8590a829bd6b24950ac56eb0a21450c8ce3332 /apps
parent: be622e5594ecb21c82bb6066a82c86e0917bcc35 (diff)
download: karakeep-238c2967b269ca0f66d8e759c6a0234107e1fd1e.tar.zst
fix: Increase default navigation timeout to 30s, make it configurable and add retries to crawling jobs
Diffstat (limited to 'apps')
-rw-r--r-- apps/workers/crawlerWorker.ts 2
-rw-r--r-- apps/workers/utils.ts 1
2 files changed, 1 insertion, 2 deletions
diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts
index c9a1189c..eec8cd98 100644
--- a/apps/workers/crawlerWorker.ts
+++ b/apps/workers/crawlerWorker.ts
@@ -170,7 +170,7 @@ async function crawlPage(jobId: string, url: string) {
const page = await context.newPage();
await page.goto(url, {
- timeout: 10000, // 10 seconds
+ timeout: serverConfig.crawler.navigateTimeoutSec * 1000,
});
logger.info(
`[Crawler][${jobId}] Successfully navigated to "${url}". Waiting for the page to load ...`,
diff --git a/apps/workers/utils.ts b/apps/workers/utils.ts
index f8c48408..8e69dcd2 100644
--- a/apps/workers/utils.ts
+++ b/apps/workers/utils.ts
@@ -26,7 +26,6 @@ export async function readPDFText(buffer: Buffer): Promise<{
const pdfParser = new PDFParser(null, 1);
pdfParser.on("pdfParser_dataError", reject);
pdfParser.on("pdfParser_dataReady", (pdfData) => {
- // eslint-disable-next-line
resolve({
// The type isn't set correctly, reference : https://github.com/modesty/pdf2json/issues/327
// eslint-disable-next-line