diff options
| author | MohamedBassem <me@mbassem.com> | 2024-04-11 15:03:31 +0300 |
|---|---|---|
| committer | MohamedBassem <me@mbassem.com> | 2024-04-11 15:03:31 +0300 |
| commit | 238c2967b269ca0f66d8e759c6a0234107e1fd1e (patch) | |
| tree | 1e8590a829bd6b24950ac56eb0a21450c8ce3332 /apps | |
| parent | be622e5594ecb21c82bb6066a82c86e0917bcc35 (diff) | |
| download | karakeep-238c2967b269ca0f66d8e759c6a0234107e1fd1e.tar.zst | |
fix: Increase default navigation timeout to 30s, make it configurable and add retries to crawling jobs
Diffstat (limited to 'apps')
| -rw-r--r-- | apps/workers/crawlerWorker.ts | 2 | ||||
| -rw-r--r-- | apps/workers/utils.ts | 1 |
2 files changed, 1 insertion, 2 deletions
```diff
diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts
index c9a1189c..eec8cd98 100644
--- a/apps/workers/crawlerWorker.ts
+++ b/apps/workers/crawlerWorker.ts
@@ -170,7 +170,7 @@ async function crawlPage(jobId: string, url: string) {
     const page = await context.newPage();
 
     await page.goto(url, {
-      timeout: 10000, // 10 seconds
+      timeout: serverConfig.crawler.navigateTimeoutSec * 1000,
     });
     logger.info(
       `[Crawler][${jobId}] Successfully navigated to "${url}". Waiting for the page to load ...`,
diff --git a/apps/workers/utils.ts b/apps/workers/utils.ts
index f8c48408..8e69dcd2 100644
--- a/apps/workers/utils.ts
+++ b/apps/workers/utils.ts
@@ -26,7 +26,6 @@ export async function readPDFText(buffer: Buffer): Promise<{
     const pdfParser = new PDFParser(null, 1);
     pdfParser.on("pdfParser_dataError", reject);
     pdfParser.on("pdfParser_dataReady", (pdfData) => {
-      // eslint-disable-next-line
       resolve({
         // The type isn't set correctly, reference : https://github.com/modesty/pdf2json/issues/327
         // eslint-disable-next-line
```
