diff options
| author | MohamedBassem <me@mbassem.com> | 2024-04-09 19:41:21 +0100 |
|---|---|---|
| committer | MohamedBassem <me@mbassem.com> | 2024-04-09 19:41:21 +0100 |
| commit | a9242a56d909a61ba6d51e531763294edb6f049c (patch) | |
| tree | c31fc27788602faa4196cc65899da228894434d0 /apps/workers/crawlerWorker.ts | |
| parent | f696d33b645c53369ec5833593bdc77c9fb7cea9 (diff) | |
| download | karakeep-a9242a56d909a61ba6d51e531763294edb6f049c.tar.zst | |
fix(crawler): Skip validating URLs in metascraper as it was already being validated. Fixes #22
Diffstat (limited to 'apps/workers/crawlerWorker.ts')
| -rw-r--r-- | apps/workers/crawlerWorker.ts | 3 |
1 files changed, 3 insertions, 0 deletions
```diff
diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts
index cce409e5..c9a1189c 100644
--- a/apps/workers/crawlerWorker.ts
+++ b/apps/workers/crawlerWorker.ts
@@ -222,6 +222,9 @@ async function runCrawler(job: Job<ZCrawlLinkRequest, void>) {
   const meta = await metascraperParser({
     url,
     html: htmlContent,
+    // We don't want to validate the URL again as we've already done it by visiting the page.
+    // This was added because URL validation fails if the URL ends with a question mark (e.g. empty query params).
+    validateUrl: false,
   });
 
   logger.info(`[Crawler][${jobId}] Done parsing the content of the page.`);
```
