From a9242a56d909a61ba6d51e531763294edb6f049c Mon Sep 17 00:00:00 2001 From: MohamedBassem Date: Tue, 9 Apr 2024 19:41:21 +0100 Subject: fix(crawler): Skip validating URLs in metascraper as it was already being validated. Fixes #22 --- apps/workers/crawlerWorker.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts index cce409e5..c9a1189c 100644 --- a/apps/workers/crawlerWorker.ts +++ b/apps/workers/crawlerWorker.ts @@ -222,6 +222,9 @@ async function runCrawler(job: Job) { const meta = await metascraperParser({ url, html: htmlContent, + // We don't want to validate the URL again as we've already done it by visiting the page. + // This was added because URL validation fails if the URL ends with a question mark (e.g. empty query params). + validateUrl: false, }); logger.info(`[Crawler][${jobId}] Done parsing the content of the page.`); -- cgit v1.2.3-70-g09d2