aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: MohamedBassem <me@mbassem.com> 2024-04-09 19:41:21 +0100
committer: MohamedBassem <me@mbassem.com> 2024-04-09 19:41:21 +0100
commit: a9242a56d909a61ba6d51e531763294edb6f049c (patch)
tree: c31fc27788602faa4196cc65899da228894434d0
parent: f696d33b645c53369ec5833593bdc77c9fb7cea9 (diff)
download: karakeep-a9242a56d909a61ba6d51e531763294edb6f049c.tar.zst
fix(crawler): Skip validating URLs in metascraper as it was already being validated. Fixes #22
-rw-r--r-- apps/workers/crawlerWorker.ts 3
1 files changed, 3 insertions, 0 deletions
diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts
index cce409e5..c9a1189c 100644
--- a/apps/workers/crawlerWorker.ts
+++ b/apps/workers/crawlerWorker.ts
@@ -222,6 +222,9 @@ async function runCrawler(job: Job<ZCrawlLinkRequest, void>) {
const meta = await metascraperParser({
url,
html: htmlContent,
+ // We don't want to validate the URL again as we've already done it by visiting the page.
+ // This was added because URL validation fails if the URL ends with a question mark (e.g. empty query params).
+ validateUrl: false,
});
logger.info(`[Crawler][${jobId}] Done parsing the content of the page.`);