From 68e27adb029cb7bb7b51b8ea594163931a495c61 Mon Sep 17 00:00:00 2001 From: Mohamed Bassem Date: Sun, 2 Feb 2025 15:44:48 +0000 Subject: fix: Don't rearchive singlefile uploads and consider them as archives --- apps/workers/crawlerWorker.ts | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'apps/workers') diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts index 91adb185..7611494e 100644 --- a/apps/workers/crawlerWorker.ts +++ b/apps/workers/crawlerWorker.ts @@ -669,7 +669,8 @@ async function crawlAndParseUrl( .set({ title: meta.title, description: meta.description, - imageUrl: meta.image, + // Don't store data URIs as they're not valid URLs and are usually quite large + imageUrl: meta.image?.startsWith("data:") ? null : meta.image, favicon: meta.logo, content: readableContent?.textContent, htmlContent: readableContent?.content, @@ -705,7 +706,10 @@ async function crawlAndParseUrl( ]); return async () => { - if (serverConfig.crawler.fullPageArchive || archiveFullPage) { + if ( + !precrawledArchiveAssetId && + (serverConfig.crawler.fullPageArchive || archiveFullPage) + ) { const { assetId: fullPageArchiveAssetId, size, -- cgit v1.2.3-70-g09d2