| author | Mohamed Bassem <me@mbassem.com> | 2025-02-02 15:44:48 +0000 |
|---|---|---|
| committer | Mohamed Bassem <me@mbassem.com> | 2025-02-02 15:47:21 +0000 |
| commit | 68e27adb029cb7bb7b51b8ea594163931a495c61 | |
| tree | f5ec56769072c35adb16c43a7a21686eb93477a2 | apps/workers/crawlerWorker.ts |
| parent | b59fe2ee819acc4c8115c9f6322050e2d1dc2204 | |
| download | karakeep-68e27adb029cb7bb7b51b8ea594163931a495c61.tar.zst | |
fix: Dont rearchive singlefile uploads and consider them as archives
Diffstat (limited to 'apps/workers/crawlerWorker.ts')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | apps/workers/crawlerWorker.ts | 8 |

1 file changed, 6 insertions, 2 deletions
```diff
diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts
index 91adb185..7611494e 100644
--- a/apps/workers/crawlerWorker.ts
+++ b/apps/workers/crawlerWorker.ts
@@ -669,7 +669,8 @@ async function crawlAndParseUrl(
     .set({
       title: meta.title,
       description: meta.description,
-      imageUrl: meta.image,
+      // Don't store data URIs as they're not valid URLs and are usually quite large
+      imageUrl: meta.image?.startsWith("data:") ? null : meta.image,
       favicon: meta.logo,
       content: readableContent?.textContent,
       htmlContent: readableContent?.content,
@@ -705,7 +706,10 @@ async function crawlAndParseUrl(
   ]);
 
   return async () => {
-    if (serverConfig.crawler.fullPageArchive || archiveFullPage) {
+    if (
+      !precrawledArchiveAssetId &&
+      (serverConfig.crawler.fullPageArchive || archiveFullPage)
+    ) {
       const {
         assetId: fullPageArchiveAssetId,
         size,
```
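Taken out of the diff context, the two guards amount to the sketch below. The helper names (`sanitizeImageUrl`, `shouldArchiveFullPage`) and the options object are illustrative only; the crawler applies both checks inline inside `crawlAndParseUrl`, where `precrawledArchiveAssetId` is presumably set when the bookmark was created from a SingleFile upload.

```ts
// Illustrative helpers only -- the real crawler performs these checks inline.

/**
 * Drop `data:` URIs before persisting the crawled image URL: they are not
 * fetchable URLs and the base64 payload can be very large.
 */
function sanitizeImageUrl(image: string | undefined): string | null {
  if (!image) {
    return null;
  }
  return image.startsWith("data:") ? null : image;
}

/**
 * Run the full-page archiver only when no pre-crawled archive (e.g. a
 * SingleFile upload) already exists and archiving is requested, either
 * globally or for this particular crawl.
 */
function shouldArchiveFullPage(opts: {
  precrawledArchiveAssetId?: string;
  fullPageArchiveEnabled: boolean; // stands in for serverConfig.crawler.fullPageArchive
  archiveFullPage: boolean;
}): boolean {
  return (
    !opts.precrawledArchiveAssetId &&
    (opts.fullPageArchiveEnabled || opts.archiveFullPage)
  );
}
```

With guards along these lines, a SingleFile upload keeps its original archive instead of being re-archived, and inline `data:` thumbnails never reach the stored `imageUrl`.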
