about summary refs log tree commit diff stats
path: root/apps/workers/crawlerWorker.ts
diff options
context:
space:
mode:
Diffstat (limited to 'apps/workers/crawlerWorker.ts')
-rw-r--r-- apps/workers/crawlerWorker.ts 8
1 files changed, 6 insertions, 2 deletions
diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts
index 91adb185..7611494e 100644
--- a/apps/workers/crawlerWorker.ts
+++ b/apps/workers/crawlerWorker.ts
@@ -669,7 +669,8 @@ async function crawlAndParseUrl(
.set({
title: meta.title,
description: meta.description,
- imageUrl: meta.image,
+ // Don't store data URIs as they're not valid URLs and are usually quite large
+ imageUrl: meta.image?.startsWith("data:") ? null : meta.image,
favicon: meta.logo,
content: readableContent?.textContent,
htmlContent: readableContent?.content,
@@ -705,7 +706,10 @@ async function crawlAndParseUrl(
]);
return async () => {
- if (serverConfig.crawler.fullPageArchive || archiveFullPage) {
+ if (
+ !precrawledArchiveAssetId &&
+ (serverConfig.crawler.fullPageArchive || archiveFullPage)
+ ) {
const {
assetId: fullPageArchiveAssetId,
size,