aboutsummaryrefslogtreecommitdiffstats
path: root/apps
diff options
context:
space:
mode:
author Mohamed Bassem <me@mbassem.com> 2025-02-02 15:44:48 +0000
committer Mohamed Bassem <me@mbassem.com> 2025-02-02 15:47:21 +0000
commit 68e27adb029cb7bb7b51b8ea594163931a495c61 (patch)
tree f5ec56769072c35adb16c43a7a21686eb93477a2 /apps
parent b59fe2ee819acc4c8115c9f6322050e2d1dc2204 (diff)
download karakeep-68e27adb029cb7bb7b51b8ea594163931a495c61.tar.zst
fix: Don't rearchive singlefile uploads and consider them as archives
Diffstat (limited to 'apps')
-rw-r--r-- apps/web/components/dashboard/preview/LinkContentSection.tsx | 9
-rw-r--r-- apps/workers/crawlerWorker.ts | 8
2 files changed, 13 insertions, 4 deletions
diff --git a/apps/web/components/dashboard/preview/LinkContentSection.tsx b/apps/web/components/dashboard/preview/LinkContentSection.tsx
index f09cc31f..f1fe3820 100644
--- a/apps/web/components/dashboard/preview/LinkContentSection.tsx
+++ b/apps/web/components/dashboard/preview/LinkContentSection.tsx
@@ -26,10 +26,12 @@ import {
} from "@hoarder/shared/types/bookmarks";
function FullPageArchiveSection({ link }: { link: ZBookmarkedLink }) {
+ const archiveAssetId =
+ link.fullPageArchiveAssetId ?? link.precrawledArchiveAssetId;
return (
<iframe
title={link.url}
- src={`/api/assets/${link.fullPageArchiveAssetId}`}
+ src={`/api/assets/${archiveAssetId}`}
className="relative h-full min-w-full"
/>
);
@@ -199,7 +201,10 @@ export default function LinkContentSection({
</SelectItem>
<SelectItem
value="archive"
- disabled={!bookmark.content.fullPageArchiveAssetId}
+ disabled={
+ !bookmark.content.fullPageArchiveAssetId &&
+ !bookmark.content.precrawledArchiveAssetId
+ }
>
{t("common.archive")}
</SelectItem>
diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts
index 91adb185..7611494e 100644
--- a/apps/workers/crawlerWorker.ts
+++ b/apps/workers/crawlerWorker.ts
@@ -669,7 +669,8 @@ async function crawlAndParseUrl(
.set({
title: meta.title,
description: meta.description,
- imageUrl: meta.image,
+ // Don't store data URIs as they're not valid URLs and are usually quite large
+ imageUrl: meta.image?.startsWith("data:") ? null : meta.image,
favicon: meta.logo,
content: readableContent?.textContent,
htmlContent: readableContent?.content,
@@ -705,7 +706,10 @@ async function crawlAndParseUrl(
]);
return async () => {
- if (serverConfig.crawler.fullPageArchive || archiveFullPage) {
+ if (
+ !precrawledArchiveAssetId &&
+ (serverConfig.crawler.fullPageArchive || archiveFullPage)
+ ) {
const {
assetId: fullPageArchiveAssetId,
size,