diff options
| author | Mohamed Bassem <me@mbassem.com> | 2025-12-27 11:59:39 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-12-27 09:59:39 +0000 |
| commit | 267db791290f4f539d7bda113992e3d1690b0e8b (patch) | |
| tree | 0144ea00dcf6a49bdaaf46511cd074651aeeee5a | |
| parent | bb6b742a040a70478d276529774bde67b8f93648 (diff) | |
| download | karakeep-267db791290f4f539d7bda113992e3d1690b0e8b.tar.zst | |
feat: support archiving as pdf (#2309)
* feat: support archiving as pdf
* add support for manually triggering pdf downloads
* fix submenu
* menu cleanup
* fix store pdf
| -rw-r--r-- | apps/web/components/dashboard/bookmarks/BookmarkOptions.tsx | 155 | ||||
| -rw-r--r-- | apps/web/components/dashboard/preview/LinkContentSection.tsx | 19 | ||||
| -rw-r--r-- | apps/web/lib/attachments.tsx | 2 | ||||
| -rw-r--r-- | apps/web/lib/i18n/locales/en/translation.json | 4 | ||||
| -rw-r--r-- | apps/web/lib/i18n/locales/en_US/translation.json | 4 | ||||
| -rw-r--r-- | apps/workers/workerUtils.ts | 2 | ||||
| -rw-r--r-- | apps/workers/workers/crawlerWorker.ts | 111 | ||||
| -rw-r--r-- | docs/docs/03-configuration/01-environment-variables.md | 1 | ||||
| -rw-r--r-- | packages/db/schema.ts | 2 | ||||
| -rw-r--r-- | packages/open-api/karakeep-openapi-spec.json | 7 | ||||
| -rw-r--r-- | packages/shared-server/src/queues.ts | 1 | ||||
| -rw-r--r-- | packages/shared/config.ts | 2 | ||||
| -rw-r--r-- | packages/shared/types/bookmarks.ts | 2 | ||||
| -rw-r--r-- | packages/trpc/lib/attachments.ts | 5 | ||||
| -rw-r--r-- | packages/trpc/models/bookmarks.ts | 4 | ||||
| -rw-r--r-- | packages/trpc/routers/bookmarks.ts | 2 |
16 files changed, 290 insertions, 33 deletions
diff --git a/apps/web/components/dashboard/bookmarks/BookmarkOptions.tsx b/apps/web/components/dashboard/bookmarks/BookmarkOptions.tsx index 66de6156..eb746efc 100644 --- a/apps/web/components/dashboard/bookmarks/BookmarkOptions.tsx +++ b/apps/web/components/dashboard/bookmarks/BookmarkOptions.tsx @@ -6,13 +6,18 @@ import { DropdownMenu, DropdownMenuContent, DropdownMenuItem, + DropdownMenuSub, + DropdownMenuSubContent, + DropdownMenuSubTrigger, DropdownMenuTrigger, } from "@/components/ui/dropdown-menu"; import { useToast } from "@/components/ui/use-toast"; import { useClientConfig } from "@/lib/clientConfig"; import { useTranslation } from "@/lib/i18n/client"; import { + Archive, FileDown, + FileText, Link, List, ListX, @@ -43,6 +48,30 @@ import { EditBookmarkDialog } from "./EditBookmarkDialog"; import { ArchivedActionIcon, FavouritedActionIcon } from "./icons"; import { useManageListsModal } from "./ManageListsModal"; +interface ActionItem { + id: string; + title: string; + icon: React.ReactNode; + visible: boolean; + disabled: boolean; + className?: string; + onClick: () => void; +} + +interface SubsectionItem { + id: string; + title: string; + icon: React.ReactNode; + visible: boolean; + items: ActionItem[]; +} + +type ActionItemType = ActionItem | SubsectionItem; + +function isSubsectionItem(item: ActionItemType): item is SubsectionItem { + return "items" in item; +} + export default function BookmarkOptions({ bookmark }: { bookmark: ZBookmark }) { const { t } = useTranslation(); const { toast } = useToast(); @@ -110,6 +139,15 @@ export default function BookmarkOptions({ bookmark }: { bookmark: ZBookmark }) { onError, }); + const preservePdfMutator = useRecrawlBookmark({ + onSuccess: () => { + toast({ + description: t("toasts.bookmarks.preserve_pdf"), + }); + }, + onError, + }); + const removeFromListMutator = useRemoveBookmarkFromList({ onSuccess: () => { toast({ @@ -120,7 +158,7 @@ export default function BookmarkOptions({ bookmark }: { bookmark: ZBookmark 
}) { }); // Define action items array - const actionItems = [ + const actionItems: ActionItemType[] = [ { id: "edit", title: t("actions.edit"), @@ -174,19 +212,6 @@ export default function BookmarkOptions({ bookmark }: { bookmark: ZBookmark }) { }), }, { - id: "download-full-page", - title: t("actions.download_full_page_archive"), - icon: <FileDown className="mr-2 size-4" />, - visible: isOwner && bookmark.content.type === BookmarkTypes.LINK, - disabled: false, - onClick: () => { - fullPageArchiveBookmarkMutator.mutate({ - bookmarkId: bookmark.id, - archiveFullPage: true, - }); - }, - }, - { id: "copy-link", title: t("actions.copy_link"), icon: <Link className="mr-2 size-4" />, @@ -213,14 +238,15 @@ export default function BookmarkOptions({ bookmark }: { bookmark: ZBookmark }) { id: "remove-from-list", title: t("actions.remove_from_list"), icon: <ListX className="mr-2 size-4" />, - visible: + visible: Boolean( (isOwner || (withinListContext && (withinListContext.userRole === "editor" || withinListContext.userRole === "owner"))) && - !!listId && - !!withinListContext && - withinListContext.type === "manual", + !!listId && + !!withinListContext && + withinListContext.type === "manual", + ), disabled: demoMode, onClick: () => removeFromListMutator.mutate({ @@ -237,6 +263,40 @@ export default function BookmarkOptions({ bookmark }: { bookmark: ZBookmark }) { onClick: () => crawlBookmarkMutator.mutate({ bookmarkId: bookmark.id }), }, { + id: "offline-copies", + title: t("actions.offline_copies"), + icon: <Archive className="mr-2 size-4" />, + visible: isOwner && bookmark.content.type === BookmarkTypes.LINK, + items: [ + { + id: "download-full-page", + title: t("actions.download_full_page_archive"), + icon: <FileDown className="mr-2 size-4" />, + visible: true, + disabled: demoMode, + onClick: () => { + fullPageArchiveBookmarkMutator.mutate({ + bookmarkId: bookmark.id, + archiveFullPage: true, + }); + }, + }, + { + id: "preserve-pdf", + title: 
t("actions.preserve_as_pdf"), + icon: <FileText className="mr-2 size-4" />, + visible: true, + disabled: demoMode, + onClick: () => { + preservePdfMutator.mutate({ + bookmarkId: bookmark.id, + storePdf: true, + }); + }, + }, + ], + }, + { id: "delete", title: t("actions.delete"), icon: <Trash2 className="mr-2 size-4" />, @@ -248,7 +308,12 @@ export default function BookmarkOptions({ bookmark }: { bookmark: ZBookmark }) { ]; // Filter visible items - const visibleItems = actionItems.filter((item) => item.visible); + const visibleItems: ActionItemType[] = actionItems.filter((item) => { + if (isSubsectionItem(item)) { + return item.visible && item.items.some((subItem) => subItem.visible); + } + return item.visible; + }); // If no items are visible, don't render the dropdown if (visibleItems.length === 0) { @@ -283,17 +348,47 @@ export default function BookmarkOptions({ bookmark }: { bookmark: ZBookmark }) { </Button> </DropdownMenuTrigger> <DropdownMenuContent className="w-fit"> - {visibleItems.map((item) => ( - <DropdownMenuItem - key={item.id} - disabled={item.disabled} - className={item.className} - onClick={item.onClick} - > - {item.icon} - <span>{item.title}</span> - </DropdownMenuItem> - ))} + {visibleItems.map((item) => { + if (isSubsectionItem(item)) { + const visibleSubItems = item.items.filter( + (subItem) => subItem.visible, + ); + if (visibleSubItems.length === 0) { + return null; + } + return ( + <DropdownMenuSub key={item.id}> + <DropdownMenuSubTrigger> + {item.icon} + <span>{item.title}</span> + </DropdownMenuSubTrigger> + <DropdownMenuSubContent> + {visibleSubItems.map((subItem) => ( + <DropdownMenuItem + key={subItem.id} + disabled={subItem.disabled} + onClick={subItem.onClick} + > + {subItem.icon} + <span>{subItem.title}</span> + </DropdownMenuItem> + ))} + </DropdownMenuSubContent> + </DropdownMenuSub> + ); + } + return ( + <DropdownMenuItem + key={item.id} + disabled={item.disabled} + className={item.className} + onClick={item.onClick} + > + 
{item.icon} + <span>{item.title}</span> + </DropdownMenuItem> + ); + })} </DropdownMenuContent> </DropdownMenu> </> diff --git a/apps/web/components/dashboard/preview/LinkContentSection.tsx b/apps/web/components/dashboard/preview/LinkContentSection.tsx index bdf5faf1..5fb51784 100644 --- a/apps/web/components/dashboard/preview/LinkContentSection.tsx +++ b/apps/web/components/dashboard/preview/LinkContentSection.tsx @@ -24,6 +24,7 @@ import { BookOpen, Camera, ExpandIcon, + FileText, Info, Video, } from "lucide-react"; @@ -104,6 +105,16 @@ function VideoSection({ link }: { link: ZBookmarkedLink }) { ); } +function PDFSection({ link }: { link: ZBookmarkedLink }) { + return ( + <iframe + title="PDF Viewer" + src={`/api/assets/${link.pdfAssetId}`} + className="relative h-full min-w-full" + /> + ); +} + export default function LinkContentSection({ bookmark, }: { @@ -154,6 +165,8 @@ export default function LinkContentSection({ content = <FullPageArchiveSection link={bookmark.content} />; } else if (section === "video") { content = <VideoSection link={bookmark.content} />; + } else if (section === "pdf") { + content = <PDFSection link={bookmark.content} />; } else { content = <ScreenshotSection link={bookmark.content} />; } @@ -198,6 +211,12 @@ export default function LinkContentSection({ {t("common.screenshot")} </div> </SelectItem> + <SelectItem value="pdf" disabled={!bookmark.content.pdfAssetId}> + <div className="flex items-center"> + <FileText className="mr-2 h-4 w-4" /> + {t("common.pdf")} + </div> + </SelectItem> <SelectItem value="archive" disabled={ diff --git a/apps/web/lib/attachments.tsx b/apps/web/lib/attachments.tsx index 81b9f12d..5d7175ec 100644 --- a/apps/web/lib/attachments.tsx +++ b/apps/web/lib/attachments.tsx @@ -2,6 +2,7 @@ import { Archive, Camera, FileCode, + FileText, Image, Paperclip, SquareUser, @@ -13,6 +14,7 @@ import { ZAssetType } from "@karakeep/shared/types/bookmarks"; export const ASSET_TYPE_TO_ICON: Record<ZAssetType, React.ReactNode> = 
{ screenshot: <Camera className="size-4" />, + pdf: <FileText className="size-4" />, assetScreenshot: <Camera className="size-4" />, fullPageArchive: <Archive className="size-4" />, precrawledArchive: <Archive className="size-4" />, diff --git a/apps/web/lib/i18n/locales/en/translation.json b/apps/web/lib/i18n/locales/en/translation.json index a4b94e4b..9ec25732 100644 --- a/apps/web/lib/i18n/locales/en/translation.json +++ b/apps/web/lib/i18n/locales/en/translation.json @@ -26,6 +26,7 @@ "highlights": "Highlights", "source": "Source", "screenshot": "Screenshot", + "pdf": "Archived PDF", "video": "Video", "archive": "Archive", "home": "Home", @@ -70,7 +71,9 @@ "toggle_show_archived": "Show Archived", "refresh": "Refresh", "recrawl": "Recrawl", + "offline_copies": "Offline Copies", "download_full_page_archive": "Download Full Page Archive", + "preserve_as_pdf": "Preserve as PDF", "edit_tags": "Edit Tags", "edit_notes": "Edit Notes", "add_to_list": "Add to List", @@ -802,6 +805,7 @@ "deleted": "The bookmark has been deleted!", "refetch": "Re-fetch has been enqueued!", "full_page_archive": "Full Page Archive creation has been triggered", + "preserve_pdf": "PDF preservation has been triggered", "delete_from_list": "The bookmark has been deleted from the list", "clipboard_copied": "Link has been added to your clipboard!" 
}, diff --git a/apps/web/lib/i18n/locales/en_US/translation.json b/apps/web/lib/i18n/locales/en_US/translation.json index 6c3dd62b..2849f930 100644 --- a/apps/web/lib/i18n/locales/en_US/translation.json +++ b/apps/web/lib/i18n/locales/en_US/translation.json @@ -25,6 +25,7 @@ "admin": "Admin" }, "screenshot": "Screenshot", + "pdf": "Archived PDF", "video": "Video", "archive": "Archive", "home": "Home", @@ -62,7 +63,9 @@ "delete": "Delete", "refresh": "Refresh", "recrawl": "Recrawl", + "offline_copies": "Offline Copies", "download_full_page_archive": "Download Full Page Archive", + "preserve_as_pdf": "Preserve as PDF", "edit_tags": "Edit Tags", "add_to_list": "Add to List", "select_all": "Select All", @@ -781,6 +784,7 @@ "deleted": "The bookmark has been deleted!", "refetch": "Re-fetch has been enqueued!", "full_page_archive": "Full Page Archive creation has been triggered", + "preserve_pdf": "PDF preservation has been triggered", "delete_from_list": "The bookmark has been deleted from the list", "clipboard_copied": "Link has been added to your clipboard!", "updated": "The bookmark has been updated!" diff --git a/apps/workers/workerUtils.ts b/apps/workers/workerUtils.ts index 3eaf5b4b..a99f2103 100644 --- a/apps/workers/workerUtils.ts +++ b/apps/workers/workerUtils.ts @@ -34,6 +34,8 @@ export async function getBookmarkDetails(bookmarkId: string) { screenshotAssetId: bookmark.assets.find(
(a) => a.assetType == AssetTypes.LINK_SCREENSHOT,
)?.id,
+ pdfAssetId: bookmark.assets.find((a) => a.assetType == AssetTypes.LINK_PDF)
+ ?.id,
imageAssetId: bookmark.assets.find(
(a) => a.assetType == AssetTypes.LINK_BANNER_IMAGE,
)?.id,
diff --git a/apps/workers/workers/crawlerWorker.ts b/apps/workers/workers/crawlerWorker.ts index 3591474e..95c91002 100644 --- a/apps/workers/workers/crawlerWorker.ts +++ b/apps/workers/workers/crawlerWorker.ts @@ -418,6 +418,7 @@ async function browserlessCrawlPage( htmlContent: await response.text(), statusCode: response.status, screenshot: undefined, + pdf: undefined, url: response.url, }; } @@ -426,10 +427,12 @@ async function crawlPage( jobId: string, url: string, userId: string, + forceStorePdf: boolean, abortSignal: AbortSignal, ): Promise<{ htmlContent: string; screenshot: Buffer | undefined; + pdf: Buffer | undefined; statusCode: number; url: string; }> { @@ -608,10 +611,45 @@ async function crawlPage( } } + // Capture PDF if configured or explicitly requested + let pdf: Buffer | undefined = undefined; + if (serverConfig.crawler.storePdf || forceStorePdf) { + const { data: pdfData, error: pdfError } = await tryCatch( + Promise.race<Buffer>([ + page.pdf({ + format: "A4", + printBackground: true, + }), + new Promise((_, reject) => + setTimeout( + () => + reject( + "TIMED_OUT, consider increasing CRAWLER_SCREENSHOT_TIMEOUT_SEC", + ), + serverConfig.crawler.screenshotTimeoutSec * 1000, + ), + ), + abortPromise(abortSignal).then(() => Buffer.from("")), + ]), + ); + abortSignal.throwIfAborted(); + if (pdfError) { + logger.warn( + `[Crawler][${jobId}] Failed to capture the PDF. Reason: ${pdfError}`, + ); + } else { + logger.info( + `[Crawler][${jobId}] Finished capturing page content as PDF`, + ); + pdf = pdfData; + } + } + return { htmlContent, statusCode: response?.status() ?? 
0, screenshot, + pdf, url: page.url(), }; } finally { @@ -724,6 +762,44 @@ async function storeScreenshot( return { assetId, contentType, fileName, size: screenshot.byteLength }; } +async function storePdf( + pdf: Buffer | undefined, + userId: string, + jobId: string, +) { + if (!pdf) { + logger.info(`[Crawler][${jobId}] Skipping storing the PDF as it's empty.`); + return null; + } + const assetId = newAssetId(); + const contentType = "application/pdf"; + const fileName = "page.pdf"; + + // Check storage quota before saving the PDF + const { data: quotaApproved, error: quotaError } = await tryCatch( + QuotaService.checkStorageQuota(db, userId, pdf.byteLength), + ); + + if (quotaError) { + logger.warn( + `[Crawler][${jobId}] Skipping PDF storage due to quota exceeded: ${quotaError.message}`, + ); + return null; + } + + await saveAsset({ + userId, + assetId, + metadata: { contentType, fileName }, + asset: pdf, + quotaApproved, + }); + logger.info( + `[Crawler][${jobId}] Stored the PDF as assetId: ${assetId} (${pdf.byteLength} bytes)`, + ); + return { assetId, contentType, fileName, size: pdf.byteLength }; +} + async function downloadAndStoreFile( url: string, userId: string, @@ -1079,16 +1155,19 @@ async function crawlAndParseUrl( jobId: string, bookmarkId: string, oldScreenshotAssetId: string | undefined, + oldPdfAssetId: string | undefined, oldImageAssetId: string | undefined, oldFullPageArchiveAssetId: string | undefined, oldContentAssetId: string | undefined, precrawledArchiveAssetId: string | undefined, archiveFullPage: boolean, + forceStorePdf: boolean, abortSignal: AbortSignal, ) { let result: { htmlContent: string; screenshot: Buffer | undefined; + pdf: Buffer | undefined; statusCode: number | null; url: string; }; @@ -1104,15 +1183,16 @@ async function crawlAndParseUrl( result = { htmlContent: asset.asset.toString(), screenshot: undefined, + pdf: undefined, statusCode: 200, url, }; } else { - result = await crawlPage(jobId, url, userId, abortSignal); + 
result = await crawlPage(jobId, url, userId, forceStorePdf, abortSignal); } abortSignal.throwIfAborted(); - const { htmlContent, screenshot, statusCode, url: browserUrl } = result; + const { htmlContent, screenshot, pdf, statusCode, url: browserUrl } = result; // Track status code in Prometheus if (statusCode !== null) { @@ -1146,6 +1226,12 @@ async function crawlAndParseUrl( ]); abortSignal.throwIfAborted(); + const pdfAssetInfo = await Promise.race([ + storePdf(pdf, userId, jobId), + abortPromise(abortSignal), + ]); + abortSignal.throwIfAborted(); + const htmlContentAssetInfo = await storeHtmlContent( readableContent?.content, userId, @@ -1230,6 +1316,22 @@ async function crawlAndParseUrl( ); assetDeletionTasks.push(silentDeleteAsset(userId, oldScreenshotAssetId)); } + if (pdfAssetInfo) { + await updateAsset( + oldPdfAssetId, + { + id: pdfAssetInfo.assetId, + bookmarkId, + userId, + assetType: AssetTypes.LINK_PDF, + contentType: pdfAssetInfo.contentType, + size: pdfAssetInfo.size, + fileName: pdfAssetInfo.fileName, + }, + txn, + ); + assetDeletionTasks.push(silentDeleteAsset(userId, oldPdfAssetId)); + } if (imageAssetInfo) { await updateAsset(oldImageAssetId, imageAssetInfo, txn); assetDeletionTasks.push(silentDeleteAsset(userId, oldImageAssetId)); @@ -1355,11 +1457,12 @@ async function runCrawler( return { status: "completed" }; } - const { bookmarkId, archiveFullPage } = request.data; + const { bookmarkId, archiveFullPage, storePdf } = request.data; const { url, userId, screenshotAssetId: oldScreenshotAssetId, + pdfAssetId: oldPdfAssetId, imageAssetId: oldImageAssetId, fullPageArchiveAssetId: oldFullPageArchiveAssetId, contentAssetId: oldContentAssetId, @@ -1407,11 +1510,13 @@ async function runCrawler( jobId, bookmarkId, oldScreenshotAssetId, + oldPdfAssetId, oldImageAssetId, oldFullPageArchiveAssetId, oldContentAssetId, precrawledArchiveAssetId, archiveFullPage, + storePdf ?? 
false, job.abortSignal, ); diff --git a/docs/docs/03-configuration/01-environment-variables.md b/docs/docs/03-configuration/01-environment-variables.md index e6f2a3c9..8caef0df 100644 --- a/docs/docs/03-configuration/01-environment-variables.md +++ b/docs/docs/03-configuration/01-environment-variables.md @@ -131,6 +131,7 @@ Either `OPENAI_API_KEY` or `OLLAMA_BASE_URL` need to be set for automatic taggin | CRAWLER_STORE_SCREENSHOT | No | true | Whether to store a screenshot from the crawled website or not. Screenshots act as a fallback for when we fail to extract an image from a website. You can also view the stored screenshots for any link. | | CRAWLER_FULL_PAGE_SCREENSHOT | No | false | Whether to store a screenshot of the full page or not. Disabled by default, as it can lead to much higher disk usage. If disabled, the screenshot will only include the visible part of the page | | CRAWLER_SCREENSHOT_TIMEOUT_SEC | No | 5 | How long to wait for the screenshot finish before timing out. If you are capturing full-page screenshots of long webpages, consider increasing this value. | +| CRAWLER_STORE_PDF | No | false | Whether to store a PDF snapshot of the crawled page. Disabled by default, as it can lead to much higher disk usage. When enabled, a PDF version of each crawled page will be captured and stored as an asset, which can be viewed in the bookmark preview. | | CRAWLER_FULL_PAGE_ARCHIVE | No | false | Whether to store a full local copy of the page or not. Disabled by default, as it can lead to much higher disk usage. If disabled, only the readable text of the page is archived. | | CRAWLER_JOB_TIMEOUT_SEC | No | 60 | How long to wait for the crawler job to finish before timing out. If you have a slow internet connection or a low powered device, you might want to bump this up a bit | | CRAWLER_NAVIGATE_TIMEOUT_SEC | No | 30 | How long to spend navigating to the page (along with its redirects). 
Increase this if you have a slow internet connection | diff --git a/packages/db/schema.ts b/packages/db/schema.ts index 2c2a997c..ae7c3103 100644 --- a/packages/db/schema.ts +++ b/packages/db/schema.ts @@ -259,6 +259,7 @@ export const bookmarkLinks = sqliteTable( export const enum AssetTypes { LINK_BANNER_IMAGE = "linkBannerImage", LINK_SCREENSHOT = "linkScreenshot", + LINK_PDF = "linkPdf", ASSET_SCREENSHOT = "assetScreenshot", LINK_FULL_PAGE_ARCHIVE = "linkFullPageArchive", LINK_PRECRAWLED_ARCHIVE = "linkPrecrawledArchive", @@ -280,6 +281,7 @@ export const assets = sqliteTable( enum: [ AssetTypes.LINK_BANNER_IMAGE, AssetTypes.LINK_SCREENSHOT, + AssetTypes.LINK_PDF, AssetTypes.ASSET_SCREENSHOT, AssetTypes.LINK_FULL_PAGE_ARCHIVE, AssetTypes.LINK_PRECRAWLED_ARCHIVE, diff --git a/packages/open-api/karakeep-openapi-spec.json b/packages/open-api/karakeep-openapi-spec.json index 505cdfc2..344ba6df 100644 --- a/packages/open-api/karakeep-openapi-spec.json +++ b/packages/open-api/karakeep-openapi-spec.json @@ -175,6 +175,10 @@ "type": "string", "nullable": true }, + "pdfAssetId": { + "type": "string", + "nullable": true + }, "fullPageArchiveAssetId": { "type": "string", "nullable": true @@ -318,6 +322,7 @@ "enum": [ "linkHtmlContent", "screenshot", + "pdf", "assetScreenshot", "bannerImage", "fullPageArchive", @@ -1741,6 +1746,7 @@ "enum": [ "linkHtmlContent", "screenshot", + "pdf", "assetScreenshot", "bannerImage", "fullPageArchive", @@ -1777,6 +1783,7 @@ "enum": [ "linkHtmlContent", "screenshot", + "pdf", "assetScreenshot", "bannerImage", "fullPageArchive", diff --git a/packages/shared-server/src/queues.ts b/packages/shared-server/src/queues.ts index 8ee50df0..140d9c0b 100644 --- a/packages/shared-server/src/queues.ts +++ b/packages/shared-server/src/queues.ts @@ -21,6 +21,7 @@ export const zCrawlLinkRequestSchema = z.object({ bookmarkId: z.string(), runInference: z.boolean().optional(), archiveFullPage: z.boolean().optional().default(false), + storePdf: 
z.boolean().optional().default(false), }); export type ZCrawlLinkRequest = z.input<typeof zCrawlLinkRequestSchema>; diff --git a/packages/shared/config.ts b/packages/shared/config.ts index e956c0bc..191e9ecf 100644 --- a/packages/shared/config.ts +++ b/packages/shared/config.ts @@ -99,6 +99,7 @@ const allEnv = z.object({ CRAWLER_DOWNLOAD_BANNER_IMAGE: stringBool("true"), CRAWLER_STORE_SCREENSHOT: stringBool("true"), CRAWLER_FULL_PAGE_SCREENSHOT: stringBool("false"), + CRAWLER_STORE_PDF: stringBool("false"), CRAWLER_FULL_PAGE_ARCHIVE: stringBool("false"), CRAWLER_VIDEO_DOWNLOAD: stringBool("false"), CRAWLER_VIDEO_DOWNLOAD_MAX_SIZE: z.coerce.number().default(50), @@ -301,6 +302,7 @@ const serverConfigSchema = allEnv.transform((val, ctx) => { downloadBannerImage: val.CRAWLER_DOWNLOAD_BANNER_IMAGE, storeScreenshot: val.CRAWLER_STORE_SCREENSHOT, fullPageScreenshot: val.CRAWLER_FULL_PAGE_SCREENSHOT, + storePdf: val.CRAWLER_STORE_PDF, fullPageArchive: val.CRAWLER_FULL_PAGE_ARCHIVE, downloadVideo: val.CRAWLER_VIDEO_DOWNLOAD, maxVideoDownloadSize: val.CRAWLER_VIDEO_DOWNLOAD_MAX_SIZE, diff --git a/packages/shared/types/bookmarks.ts b/packages/shared/types/bookmarks.ts index 8a294422..0b9cf4ee 100644 --- a/packages/shared/types/bookmarks.ts +++ b/packages/shared/types/bookmarks.ts @@ -18,6 +18,7 @@ export type ZSortOrder = z.infer<typeof zSortOrder>; export const zAssetTypesSchema = z.enum([ "linkHtmlContent", "screenshot", + "pdf", "assetScreenshot", "bannerImage", "fullPageArchive", @@ -44,6 +45,7 @@ export const zBookmarkedLinkSchema = z.object({ imageUrl: z.string().nullish(), imageAssetId: z.string().nullish(), screenshotAssetId: z.string().nullish(), + pdfAssetId: z.string().nullish(), fullPageArchiveAssetId: z.string().nullish(), precrawledArchiveAssetId: z.string().nullish(), videoAssetId: z.string().nullish(), diff --git a/packages/trpc/lib/attachments.ts b/packages/trpc/lib/attachments.ts index fb9e2079..f3170c22 100644 --- a/packages/trpc/lib/attachments.ts +++ 
b/packages/trpc/lib/attachments.ts @@ -9,6 +9,7 @@ import { export function mapDBAssetTypeToUserType(assetType: AssetTypes): ZAssetType { const map: Record<AssetTypes, z.infer<typeof zAssetTypesSchema>> = { [AssetTypes.LINK_SCREENSHOT]: "screenshot", + [AssetTypes.LINK_PDF]: "pdf", [AssetTypes.ASSET_SCREENSHOT]: "assetScreenshot", [AssetTypes.LINK_FULL_PAGE_ARCHIVE]: "fullPageArchive", [AssetTypes.LINK_PRECRAWLED_ARCHIVE]: "precrawledArchive", @@ -29,6 +30,7 @@ export function mapSchemaAssetTypeToDB( ): AssetTypes { const map: Record<ZAssetType, AssetTypes> = { screenshot: AssetTypes.LINK_SCREENSHOT, + pdf: AssetTypes.LINK_PDF, assetScreenshot: AssetTypes.ASSET_SCREENSHOT, fullPageArchive: AssetTypes.LINK_FULL_PAGE_ARCHIVE, precrawledArchive: AssetTypes.LINK_PRECRAWLED_ARCHIVE, @@ -46,6 +48,7 @@ export function mapSchemaAssetTypeToDB( export function humanFriendlyNameForAssertType(type: ZAssetType) { const map: Record<ZAssetType, string> = { screenshot: "Screenshot", + pdf: "PDF", assetScreenshot: "Asset Screenshot", fullPageArchive: "Full Page Archive", precrawledArchive: "Precrawled Archive", @@ -63,6 +66,7 @@ export function humanFriendlyNameForAssertType(type: ZAssetType) { export function isAllowedToAttachAsset(type: ZAssetType) { const map: Record<ZAssetType, boolean> = { screenshot: true, + pdf: true, assetScreenshot: true, fullPageArchive: false, precrawledArchive: true, @@ -80,6 +84,7 @@ export function isAllowedToAttachAsset(type: ZAssetType) { export function isAllowedToDetachAsset(type: ZAssetType) { const map: Record<ZAssetType, boolean> = { screenshot: true, + pdf: true, assetScreenshot: true, fullPageArchive: true, precrawledArchive: true, diff --git a/packages/trpc/models/bookmarks.ts b/packages/trpc/models/bookmarks.ts index 7ecbcfed..a8b30fc5 100644 --- a/packages/trpc/models/bookmarks.ts +++ b/packages/trpc/models/bookmarks.ts @@ -161,6 +161,7 @@ export class Bookmark extends BareBookmark { screenshotAssetId: assets.find( (a) => a.assetType == 
AssetTypes.LINK_SCREENSHOT, )?.id, + pdfAssetId: assets.find((a) => a.assetType == AssetTypes.LINK_PDF)?.id, fullPageArchiveAssetId: assets.find( (a) => a.assetType == AssetTypes.LINK_FULL_PAGE_ARCHIVE, )?.id, @@ -525,6 +526,9 @@ export class Bookmark extends BareBookmark { if (row.assets.assetType == AssetTypes.LINK_SCREENSHOT) { content.screenshotAssetId = row.assets.id; } + if (row.assets.assetType == AssetTypes.LINK_PDF) { + content.pdfAssetId = row.assets.id; + } if (row.assets.assetType == AssetTypes.LINK_FULL_PAGE_ARCHIVE) { content.fullPageArchiveAssetId = row.assets.id; } diff --git a/packages/trpc/routers/bookmarks.ts b/packages/trpc/routers/bookmarks.ts index a9d0df38..fb9b4697 100644 --- a/packages/trpc/routers/bookmarks.ts +++ b/packages/trpc/routers/bookmarks.ts @@ -562,6 +562,7 @@ export const bookmarksAppRouter = router({ z.object({ bookmarkId: z.string(), archiveFullPage: z.boolean().optional().default(false), + storePdf: z.boolean().optional().default(false), }), ) .use(ensureBookmarkOwnership) @@ -577,6 +578,7 @@ export const bookmarksAppRouter = router({ { bookmarkId: input.bookmarkId, archiveFullPage: input.archiveFullPage, + storePdf: input.storePdf, }, { groupId: ctx.user.id, |
