From 267db791290f4f539d7bda113992e3d1690b0e8b Mon Sep 17 00:00:00 2001 From: Mohamed Bassem Date: Sat, 27 Dec 2025 11:59:39 +0200 Subject: feat: support archiving as pdf (#2309) * feat: support archiving as pdf * add support for manually triggering pdf downloads * fix submenu * menu cleanup * fix store pdf --- packages/db/schema.ts | 2 ++ packages/open-api/karakeep-openapi-spec.json | 7 +++++++ packages/shared-server/src/queues.ts | 1 + packages/shared/config.ts | 2 ++ packages/shared/types/bookmarks.ts | 2 ++ packages/trpc/lib/attachments.ts | 5 +++++ packages/trpc/models/bookmarks.ts | 4 ++++ packages/trpc/routers/bookmarks.ts | 2 ++ 8 files changed, 25 insertions(+) (limited to 'packages') diff --git a/packages/db/schema.ts b/packages/db/schema.ts index 2c2a997c..ae7c3103 100644 --- a/packages/db/schema.ts +++ b/packages/db/schema.ts @@ -259,6 +259,7 @@ export const bookmarkLinks = sqliteTable( export const enum AssetTypes { LINK_BANNER_IMAGE = "linkBannerImage", LINK_SCREENSHOT = "linkScreenshot", + LINK_PDF = "linkPdf", ASSET_SCREENSHOT = "assetScreenshot", LINK_FULL_PAGE_ARCHIVE = "linkFullPageArchive", LINK_PRECRAWLED_ARCHIVE = "linkPrecrawledArchive", @@ -280,6 +281,7 @@ export const assets = sqliteTable( enum: [ AssetTypes.LINK_BANNER_IMAGE, AssetTypes.LINK_SCREENSHOT, + AssetTypes.LINK_PDF, AssetTypes.ASSET_SCREENSHOT, AssetTypes.LINK_FULL_PAGE_ARCHIVE, AssetTypes.LINK_PRECRAWLED_ARCHIVE, diff --git a/packages/open-api/karakeep-openapi-spec.json b/packages/open-api/karakeep-openapi-spec.json index 505cdfc2..344ba6df 100644 --- a/packages/open-api/karakeep-openapi-spec.json +++ b/packages/open-api/karakeep-openapi-spec.json @@ -175,6 +175,10 @@ "type": "string", "nullable": true }, + "pdfAssetId": { + "type": "string", + "nullable": true + }, "fullPageArchiveAssetId": { "type": "string", "nullable": true @@ -318,6 +322,7 @@ "enum": [ "linkHtmlContent", "screenshot", + "pdf", "assetScreenshot", "bannerImage", "fullPageArchive", @@ -1741,6 +1746,7 @@ "enum": 
[ "linkHtmlContent", "screenshot", + "pdf", "assetScreenshot", "bannerImage", "fullPageArchive", @@ -1777,6 +1783,7 @@ "enum": [ "linkHtmlContent", "screenshot", + "pdf", "assetScreenshot", "bannerImage", "fullPageArchive", diff --git a/packages/shared-server/src/queues.ts b/packages/shared-server/src/queues.ts index 8ee50df0..140d9c0b 100644 --- a/packages/shared-server/src/queues.ts +++ b/packages/shared-server/src/queues.ts @@ -21,6 +21,7 @@ export const zCrawlLinkRequestSchema = z.object({ bookmarkId: z.string(), runInference: z.boolean().optional(), archiveFullPage: z.boolean().optional().default(false), + storePdf: z.boolean().optional().default(false), }); export type ZCrawlLinkRequest = z.input<typeof zCrawlLinkRequestSchema>; diff --git a/packages/shared/config.ts b/packages/shared/config.ts index e956c0bc..191e9ecf 100644 --- a/packages/shared/config.ts +++ b/packages/shared/config.ts @@ -99,6 +99,7 @@ const allEnv = z.object({ CRAWLER_DOWNLOAD_BANNER_IMAGE: stringBool("true"), CRAWLER_STORE_SCREENSHOT: stringBool("true"), CRAWLER_FULL_PAGE_SCREENSHOT: stringBool("false"), + CRAWLER_STORE_PDF: stringBool("false"), CRAWLER_FULL_PAGE_ARCHIVE: stringBool("false"), CRAWLER_VIDEO_DOWNLOAD: stringBool("false"), CRAWLER_VIDEO_DOWNLOAD_MAX_SIZE: z.coerce.number().default(50), @@ -301,6 +302,7 @@ const serverConfigSchema = allEnv.transform((val, ctx) => { downloadBannerImage: val.CRAWLER_DOWNLOAD_BANNER_IMAGE, storeScreenshot: val.CRAWLER_STORE_SCREENSHOT, fullPageScreenshot: val.CRAWLER_FULL_PAGE_SCREENSHOT, + storePdf: val.CRAWLER_STORE_PDF, fullPageArchive: val.CRAWLER_FULL_PAGE_ARCHIVE, downloadVideo: val.CRAWLER_VIDEO_DOWNLOAD, maxVideoDownloadSize: val.CRAWLER_VIDEO_DOWNLOAD_MAX_SIZE, diff --git a/packages/shared/types/bookmarks.ts b/packages/shared/types/bookmarks.ts index 8a294422..0b9cf4ee 100644 --- a/packages/shared/types/bookmarks.ts +++ b/packages/shared/types/bookmarks.ts @@ -18,6 +18,7 @@ export type ZSortOrder = z.infer; export const zAssetTypesSchema = z.enum([ 
"linkHtmlContent", "screenshot", + "pdf", "assetScreenshot", "bannerImage", "fullPageArchive", @@ -44,6 +45,7 @@ export const zBookmarkedLinkSchema = z.object({ imageUrl: z.string().nullish(), imageAssetId: z.string().nullish(), screenshotAssetId: z.string().nullish(), + pdfAssetId: z.string().nullish(), fullPageArchiveAssetId: z.string().nullish(), precrawledArchiveAssetId: z.string().nullish(), videoAssetId: z.string().nullish(), diff --git a/packages/trpc/lib/attachments.ts b/packages/trpc/lib/attachments.ts index fb9e2079..f3170c22 100644 --- a/packages/trpc/lib/attachments.ts +++ b/packages/trpc/lib/attachments.ts @@ -9,6 +9,7 @@ import { export function mapDBAssetTypeToUserType(assetType: AssetTypes): ZAssetType { const map: Record> = { [AssetTypes.LINK_SCREENSHOT]: "screenshot", + [AssetTypes.LINK_PDF]: "pdf", [AssetTypes.ASSET_SCREENSHOT]: "assetScreenshot", [AssetTypes.LINK_FULL_PAGE_ARCHIVE]: "fullPageArchive", [AssetTypes.LINK_PRECRAWLED_ARCHIVE]: "precrawledArchive", @@ -29,6 +30,7 @@ export function mapSchemaAssetTypeToDB( ): AssetTypes { const map: Record = { screenshot: AssetTypes.LINK_SCREENSHOT, + pdf: AssetTypes.LINK_PDF, assetScreenshot: AssetTypes.ASSET_SCREENSHOT, fullPageArchive: AssetTypes.LINK_FULL_PAGE_ARCHIVE, precrawledArchive: AssetTypes.LINK_PRECRAWLED_ARCHIVE, @@ -46,6 +48,7 @@ export function mapSchemaAssetTypeToDB( export function humanFriendlyNameForAssertType(type: ZAssetType) { const map: Record = { screenshot: "Screenshot", + pdf: "PDF", assetScreenshot: "Asset Screenshot", fullPageArchive: "Full Page Archive", precrawledArchive: "Precrawled Archive", @@ -63,6 +66,7 @@ export function humanFriendlyNameForAssertType(type: ZAssetType) { export function isAllowedToAttachAsset(type: ZAssetType) { const map: Record = { screenshot: true, + pdf: true, assetScreenshot: true, fullPageArchive: false, precrawledArchive: true, @@ -80,6 +84,7 @@ export function isAllowedToAttachAsset(type: ZAssetType) { export function 
isAllowedToDetachAsset(type: ZAssetType) { const map: Record = { screenshot: true, + pdf: true, assetScreenshot: true, fullPageArchive: true, precrawledArchive: true, diff --git a/packages/trpc/models/bookmarks.ts b/packages/trpc/models/bookmarks.ts index 7ecbcfed..a8b30fc5 100644 --- a/packages/trpc/models/bookmarks.ts +++ b/packages/trpc/models/bookmarks.ts @@ -161,6 +161,7 @@ export class Bookmark extends BareBookmark { screenshotAssetId: assets.find( (a) => a.assetType == AssetTypes.LINK_SCREENSHOT, )?.id, + pdfAssetId: assets.find((a) => a.assetType == AssetTypes.LINK_PDF)?.id, fullPageArchiveAssetId: assets.find( (a) => a.assetType == AssetTypes.LINK_FULL_PAGE_ARCHIVE, )?.id, @@ -525,6 +526,9 @@ export class Bookmark extends BareBookmark { if (row.assets.assetType == AssetTypes.LINK_SCREENSHOT) { content.screenshotAssetId = row.assets.id; } + if (row.assets.assetType == AssetTypes.LINK_PDF) { + content.pdfAssetId = row.assets.id; + } if (row.assets.assetType == AssetTypes.LINK_FULL_PAGE_ARCHIVE) { content.fullPageArchiveAssetId = row.assets.id; } diff --git a/packages/trpc/routers/bookmarks.ts b/packages/trpc/routers/bookmarks.ts index a9d0df38..fb9b4697 100644 --- a/packages/trpc/routers/bookmarks.ts +++ b/packages/trpc/routers/bookmarks.ts @@ -562,6 +562,7 @@ export const bookmarksAppRouter = router({ z.object({ bookmarkId: z.string(), archiveFullPage: z.boolean().optional().default(false), + storePdf: z.boolean().optional().default(false), }), ) .use(ensureBookmarkOwnership) @@ -577,6 +578,7 @@ export const bookmarksAppRouter = router({ { bookmarkId: input.bookmarkId, archiveFullPage: input.archiveFullPage, + storePdf: input.storePdf, }, { groupId: ctx.user.id, -- cgit v1.2.3-70-g09d2