aboutsummaryrefslogtreecommitdiffstats
path: root/packages
diff options
context:
space:
mode:
author Mohamed Bassem <me@mbassem.com> 2025-12-27 11:59:39 +0200
committer GitHub <noreply@github.com> 2025-12-27 09:59:39 +0000
commit 267db791290f4f539d7bda113992e3d1690b0e8b (patch)
tree 0144ea00dcf6a49bdaaf46511cd074651aeeee5a /packages
parent bb6b742a040a70478d276529774bde67b8f93648 (diff)
download karakeep-267db791290f4f539d7bda113992e3d1690b0e8b.tar.zst
feat: support archiving as pdf (#2309)
* feat: support archiving as pdf * add support for manually triggering pdf downloads * fix submenu * menu cleanup * fix store pdf
Diffstat (limited to 'packages')
-rw-r--r-- packages/db/schema.ts 2
-rw-r--r-- packages/open-api/karakeep-openapi-spec.json 7
-rw-r--r-- packages/shared-server/src/queues.ts 1
-rw-r--r-- packages/shared/config.ts 2
-rw-r--r-- packages/shared/types/bookmarks.ts 2
-rw-r--r-- packages/trpc/lib/attachments.ts 5
-rw-r--r-- packages/trpc/models/bookmarks.ts 4
-rw-r--r-- packages/trpc/routers/bookmarks.ts 2
8 files changed, 25 insertions, 0 deletions
diff --git a/packages/db/schema.ts b/packages/db/schema.ts
index 2c2a997c..ae7c3103 100644
--- a/packages/db/schema.ts
+++ b/packages/db/schema.ts
@@ -259,6 +259,7 @@ export const bookmarkLinks = sqliteTable(
export const enum AssetTypes {
LINK_BANNER_IMAGE = "linkBannerImage",
LINK_SCREENSHOT = "linkScreenshot",
+ LINK_PDF = "linkPdf",
ASSET_SCREENSHOT = "assetScreenshot",
LINK_FULL_PAGE_ARCHIVE = "linkFullPageArchive",
LINK_PRECRAWLED_ARCHIVE = "linkPrecrawledArchive",
@@ -280,6 +281,7 @@ export const assets = sqliteTable(
enum: [
AssetTypes.LINK_BANNER_IMAGE,
AssetTypes.LINK_SCREENSHOT,
+ AssetTypes.LINK_PDF,
AssetTypes.ASSET_SCREENSHOT,
AssetTypes.LINK_FULL_PAGE_ARCHIVE,
AssetTypes.LINK_PRECRAWLED_ARCHIVE,
diff --git a/packages/open-api/karakeep-openapi-spec.json b/packages/open-api/karakeep-openapi-spec.json
index 505cdfc2..344ba6df 100644
--- a/packages/open-api/karakeep-openapi-spec.json
+++ b/packages/open-api/karakeep-openapi-spec.json
@@ -175,6 +175,10 @@
"type": "string",
"nullable": true
},
+ "pdfAssetId": {
+ "type": "string",
+ "nullable": true
+ },
"fullPageArchiveAssetId": {
"type": "string",
"nullable": true
@@ -318,6 +322,7 @@
"enum": [
"linkHtmlContent",
"screenshot",
+ "pdf",
"assetScreenshot",
"bannerImage",
"fullPageArchive",
@@ -1741,6 +1746,7 @@
"enum": [
"linkHtmlContent",
"screenshot",
+ "pdf",
"assetScreenshot",
"bannerImage",
"fullPageArchive",
@@ -1777,6 +1783,7 @@
"enum": [
"linkHtmlContent",
"screenshot",
+ "pdf",
"assetScreenshot",
"bannerImage",
"fullPageArchive",
diff --git a/packages/shared-server/src/queues.ts b/packages/shared-server/src/queues.ts
index 8ee50df0..140d9c0b 100644
--- a/packages/shared-server/src/queues.ts
+++ b/packages/shared-server/src/queues.ts
@@ -21,6 +21,7 @@ export const zCrawlLinkRequestSchema = z.object({
bookmarkId: z.string(),
runInference: z.boolean().optional(),
archiveFullPage: z.boolean().optional().default(false),
+ storePdf: z.boolean().optional().default(false),
});
export type ZCrawlLinkRequest = z.input<typeof zCrawlLinkRequestSchema>;
diff --git a/packages/shared/config.ts b/packages/shared/config.ts
index e956c0bc..191e9ecf 100644
--- a/packages/shared/config.ts
+++ b/packages/shared/config.ts
@@ -99,6 +99,7 @@ const allEnv = z.object({
CRAWLER_DOWNLOAD_BANNER_IMAGE: stringBool("true"),
CRAWLER_STORE_SCREENSHOT: stringBool("true"),
CRAWLER_FULL_PAGE_SCREENSHOT: stringBool("false"),
+ CRAWLER_STORE_PDF: stringBool("false"),
CRAWLER_FULL_PAGE_ARCHIVE: stringBool("false"),
CRAWLER_VIDEO_DOWNLOAD: stringBool("false"),
CRAWLER_VIDEO_DOWNLOAD_MAX_SIZE: z.coerce.number().default(50),
@@ -301,6 +302,7 @@ const serverConfigSchema = allEnv.transform((val, ctx) => {
downloadBannerImage: val.CRAWLER_DOWNLOAD_BANNER_IMAGE,
storeScreenshot: val.CRAWLER_STORE_SCREENSHOT,
fullPageScreenshot: val.CRAWLER_FULL_PAGE_SCREENSHOT,
+ storePdf: val.CRAWLER_STORE_PDF,
fullPageArchive: val.CRAWLER_FULL_PAGE_ARCHIVE,
downloadVideo: val.CRAWLER_VIDEO_DOWNLOAD,
maxVideoDownloadSize: val.CRAWLER_VIDEO_DOWNLOAD_MAX_SIZE,
diff --git a/packages/shared/types/bookmarks.ts b/packages/shared/types/bookmarks.ts
index 8a294422..0b9cf4ee 100644
--- a/packages/shared/types/bookmarks.ts
+++ b/packages/shared/types/bookmarks.ts
@@ -18,6 +18,7 @@ export type ZSortOrder = z.infer<typeof zSortOrder>;
export const zAssetTypesSchema = z.enum([
"linkHtmlContent",
"screenshot",
+ "pdf",
"assetScreenshot",
"bannerImage",
"fullPageArchive",
@@ -44,6 +45,7 @@ export const zBookmarkedLinkSchema = z.object({
imageUrl: z.string().nullish(),
imageAssetId: z.string().nullish(),
screenshotAssetId: z.string().nullish(),
+ pdfAssetId: z.string().nullish(),
fullPageArchiveAssetId: z.string().nullish(),
precrawledArchiveAssetId: z.string().nullish(),
videoAssetId: z.string().nullish(),
diff --git a/packages/trpc/lib/attachments.ts b/packages/trpc/lib/attachments.ts
index fb9e2079..f3170c22 100644
--- a/packages/trpc/lib/attachments.ts
+++ b/packages/trpc/lib/attachments.ts
@@ -9,6 +9,7 @@ import {
export function mapDBAssetTypeToUserType(assetType: AssetTypes): ZAssetType {
const map: Record<AssetTypes, z.infer<typeof zAssetTypesSchema>> = {
[AssetTypes.LINK_SCREENSHOT]: "screenshot",
+ [AssetTypes.LINK_PDF]: "pdf",
[AssetTypes.ASSET_SCREENSHOT]: "assetScreenshot",
[AssetTypes.LINK_FULL_PAGE_ARCHIVE]: "fullPageArchive",
[AssetTypes.LINK_PRECRAWLED_ARCHIVE]: "precrawledArchive",
@@ -29,6 +30,7 @@ export function mapSchemaAssetTypeToDB(
): AssetTypes {
const map: Record<ZAssetType, AssetTypes> = {
screenshot: AssetTypes.LINK_SCREENSHOT,
+ pdf: AssetTypes.LINK_PDF,
assetScreenshot: AssetTypes.ASSET_SCREENSHOT,
fullPageArchive: AssetTypes.LINK_FULL_PAGE_ARCHIVE,
precrawledArchive: AssetTypes.LINK_PRECRAWLED_ARCHIVE,
@@ -46,6 +48,7 @@ export function mapSchemaAssetTypeToDB(
export function humanFriendlyNameForAssertType(type: ZAssetType) {
const map: Record<ZAssetType, string> = {
screenshot: "Screenshot",
+ pdf: "PDF",
assetScreenshot: "Asset Screenshot",
fullPageArchive: "Full Page Archive",
precrawledArchive: "Precrawled Archive",
@@ -63,6 +66,7 @@ export function humanFriendlyNameForAssertType(type: ZAssetType) {
export function isAllowedToAttachAsset(type: ZAssetType) {
const map: Record<ZAssetType, boolean> = {
screenshot: true,
+ pdf: true,
assetScreenshot: true,
fullPageArchive: false,
precrawledArchive: true,
@@ -80,6 +84,7 @@ export function isAllowedToAttachAsset(type: ZAssetType) {
export function isAllowedToDetachAsset(type: ZAssetType) {
const map: Record<ZAssetType, boolean> = {
screenshot: true,
+ pdf: true,
assetScreenshot: true,
fullPageArchive: true,
precrawledArchive: true,
diff --git a/packages/trpc/models/bookmarks.ts b/packages/trpc/models/bookmarks.ts
index 7ecbcfed..a8b30fc5 100644
--- a/packages/trpc/models/bookmarks.ts
+++ b/packages/trpc/models/bookmarks.ts
@@ -161,6 +161,7 @@ export class Bookmark extends BareBookmark {
screenshotAssetId: assets.find(
(a) => a.assetType == AssetTypes.LINK_SCREENSHOT,
)?.id,
+ pdfAssetId: assets.find((a) => a.assetType == AssetTypes.LINK_PDF)?.id,
fullPageArchiveAssetId: assets.find(
(a) => a.assetType == AssetTypes.LINK_FULL_PAGE_ARCHIVE,
)?.id,
@@ -525,6 +526,9 @@ export class Bookmark extends BareBookmark {
if (row.assets.assetType == AssetTypes.LINK_SCREENSHOT) {
content.screenshotAssetId = row.assets.id;
}
+ if (row.assets.assetType == AssetTypes.LINK_PDF) {
+ content.pdfAssetId = row.assets.id;
+ }
if (row.assets.assetType == AssetTypes.LINK_FULL_PAGE_ARCHIVE) {
content.fullPageArchiveAssetId = row.assets.id;
}
diff --git a/packages/trpc/routers/bookmarks.ts b/packages/trpc/routers/bookmarks.ts
index a9d0df38..fb9b4697 100644
--- a/packages/trpc/routers/bookmarks.ts
+++ b/packages/trpc/routers/bookmarks.ts
@@ -562,6 +562,7 @@ export const bookmarksAppRouter = router({
z.object({
bookmarkId: z.string(),
archiveFullPage: z.boolean().optional().default(false),
+ storePdf: z.boolean().optional().default(false),
}),
)
.use(ensureBookmarkOwnership)
@@ -577,6 +578,7 @@ export const bookmarksAppRouter = router({
{
bookmarkId: input.bookmarkId,
archiveFullPage: input.archiveFullPage,
+ storePdf: input.storePdf,
},
{
groupId: ctx.user.id,