diff options
| author | Ahmad Mujahid <55625580+AhmadMuj@users.noreply.github.com> | 2025-02-17 13:25:16 +0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-02-17 09:25:16 +0000 |
| commit | e5cb9aa848009ea22c1385e4d33b7edf372979fb (patch) | |
| tree | 89470d8da8aab10f30bbfccea8d1b0cea08a1408 /packages | |
| parent | a14be108736133535e2828b6bbdc8d0a69accd63 (diff) | |
| download | karakeep-e5cb9aa848009ea22c1385e4d33b7edf372979fb.tar.zst | |
feat: Add PDF screenshot generation and display (#995)
* Updated pdf2json to 3.1.5
* Extract and store a screenshot from PDF files using pdf2pic
* Installing graphicsmagick and ghostscript
* Generate missing PDF screenshots with the tidyAssets worker for backward compatibility
* Display PDF screenshot instead of the PDF in web if it exists.
* Display PDF screenshot in mobile app if it exists.
* Updated pnpm-lock.yaml
* Removed console.log
* Revert the unnecessary changes in package.json
* Revert pnpm-lock changes
* Prevent rendering PDF files if the screenshot is not generated
* refactor: replace useEffect with useMemo for section initialization
* feat: show PDF file download button and handle large PDFs by defaulting to screenshot view
* feat: add file size to openapi spec
* feature: Add Assets preprocessing in fix mode to admin actions
* i18n: add reprocess_assets_fix_mode translation
* i18n: Add missing ar translations
* A bunch of fixes
* Fix OpenAPI spec schema
---------
Co-authored-by: Mohamed Bassem <me@mbassem.com>
Diffstat (limited to 'packages')
| -rw-r--r-- | packages/db/schema.ts | 2 | ||||
| -rw-r--r-- | packages/open-api/hoarder-openapi-spec.json | 7 | ||||
| -rw-r--r-- | packages/shared/assetdb.ts | 33 | ||||
| -rw-r--r-- | packages/shared/queues.ts | 8 | ||||
| -rw-r--r-- | packages/shared/types/bookmarks.ts | 2 | ||||
| -rw-r--r-- | packages/trpc/lib/attachments.ts | 5 | ||||
| -rw-r--r-- | packages/trpc/routers/admin.ts | 10 | ||||
| -rw-r--r-- | packages/trpc/routers/bookmarks.ts | 10 |
8 files changed, 77 insertions, 0 deletions
diff --git a/packages/db/schema.ts b/packages/db/schema.ts index 6bd67448..111081b8 100644 --- a/packages/db/schema.ts +++ b/packages/db/schema.ts @@ -164,6 +164,7 @@ export const bookmarkLinks = sqliteTable( export const enum AssetTypes { LINK_BANNER_IMAGE = "linkBannerImage", LINK_SCREENSHOT = "linkScreenshot", + ASSET_SCREENSHOT = "assetScreenshot", LINK_FULL_PAGE_ARCHIVE = "linkFullPageArchive", LINK_PRECRAWLED_ARCHIVE = "linkPrecrawledArchive", LINK_VIDEO = "linkVideo", @@ -180,6 +181,7 @@ export const assets = sqliteTable( enum: [ AssetTypes.LINK_BANNER_IMAGE, AssetTypes.LINK_SCREENSHOT, + AssetTypes.ASSET_SCREENSHOT, AssetTypes.LINK_FULL_PAGE_ARCHIVE, AssetTypes.LINK_PRECRAWLED_ARCHIVE, AssetTypes.LINK_VIDEO, diff --git a/packages/open-api/hoarder-openapi-spec.json b/packages/open-api/hoarder-openapi-spec.json index 182cf3b0..3af444b8 100644 --- a/packages/open-api/hoarder-openapi-spec.json +++ b/packages/open-api/hoarder-openapi-spec.json @@ -224,6 +224,10 @@ "sourceUrl": { "type": "string", "nullable": true + }, + "size": { + "type": "number", + "nullable": true } }, "required": [ @@ -260,6 +264,7 @@ "type": "string", "enum": [ "screenshot", + "assetScreenshot", "bannerImage", "fullPageArchive", "video", @@ -1121,6 +1126,7 @@ "type": "string", "enum": [ "screenshot", + "assetScreenshot", "bannerImage", "fullPageArchive", "video", @@ -1153,6 +1159,7 @@ "type": "string", "enum": [ "screenshot", + "assetScreenshot", "bannerImage", "fullPageArchive", "video", diff --git a/packages/shared/assetdb.ts b/packages/shared/assetdb.ts index 89738fcf..974f7893 100644 --- a/packages/shared/assetdb.ts +++ b/packages/shared/assetdb.ts @@ -4,6 +4,7 @@ import { Glob } from "glob"; import { z } from "zod"; import serverConfig from "./config"; +import logger from "./logger"; const ROOT_PATH = path.join(serverConfig.dataDir, "assets"); @@ -241,3 +242,35 @@ export async function* getAllAssets() { }; } } + +export async function storeScreenshot( + screenshot: Buffer | undefined, 
+ userId: string, + jobId: string, +) { + if (!serverConfig.crawler.storeScreenshot) { + logger.info( + `[Crawler][${jobId}] Skipping storing the screenshot as per the config.`, + ); + return null; + } + if (!screenshot) { + logger.info( + `[Crawler][${jobId}] Skipping storing the screenshot as it's empty.`, + ); + return null; + } + const assetId = newAssetId(); + const contentType = "image/png"; + const fileName = "screenshot.png"; + await saveAsset({ + userId, + assetId, + metadata: { contentType, fileName }, + asset: screenshot, + }); + logger.info( + `[Crawler][${jobId}] Stored the screenshot as assetId: ${assetId}`, + ); + return { assetId, contentType, fileName, size: screenshot.byteLength }; +} diff --git a/packages/shared/queues.ts b/packages/shared/queues.ts index cbe58f8d..5484ffb2 100644 --- a/packages/shared/queues.ts +++ b/packages/shared/queues.ts @@ -98,6 +98,13 @@ export async function triggerSearchDeletion(bookmarkId: string) { }); } +export async function triggerReprocessingFixMode(bookmarkId: string) { + await AssetPreprocessingQueue.enqueue({ + bookmarkId, + fixMode: true, + }); +} + export const zvideoRequestSchema = z.object({ bookmarkId: z.string(), url: z.string(), @@ -143,6 +150,7 @@ export const FeedQueue = new SqliteQueue<ZFeedRequestSchema>( // Preprocess Assets export const zAssetPreprocessingRequestSchema = z.object({ bookmarkId: z.string(), + fixMode: z.boolean().optional().default(false), }); export type AssetPreprocessingRequest = z.infer< typeof zAssetPreprocessingRequestSchema diff --git a/packages/shared/types/bookmarks.ts b/packages/shared/types/bookmarks.ts index b6a74474..9644095c 100644 --- a/packages/shared/types/bookmarks.ts +++ b/packages/shared/types/bookmarks.ts @@ -17,6 +17,7 @@ export type ZSortOrder = z.infer<typeof zSortOrder>; export const zAssetTypesSchema = z.enum([ "screenshot", + "assetScreenshot", "bannerImage", "fullPageArchive", "video", @@ -61,6 +62,7 @@ export const zBookmarkedAssetSchema = z.object({ 
assetId: z.string(), fileName: z.string().nullish(), sourceUrl: z.string().nullish(), + size: z.number().nullish(), }); export type ZBookmarkedAsset = z.infer<typeof zBookmarkedAssetSchema>; diff --git a/packages/trpc/lib/attachments.ts b/packages/trpc/lib/attachments.ts index f4fda9cd..3ad79a5a 100644 --- a/packages/trpc/lib/attachments.ts +++ b/packages/trpc/lib/attachments.ts @@ -6,6 +6,7 @@ import { ZAssetType, zAssetTypesSchema } from "@hoarder/shared/types/bookmarks"; export function mapDBAssetTypeToUserType(assetType: AssetTypes): ZAssetType { const map: Record<AssetTypes, z.infer<typeof zAssetTypesSchema>> = { [AssetTypes.LINK_SCREENSHOT]: "screenshot", + [AssetTypes.ASSET_SCREENSHOT]: "assetScreenshot", [AssetTypes.LINK_FULL_PAGE_ARCHIVE]: "fullPageArchive", [AssetTypes.LINK_PRECRAWLED_ARCHIVE]: "precrawledArchive", [AssetTypes.LINK_BANNER_IMAGE]: "bannerImage", @@ -21,6 +22,7 @@ export function mapSchemaAssetTypeToDB( ): AssetTypes { const map: Record<ZAssetType, AssetTypes> = { screenshot: AssetTypes.LINK_SCREENSHOT, + assetScreenshot: AssetTypes.ASSET_SCREENSHOT, fullPageArchive: AssetTypes.LINK_FULL_PAGE_ARCHIVE, precrawledArchive: AssetTypes.LINK_PRECRAWLED_ARCHIVE, bannerImage: AssetTypes.LINK_BANNER_IMAGE, @@ -34,6 +36,7 @@ export function mapSchemaAssetTypeToDB( export function humanFriendlyNameForAssertType(type: ZAssetType) { const map: Record<ZAssetType, string> = { screenshot: "Screenshot", + assetScreenshot: "Asset Screenshot", fullPageArchive: "Full Page Archive", precrawledArchive: "Precrawled Archive", bannerImage: "Banner Image", @@ -47,6 +50,7 @@ export function humanFriendlyNameForAssertType(type: ZAssetType) { export function isAllowedToAttachAsset(type: ZAssetType) { const map: Record<ZAssetType, boolean> = { screenshot: true, + assetScreenshot: true, fullPageArchive: false, precrawledArchive: false, bannerImage: true, @@ -60,6 +64,7 @@ export function isAllowedToAttachAsset(type: ZAssetType) { export function 
isAllowedToDetachAsset(type: ZAssetType) { const map: Record<ZAssetType, boolean> = { screenshot: true, + assetScreenshot: true, fullPageArchive: true, precrawledArchive: false, bannerImage: true, diff --git a/packages/trpc/routers/admin.ts b/packages/trpc/routers/admin.ts index c7dd7575..6393c950 100644 --- a/packages/trpc/routers/admin.ts +++ b/packages/trpc/routers/admin.ts @@ -9,6 +9,7 @@ import { OpenAIQueue, SearchIndexingQueue, TidyAssetsQueue, + triggerReprocessingFixMode, triggerSearchReindex, } from "@hoarder/shared/queues"; import { @@ -154,6 +155,15 @@ export const adminAppRouter = router({ await Promise.all(bookmarkIds.map((b) => triggerSearchReindex(b.id))); }), + reprocessAssetsFixMode: adminProcedure.mutation(async ({ ctx }) => { + const bookmarkIds = await ctx.db.query.bookmarkAssets.findMany({ + columns: { + id: true, + }, + }); + + await Promise.all(bookmarkIds.map((b) => triggerReprocessingFixMode(b.id))); + }), reRunInferenceOnAllBookmarks: adminProcedure .input( z.object({ diff --git a/packages/trpc/routers/bookmarks.ts b/packages/trpc/routers/bookmarks.ts index 12ec9ccb..6ab863fb 100644 --- a/packages/trpc/routers/bookmarks.ts +++ b/packages/trpc/routers/bookmarks.ts @@ -259,6 +259,7 @@ function toZodSchema(bookmark: BookmarkQueryReturnType): ZBookmark { assetId: asset.assetId, fileName: asset.fileName, sourceUrl: asset.sourceUrl, + size: assets.find((a) => a.id == asset.assetId)?.size, }; break; } @@ -441,6 +442,7 @@ export const bookmarksAppRouter = router({ case BookmarkTypes.ASSET: { await AssetPreprocessingQueue.enqueue({ bookmarkId: bookmark.id, + fixMode: false, }); break; } @@ -830,6 +832,7 @@ export const bookmarksAppRouter = router({ assetType: bookmarkAssets.assetType, fileName: bookmarkAssets.fileName, sourceUrl: bookmarkAssets.sourceUrl ?? 
null, + size: null, // This will get filled in the asset loop }; break; } @@ -881,6 +884,13 @@ export const bookmarksAppRouter = router({ } acc[bookmarkId].content = content; } + if (acc[bookmarkId].content.type == BookmarkTypes.ASSET) { + const content = acc[bookmarkId].content; + if (row.assets.id == content.assetId) { + // If this is the bookmark's main asset, capture its size. + content.size = row.assets.size; + } + } acc[bookmarkId].assets.push({ id: row.assets.id, assetType: mapDBAssetTypeToUserType(row.assets.assetType), |
