diff options
| author | Mohamed Bassem <me@mbassem.com> | 2025-01-11 18:09:51 +0000 |
|---|---|---|
| committer | Mohamed Bassem <me@mbassem.com> | 2025-01-11 18:09:51 +0000 |
| commit | 10506173cd5309e7c63d83055243abc67cecad4f (patch) | |
| tree | f37f7dd704c63e34a1e5b0bffdda442b03179d9c | |
| parent | 107d923b3abd60329463957ca4604107b3427b2c (diff) | |
| download | karakeep-10506173cd5309e7c63d83055243abc67cecad4f.tar.zst | |
feat: Add support for singlefile extension uploads. #172
| -rw-r--r-- | apps/web/app/api/assets/route.ts | 75 | ||||
| -rw-r--r-- | apps/web/app/api/v1/bookmarks/singlefile/route.ts | 54 | ||||
| -rw-r--r-- | apps/web/components/dashboard/preview/AttachmentBox.tsx | 1 | ||||
| -rw-r--r-- | apps/workers/crawlerWorker.ts | 36 | ||||
| -rw-r--r-- | apps/workers/workerUtils.ts | 3 | ||||
| -rw-r--r-- | docs/docs/14-Guides/03-singlefile.md | 20 | ||||
| -rw-r--r-- | packages/db/schema.ts | 2 | ||||
| -rw-r--r-- | packages/e2e_tests/tests/api/bookmarks.test.ts | 51 | ||||
| -rw-r--r-- | packages/open-api/hoarder-openapi-spec.json | 6 | ||||
| -rw-r--r-- | packages/sdk/src/hoarder-api.d.ts | 4 | ||||
| -rw-r--r-- | packages/shared/assetdb.ts | 7 | ||||
| -rw-r--r-- | packages/shared/types/bookmarks.ts | 7 | ||||
| -rw-r--r-- | packages/trpc/lib/attachments.ts | 5 | ||||
| -rw-r--r-- | packages/trpc/routers/bookmarks.ts | 41 |
14 files changed, 275 insertions, 37 deletions
diff --git a/apps/web/app/api/assets/route.ts b/apps/web/app/api/assets/route.ts index 0e52ff93..81ee454e 100644 --- a/apps/web/app/api/assets/route.ts +++ b/apps/web/app/api/assets/route.ts @@ -9,43 +9,43 @@ import { SUPPORTED_UPLOAD_ASSET_TYPES, } from "@hoarder/shared/assetdb"; import serverConfig from "@hoarder/shared/config"; +import { AuthedContext } from "@hoarder/trpc"; const MAX_UPLOAD_SIZE_BYTES = serverConfig.maxAssetSizeMb * 1024 * 1024; export const dynamic = "force-dynamic"; -export async function POST(request: Request) { - const ctx = await createContextFromRequest(request); - if (!ctx.user) { - return Response.json({ error: "Unauthorized" }, { status: 401 }); - } - if (serverConfig.demoMode) { - throw new TRPCError({ - message: "Mutations are not allowed in demo mode", - code: "FORBIDDEN", - }); - } - const formData = await request.formData(); + +export async function uploadFromPostData( + user: AuthedContext["user"], + db: AuthedContext["db"], + formData: FormData, +): Promise< + | { error: string; status: number } + | { + assetId: string; + contentType: string; + fileName: string; + size: number; + } +> { const data = formData.get("file") ?? 
formData.get("image"); let buffer; let contentType; if (data instanceof File) { contentType = data.type; if (!SUPPORTED_UPLOAD_ASSET_TYPES.has(contentType)) { - return Response.json( - { error: "Unsupported asset type" }, - { status: 400 }, - ); + return { error: "Unsupported asset type", status: 400 }; } if (data.size > MAX_UPLOAD_SIZE_BYTES) { - return Response.json({ error: "Asset is too big" }, { status: 413 }); + return { error: "Asset is too big", status: 413 }; } buffer = Buffer.from(await data.arrayBuffer()); } else { - return Response.json({ error: "Bad request" }, { status: 400 }); + return { error: "Bad request", status: 400 }; } const fileName = data.name; - const [assetDb] = await ctx.db + const [assetDb] = await db .insert(assets) .values({ id: newAssetId(), @@ -53,25 +53,50 @@ export async function POST(request: Request) { // And without an attached bookmark. assetType: AssetTypes.UNKNOWN, bookmarkId: null, - userId: ctx.user.id, + userId: user.id, contentType, size: data.size, fileName, }) .returning(); - const assetId = assetDb.id; await saveAsset({ - userId: ctx.user.id, - assetId, + userId: user.id, + assetId: assetDb.id, metadata: { contentType, fileName }, asset: buffer, }); - return Response.json({ - assetId, + return { + assetId: assetDb.id, contentType, size: buffer.byteLength, fileName, + }; +} + +export async function POST(request: Request) { + const ctx = await createContextFromRequest(request); + if (ctx.user === null) { + return Response.json({ error: "Unauthorized" }, { status: 401 }); + } + if (serverConfig.demoMode) { + throw new TRPCError({ + message: "Mutations are not allowed in demo mode", + code: "FORBIDDEN", + }); + } + const formData = await request.formData(); + + const resp = await uploadFromPostData(ctx.user, ctx.db, formData); + if ("error" in resp) { + return Response.json({ error: resp.error }, { status: resp.status }); + } + + return Response.json({ + assetId: resp.assetId, + contentType: resp.contentType, + size: 
resp.size, + fileName: resp.fileName, } satisfies ZUploadResponse); } diff --git a/apps/web/app/api/v1/bookmarks/singlefile/route.ts b/apps/web/app/api/v1/bookmarks/singlefile/route.ts new file mode 100644 index 00000000..3f8ac2f7 --- /dev/null +++ b/apps/web/app/api/v1/bookmarks/singlefile/route.ts @@ -0,0 +1,54 @@ +import { createContextFromRequest } from "@/server/api/client"; +import { TRPCError } from "@trpc/server"; + +import serverConfig from "@hoarder/shared/config"; +import { BookmarkTypes } from "@hoarder/shared/types/bookmarks"; +import { createCallerFactory } from "@hoarder/trpc"; +import { appRouter } from "@hoarder/trpc/routers/_app"; + +import { uploadFromPostData } from "../../../assets/route"; + +export const dynamic = "force-dynamic"; + +export async function POST(req: Request) { + const ctx = await createContextFromRequest(req); + if (!ctx.user) { + return Response.json({ error: "Unauthorized" }, { status: 401 }); + } + if (serverConfig.demoMode) { + throw new TRPCError({ + message: "Mutations are not allowed in demo mode", + code: "FORBIDDEN", + }); + } + const formData = await req.formData(); + const up = await uploadFromPostData(ctx.user, ctx.db, formData); + + if ("error" in up) { + return Response.json({ error: up.error }, { status: up.status }); + } + + const url = formData.get("url"); + if (!url) { + throw new TRPCError({ + message: "URL is required", + code: "BAD_REQUEST", + }); + } + if (typeof url !== "string") { + throw new TRPCError({ + message: "URL must be a string", + code: "BAD_REQUEST", + }); + } + + const createCaller = createCallerFactory(appRouter); + const api = createCaller(ctx); + + const bookmark = await api.bookmarks.createBookmark({ + type: BookmarkTypes.LINK, + url, + precrawledArchiveId: up.assetId, + }); + return Response.json(bookmark, { status: 201 }); +} diff --git a/apps/web/components/dashboard/preview/AttachmentBox.tsx b/apps/web/components/dashboard/preview/AttachmentBox.tsx index 32184c30..6547ae51 100644 --- 
a/apps/web/components/dashboard/preview/AttachmentBox.tsx +++ b/apps/web/components/dashboard/preview/AttachmentBox.tsx @@ -46,6 +46,7 @@ export default function AttachmentBox({ bookmark }: { bookmark: ZBookmark }) { const typeToIcon: Record<ZAssetType, React.ReactNode> = { screenshot: <Camera className="size-4" />, fullPageArchive: <Archive className="size-4" />, + precrawledArchive: <Archive className="size-4" />, bannerImage: <Image className="size-4" />, video: <Video className="size-4" />, bookmarkAsset: <Paperclip className="size-4" />, diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts index 252da3b2..16b1f4ae 100644 --- a/apps/workers/crawlerWorker.ts +++ b/apps/workers/crawlerWorker.ts @@ -41,6 +41,7 @@ import { getAssetSize, IMAGE_ASSET_TYPES, newAssetId, + readAsset, saveAsset, saveAssetFromFile, silentDeleteAsset, @@ -582,14 +583,35 @@ async function crawlAndParseUrl( oldScreenshotAssetId: string | undefined, oldImageAssetId: string | undefined, oldFullPageArchiveAssetId: string | undefined, + precrawledArchiveAssetId: string | undefined, archiveFullPage: boolean, ) { - const { - htmlContent, - screenshot, - statusCode, - url: browserUrl, - } = await crawlPage(jobId, url); + let result: { + htmlContent: string; + screenshot: Buffer | undefined; + statusCode: number | null; + url: string; + }; + + if (precrawledArchiveAssetId) { + logger.info( + `[Crawler][${jobId}] The page has been precrawled. 
Will use the precrawled archive instead.`, + ); + const asset = await readAsset({ + userId, + assetId: precrawledArchiveAssetId, + }); + result = { + htmlContent: asset.asset.toString(), + screenshot: undefined, + statusCode: 200, + url, + }; + } else { + result = await crawlPage(jobId, url); + } + + const { htmlContent, screenshot, statusCode, url: browserUrl } = result; const [meta, readableContent, screenshotAssetInfo] = await Promise.all([ extractMetadata(htmlContent, browserUrl, jobId), @@ -701,6 +723,7 @@ async function runCrawler(job: DequeuedJob<ZCrawlLinkRequest>) { screenshotAssetId: oldScreenshotAssetId, imageAssetId: oldImageAssetId, fullPageArchiveAssetId: oldFullPageArchiveAssetId, + precrawledArchiveAssetId, } = await getBookmarkDetails(bookmarkId); logger.info( @@ -730,6 +753,7 @@ async function runCrawler(job: DequeuedJob<ZCrawlLinkRequest>) { oldScreenshotAssetId, oldImageAssetId, oldFullPageArchiveAssetId, + precrawledArchiveAssetId, archiveFullPage, ); diff --git a/apps/workers/workerUtils.ts b/apps/workers/workerUtils.ts index e93d241b..2b365c73 100644 --- a/apps/workers/workerUtils.ts +++ b/apps/workers/workerUtils.ts @@ -44,5 +44,8 @@ export async function getBookmarkDetails(bookmarkId: string) { videoAssetId: bookmark.assets.find(
(a) => a.assetType == AssetTypes.LINK_VIDEO,
)?.id,
+ precrawledArchiveAssetId: bookmark.assets.find(
+ (a) => a.assetType == AssetTypes.LINK_PRECRAWLED_ARCHIVE,
+ )?.id,
};
}
diff --git a/docs/docs/14-Guides/03-singlefile.md b/docs/docs/14-Guides/03-singlefile.md new file mode 100644 index 00000000..9dbba0f6 --- /dev/null +++ b/docs/docs/14-Guides/03-singlefile.md @@ -0,0 +1,20 @@ +# Using Hoarder with SingleFile Extension + +Hoarder supports being a destination for the [SingleFile extension](https://github.com/gildas-lormeau/SingleFile). This has the benefit of allowing you to use the singlefile extension to hoard links as you're seeing them in the browser. This is perfect for websites that don't like to get crawled, have annoying cookie banners, or require authentication. + +## Setup + +1. Install the [SingleFile extension](https://github.com/gildas-lormeau/SingleFile). +2. In the extension settings, select `Destinations`. +3. Select `upload to a REST Form API`. +4. In the URL, insert the address: `https://YOUR_SERVER_ADDRESS/api/v1/bookmarks/singlefile`. +5. In the `authorization token` field, paste an API key that you can get from your hoarder settings. +6. Set `data field name` to `file`. +7. Set `URL field name` to `url`. + +Now, go to any page and click the singlefile extension icon. Once it's done with the upload, the bookmark should show up in your hoarder instance. + +:::info +Currently, we don't support screenshots for singlefile uploads, but this will change in the future. 
+::: + diff --git a/packages/db/schema.ts b/packages/db/schema.ts index 19bf6db5..6498545a 100644 --- a/packages/db/schema.ts +++ b/packages/db/schema.ts @@ -158,6 +158,7 @@ export const enum AssetTypes { LINK_BANNER_IMAGE = "linkBannerImage", LINK_SCREENSHOT = "linkScreenshot", LINK_FULL_PAGE_ARCHIVE = "linkFullPageArchive", + LINK_PRECRAWLED_ARCHIVE = "linkPrecrawledArchive", LINK_VIDEO = "linkVideo", BOOKMARK_ASSET = "bookmarkAsset", UNKNOWN = "unknown", @@ -173,6 +174,7 @@ export const assets = sqliteTable( AssetTypes.LINK_BANNER_IMAGE, AssetTypes.LINK_SCREENSHOT, AssetTypes.LINK_FULL_PAGE_ARCHIVE, + AssetTypes.LINK_PRECRAWLED_ARCHIVE, AssetTypes.LINK_VIDEO, AssetTypes.BOOKMARK_ASSET, AssetTypes.UNKNOWN, diff --git a/packages/e2e_tests/tests/api/bookmarks.test.ts b/packages/e2e_tests/tests/api/bookmarks.test.ts index 7c605aab..df3cefe2 100644 --- a/packages/e2e_tests/tests/api/bookmarks.test.ts +++ b/packages/e2e_tests/tests/api/bookmarks.test.ts @@ -394,4 +394,55 @@ describe("Bookmarks API", () => { expect(finalPage!.bookmarks.length).toBe(1); expect(finalPage!.nextCursor).toBeNull(); }); + + it("should support precrawling via singlefile", async () => { + const file = new File(["<html>HELLO WORLD</html>"], "test.html", { + type: "text/html", + }); + + const formData = new FormData(); + formData.append("url", "https://example.com"); + formData.append("file", file); + + // OpenAPI typescript doesn't support multipart/form-data + // Upload the singlefile archive + const response = await fetch( + `http://localhost:${port}/api/v1/bookmarks/singlefile`, + { + method: "POST", + headers: { + authorization: `Bearer ${apiKey}`, + }, + body: formData, + }, + ); + + if (!response.ok) { + throw new Error(`Failed to upload asset: ${response.statusText}`); + } + + expect(response.status).toBe(201); + + const { id: bookmarkId } = (await response.json()) as { + id: string; + }; + + // Get the created bookmark + const { data: retrievedBookmark, response: getResponse } = await 
client.GET( + "/bookmarks/{bookmarkId}", + { + params: { + path: { + bookmarkId: bookmarkId, + }, + }, + }, + ); + + expect(getResponse.status).toBe(200); + assert(retrievedBookmark!.content.type === "link"); + expect(retrievedBookmark!.assets.map((a) => a.assetType)).toContain( + "precrawledArchive", + ); + }); }); diff --git a/packages/open-api/hoarder-openapi-spec.json b/packages/open-api/hoarder-openapi-spec.json index 7b2b9436..382733e0 100644 --- a/packages/open-api/hoarder-openapi-spec.json +++ b/packages/open-api/hoarder-openapi-spec.json @@ -256,6 +256,7 @@ "fullPageArchive", "video", "bookmarkAsset", + "precrawledArchive", "unknown" ] } @@ -598,6 +599,9 @@ "url": { "type": "string", "format": "uri" + }, + "precrawledArchiveId": { + "type": "string" } }, "required": [ @@ -1107,6 +1111,7 @@ "fullPageArchive", "video", "bookmarkAsset", + "precrawledArchive", "unknown" ] } @@ -1138,6 +1143,7 @@ "fullPageArchive", "video", "bookmarkAsset", + "precrawledArchive", "unknown" ] } diff --git a/packages/sdk/src/hoarder-api.d.ts b/packages/sdk/src/hoarder-api.d.ts index f4d76a8a..482f6c3c 100644 --- a/packages/sdk/src/hoarder-api.d.ts +++ b/packages/sdk/src/hoarder-api.d.ts @@ -68,6 +68,7 @@ export interface paths { type: "link"; /** Format: uri */ url: string; + precrawledArchiveId?: string; } | { /** @enum {string} */ @@ -426,6 +427,7 @@ export interface paths { | "fullPageArchive" | "video" | "bookmarkAsset" + | "precrawledArchive" | "unknown"; }; }; @@ -446,6 +448,7 @@ export interface paths { | "fullPageArchive" | "video" | "bookmarkAsset" + | "precrawledArchive" | "unknown"; }; }; @@ -1250,6 +1253,7 @@ export interface components { | "fullPageArchive" | "video" | "bookmarkAsset" + | "precrawledArchive" | "unknown"; }[]; }; diff --git a/packages/shared/assetdb.ts b/packages/shared/assetdb.ts index 2ef69279..d2b9eebf 100644 --- a/packages/shared/assetdb.ts +++ b/packages/shared/assetdb.ts @@ -25,6 +25,13 @@ export const IMAGE_ASSET_TYPES: Set<string> = new 
Set<string>([ // The assets that we allow the users to upload export const SUPPORTED_UPLOAD_ASSET_TYPES: Set<string> = new Set<string>([ ...IMAGE_ASSET_TYPES, + ASSET_TYPES.TEXT_HTML, + ASSET_TYPES.APPLICATION_PDF, +]); + +// The assets that we allow as a bookmark of type asset +export const SUPPORTED_BOOKMARK_ASSET_TYPES: Set<string> = new Set<string>([ + ...IMAGE_ASSET_TYPES, ASSET_TYPES.APPLICATION_PDF, ]); diff --git a/packages/shared/types/bookmarks.ts b/packages/shared/types/bookmarks.ts index a1e39280..0a414ff9 100644 --- a/packages/shared/types/bookmarks.ts +++ b/packages/shared/types/bookmarks.ts @@ -18,6 +18,7 @@ export const zAssetTypesSchema = z.enum([ "fullPageArchive", "video", "bookmarkAsset", + "precrawledArchive", "unknown", ]); export type ZAssetType = z.infer<typeof zAssetTypesSchema>; @@ -126,7 +127,11 @@ export const zNewBookmarkRequestSchema = z }) .and( z.discriminatedUnion("type", [ - z.object({ type: z.literal(BookmarkTypes.LINK), url: z.string().url() }), + z.object({ + type: z.literal(BookmarkTypes.LINK), + url: z.string().url(), + precrawledArchiveId: z.string().optional(), + }), z.object({ type: z.literal(BookmarkTypes.TEXT), text: z.string(), diff --git a/packages/trpc/lib/attachments.ts b/packages/trpc/lib/attachments.ts index 0fd41d1b..f4fda9cd 100644 --- a/packages/trpc/lib/attachments.ts +++ b/packages/trpc/lib/attachments.ts @@ -7,6 +7,7 @@ export function mapDBAssetTypeToUserType(assetType: AssetTypes): ZAssetType { const map: Record<AssetTypes, z.infer<typeof zAssetTypesSchema>> = { [AssetTypes.LINK_SCREENSHOT]: "screenshot", [AssetTypes.LINK_FULL_PAGE_ARCHIVE]: "fullPageArchive", + [AssetTypes.LINK_PRECRAWLED_ARCHIVE]: "precrawledArchive", [AssetTypes.LINK_BANNER_IMAGE]: "bannerImage", [AssetTypes.LINK_VIDEO]: "video", [AssetTypes.BOOKMARK_ASSET]: "bookmarkAsset", @@ -21,6 +22,7 @@ export function mapSchemaAssetTypeToDB( const map: Record<ZAssetType, AssetTypes> = { screenshot: AssetTypes.LINK_SCREENSHOT, fullPageArchive: 
AssetTypes.LINK_FULL_PAGE_ARCHIVE, + precrawledArchive: AssetTypes.LINK_PRECRAWLED_ARCHIVE, bannerImage: AssetTypes.LINK_BANNER_IMAGE, video: AssetTypes.LINK_VIDEO, bookmarkAsset: AssetTypes.BOOKMARK_ASSET, @@ -33,6 +35,7 @@ export function humanFriendlyNameForAssertType(type: ZAssetType) { const map: Record<ZAssetType, string> = { screenshot: "Screenshot", fullPageArchive: "Full Page Archive", + precrawledArchive: "Precrawled Archive", bannerImage: "Banner Image", video: "Video", bookmarkAsset: "Bookmark Asset", @@ -45,6 +48,7 @@ export function isAllowedToAttachAsset(type: ZAssetType) { const map: Record<ZAssetType, boolean> = { screenshot: true, fullPageArchive: false, + precrawledArchive: false, bannerImage: true, video: false, bookmarkAsset: false, @@ -57,6 +61,7 @@ export function isAllowedToDetachAsset(type: ZAssetType) { const map: Record<ZAssetType, boolean> = { screenshot: true, fullPageArchive: true, + precrawledArchive: false, bannerImage: true, video: true, bookmarkAsset: false, diff --git a/packages/trpc/routers/bookmarks.ts b/packages/trpc/routers/bookmarks.ts index 15e4cb7c..026bd322 100644 --- a/packages/trpc/routers/bookmarks.ts +++ b/packages/trpc/routers/bookmarks.ts @@ -22,7 +22,10 @@ import { rssFeedImportsTable, tagsOnBookmarks, } from "@hoarder/db/schema"; -import { deleteAsset } from "@hoarder/shared/assetdb"; +import { + deleteAsset, + SUPPORTED_BOOKMARK_ASSET_TYPES, +} from "@hoarder/shared/assetdb"; import serverConfig from "@hoarder/shared/config"; import { InferenceClientFactory } from "@hoarder/shared/inference"; import { buildSummaryPrompt } from "@hoarder/shared/prompts"; @@ -98,9 +101,6 @@ export const ensureAssetOwnership = async (opts: { }) => { const asset = await opts.ctx.db.query.assets.findFirst({ where: eq(bookmarks.id, opts.assetId), - columns: { - userId: true, - }, }); if (!opts.ctx.user) { throw new TRPCError({ @@ -120,6 +120,7 @@ export const ensureAssetOwnership = async (opts: { message: "User is not allowed to access 
resource", }); } + return asset; }; async function getBookmark(ctx: AuthedContext, bookmarkId: string) { @@ -307,6 +308,24 @@ export const bookmarksAppRouter = router({ }) .returning() )[0]; + if (input.precrawledArchiveId) { + await ensureAssetOwnership({ + ctx, + assetId: input.precrawledArchiveId, + }); + await tx + .update(assets) + .set({ + bookmarkId: bookmark.id, + assetType: AssetTypes.LINK_PRECRAWLED_ARCHIVE, + }) + .where( + and( + eq(assets.id, input.precrawledArchiveId), + eq(assets.userId, ctx.user.id), + ), + ); + } content = { type: BookmarkTypes.LINK, ...link, @@ -344,7 +363,19 @@ export const bookmarksAppRouter = router({ sourceUrl: null, }) .returning(); - await ensureAssetOwnership({ ctx, assetId: input.assetId }); + const uploadedAsset = await ensureAssetOwnership({ + ctx, + assetId: input.assetId, + }); + if ( + !uploadedAsset.contentType || + !SUPPORTED_BOOKMARK_ASSET_TYPES.has(uploadedAsset.contentType) + ) { + throw new TRPCError({ + code: "BAD_REQUEST", + message: "Unsupported asset type", + }); + } await tx .update(assets) .set({ |
