diff options
| -rw-r--r-- | apps/workers/workerUtils.ts | 6 | ||||
| -rw-r--r-- | docs/docs/14-Guides/03-singlefile.md | 16 | ||||
| -rw-r--r-- | packages/api/routes/bookmarks.ts | 69 | ||||
| -rw-r--r-- | packages/e2e_tests/tests/api/bookmarks.test.ts | 274 | ||||
| -rw-r--r-- | packages/trpc/lib/attachments.ts | 2 |
5 files changed, 327 insertions, 40 deletions
diff --git a/apps/workers/workerUtils.ts b/apps/workers/workerUtils.ts index dc846177..44180951 100644 --- a/apps/workers/workerUtils.ts +++ b/apps/workers/workerUtils.ts @@ -44,8 +44,8 @@ export async function getBookmarkDetails(bookmarkId: string) { videoAssetId: bookmark.assets.find(
(a) => a.assetType == AssetTypes.LINK_VIDEO,
)?.id,
- precrawledArchiveAssetId: bookmark.assets.find(
- (a) => a.assetType == AssetTypes.LINK_PRECRAWLED_ARCHIVE,
- )?.id,
+ precrawledArchiveAssetId: bookmark.assets
+ .filter((a) => a.assetType == AssetTypes.LINK_PRECRAWLED_ARCHIVE)
+ .at(-1)?.id,
};
}
diff --git a/docs/docs/14-Guides/03-singlefile.md b/docs/docs/14-Guides/03-singlefile.md index bc4acb64..c0f4e174 100644 --- a/docs/docs/14-Guides/03-singlefile.md +++ b/docs/docs/14-Guides/03-singlefile.md @@ -11,9 +11,25 @@ Karakeep supports being a destination for the [SingleFile extension](https://git 5. In the `authorization token` field, paste an API key that you can get from your karakeep settings. 6. Set `data field name` to `file`. 7. Set `URL field name` to `url`. +8. (Optional) Add `?ifexists=MODE` to the URL where MODE is one of `skip`, `overwrite`, `overwrite-recrawl`, `append`, or `append-recrawl`. See the "Handling Existing Bookmarks" section below for details. Now, go to any page and click the singlefile extension icon. Once it's done with the upload, the bookmark should show up in your karakeep instance. Note that the singlefile extension doesn't show any progress on the upload. Given that archives are typically large, it might take 30+ seconds until the upload is done and starts showing up in Karakeep. +## Handling Existing Bookmarks + +When uploading a page that already exists in your archive (same URL), you can control the behavior by setting the `ifexists` query parameter in the upload URL. The available modes are: + +- `skip` (default): If the bookmark already exists, skip creating a new one +- `overwrite`: Replace existing precrawled archive (only the most recent archive is kept) +- `overwrite-recrawl`: Replace existing archive and queue a recrawl to update content +- `append`: Add new archive version alongside existing ones +- `append-recrawl`: Add new archive and queue a recrawl + +To use these modes, append `?ifexists=MODE` to your upload URL, replacing `MODE` with your desired behavior. 
+ +For example: +`https://YOUR_SERVER_ADDRESS/api/v1/bookmarks/singlefile?ifexists=overwrite` + ## Recommended settings diff --git a/packages/api/routes/bookmarks.ts b/packages/api/routes/bookmarks.ts index fbc46d2f..abf0daae 100644 --- a/packages/api/routes/bookmarks.ts +++ b/packages/api/routes/bookmarks.ts @@ -90,6 +90,21 @@ const app = new Hono() .post( "/singlefile", zValidator( + "query", + z.object({ + ifexists: z + .enum([ + "skip", + "overwrite", + "overwrite-recrawl", + "append", + "append-recrawl", + ]) + .optional() + .default("skip"), + }), + ), + zValidator( "form", z.object({ url: z.string(), @@ -107,7 +122,59 @@ const app = new Hono() url: form.url, precrawledArchiveId: up.assetId, }); - return c.json(bookmark, 201); + if (bookmark.alreadyExists) { + const ifexists = c.req.valid("query").ifexists; + switch (ifexists) { + case "skip": + break; + case "overwrite-recrawl": + case "overwrite": { + const existingPrecrawledArchiveId = bookmark.assets + .filter((a) => a.assetType == "precrawledArchive") + .at(-1)?.id; + if (existingPrecrawledArchiveId) { + await c.var.api.assets.replaceAsset({ + bookmarkId: bookmark.id, + oldAssetId: existingPrecrawledArchiveId, + newAssetId: up.assetId, + }); + } else { + await c.var.api.assets.attachAsset({ + bookmarkId: bookmark.id, + asset: { + id: up.assetId, + assetType: "precrawledArchive", + }, + }); + } + if (ifexists == "overwrite-recrawl") { + await c.var.api.bookmarks.recrawlBookmark({ + bookmarkId: bookmark.id, + }); + } + break; + } + case "append-recrawl": + case "append": { + await c.var.api.assets.attachAsset({ + bookmarkId: bookmark.id, + asset: { + id: up.assetId, + assetType: "precrawledArchive", + }, + }); + if (ifexists == "append-recrawl") { + await c.var.api.bookmarks.recrawlBookmark({ + bookmarkId: bookmark.id, + }); + } + break; + } + } + return c.json(bookmark, 200); + } else { + return c.json(bookmark, 201); + } }, ) diff --git a/packages/e2e_tests/tests/api/bookmarks.test.ts 
b/packages/e2e_tests/tests/api/bookmarks.test.ts index 6c56f689..d40c1add 100644 --- a/packages/e2e_tests/tests/api/bookmarks.test.ts +++ b/packages/e2e_tests/tests/api/bookmarks.test.ts @@ -397,54 +397,258 @@ describe("Bookmarks API", () => { expect(finalPage!.nextCursor).toBeNull(); }); - it("should support precrawling via singlefile", async () => { - const file = new File(["<html>HELLO WORLD</html>"], "test.html", { - type: "text/html", - }); + describe("singlefile", () => { + async function uploadSinglefileAsset(ifexists?: string) { + const file = new File(["<html>HELLO WORLD</html>"], "test.html", { + type: "text/html", + }); - const formData = new FormData(); - formData.append("url", "https://example.com"); - formData.append("file", file); + const formData = new FormData(); + formData.append("url", "https://example.com"); + formData.append("file", file); - // OpenAPI typescript doesn't support multipart/form-data - // Upload the singlefile archive - const response = await fetch( - `http://localhost:${port}/api/v1/bookmarks/singlefile`, - { + const url = new URL( + `http://localhost:${port}/api/v1/bookmarks/singlefile`, + ); + if (ifexists) { + url.searchParams.append("ifexists", ifexists); + } + + const response = await fetch(url.toString(), { method: "POST", headers: { authorization: `Bearer ${apiKey}`, }, body: formData, - }, - ); + }); - if (!response.ok) { - throw new Error(`Failed to upload asset: ${response.statusText}`); + if (!response.ok) { + return [null, response] as const; + } + + const data = (await response.json()) as { id: string }; + return [data, response] as const; } - expect(response.status).toBe(201); + it("should support precrawling via singlefile with ifexists=skip", async () => { + // First upload: create a bookmark + const [data, response] = await uploadSinglefileAsset(); + expect(response?.status).toBe(201); + const bookmarkId = data?.id; + if (!bookmarkId) throw new Error("Bookmark ID not found"); + + // Get the bookmark and record 
the precrawled asset id + const { data: bookmark, response: getResponse1 } = await client.GET( + "/bookmarks/{bookmarkId}", + { + params: { path: { bookmarkId } }, + }, + ); + expect(getResponse1.status).toBe(200); + const assetIds = bookmark!.assets + .filter((a) => a.assetType === "precrawledArchive") + .map((a) => a.id); + expect(assetIds.length).toBe(1); + const firstAssetId = assetIds[0]; + + // Second upload with skip + const [data2, response2] = await uploadSinglefileAsset("skip"); + expect(response2?.status).toBe(200); + expect(data2?.id).toBe(bookmarkId); + + // Get the bookmark again + const { data: bookmark2, response: getResponse2 } = await client.GET( + "/bookmarks/{bookmarkId}", + { + params: { path: { bookmarkId } }, + }, + ); + expect(getResponse2.status).toBe(200); + const assetIds2 = bookmark2!.assets + .filter((a) => a.assetType === "precrawledArchive") + .map((a) => a.id); + expect(assetIds2).toEqual([firstAssetId]); // same asset + }); - const { id: bookmarkId } = (await response.json()) as { - id: string; - }; + it("should support precrawling via singlefile with ifexists=overwrite", async () => { + // First upload + const [data, response] = await uploadSinglefileAsset("overwrite"); + expect(response?.status).toBe(201); + const bookmarkId = data?.id; + if (!bookmarkId) throw new Error("Bookmark ID not found"); + + // Record the asset + const { data: bookmark, response: getResponse1 } = await client.GET( + "/bookmarks/{bookmarkId}", + { + params: { path: { bookmarkId } }, + }, + ); + expect(getResponse1.status).toBe(200); + const firstAssetId = bookmark!.assets.find( + (a) => a.assetType === "precrawledArchive", + )?.id; + expect(firstAssetId).toBeDefined(); + + // Second upload with overwrite + const [data2, response2] = await uploadSinglefileAsset("overwrite"); + expect(response2?.status).toBe(200); + expect(data2?.id).toBe(bookmarkId); + + // Get the bookmark again + const { data: bookmark2, response: getResponse2 } = await client.GET( + 
"/bookmarks/{bookmarkId}", + { + params: { path: { bookmarkId } }, + }, + ); + expect(getResponse2.status).toBe(200); + const secondAssetId = bookmark2!.assets.find( + (a) => a.assetType === "precrawledArchive", + )?.id; + expect(secondAssetId).toBeDefined(); + expect(secondAssetId).not.toBe(firstAssetId); + // There should be only one precrawledArchive asset + const precrawledAssets = bookmark2!.assets.filter( + (a) => a.assetType === "precrawledArchive", + ); + expect(precrawledAssets.length).toBe(1); + }); - // Get the created bookmark - const { data: retrievedBookmark, response: getResponse } = await client.GET( - "/bookmarks/{bookmarkId}", - { - params: { - path: { - bookmarkId: bookmarkId, - }, + it("should support precrawling via singlefile with ifexists=overwrite-recrawl", async () => { + // First upload + const [data, response] = await uploadSinglefileAsset("overwrite-recrawl"); + expect(response?.status).toBe(201); + const bookmarkId = data?.id; + if (!bookmarkId) throw new Error("Bookmark ID not found"); + + // Record the asset + const { data: bookmark, response: getResponse1 } = await client.GET( + "/bookmarks/{bookmarkId}", + { + params: { path: { bookmarkId } }, }, - }, - ); + ); + expect(getResponse1.status).toBe(200); + const firstAssetId = bookmark!.assets.find( + (a) => a.assetType === "precrawledArchive", + )?.id; + expect(firstAssetId).toBeDefined(); + + // Second upload with overwrite-recrawl + const [data2, response2] = + await uploadSinglefileAsset("overwrite-recrawl"); + expect(response2?.status).toBe(200); + expect(data2?.id).toBe(bookmarkId); + + // Get the bookmark again + const { data: bookmark2, response: getResponse2 } = await client.GET( + "/bookmarks/{bookmarkId}", + { + params: { path: { bookmarkId } }, + }, + ); + expect(getResponse2.status).toBe(200); + const secondAssetId = bookmark2!.assets.find( + (a) => a.assetType === "precrawledArchive", + )?.id; + expect(secondAssetId).toBeDefined(); + 
expect(secondAssetId).not.toBe(firstAssetId); + // There should be only one precrawledArchive asset + const precrawledAssets = bookmark2!.assets.filter( + (a) => a.assetType === "precrawledArchive", + ); + expect(precrawledAssets.length).toBe(1); + }); - expect(getResponse.status).toBe(200); - assert(retrievedBookmark!.content.type === "link"); - expect(retrievedBookmark!.assets.map((a) => a.assetType)).toContain( - "precrawledArchive", - ); + it("should support precrawling via singlefile with ifexists=append", async () => { + // First upload + const [data, response] = await uploadSinglefileAsset("append"); + expect(response?.status).toBe(201); + const bookmarkId = data?.id; + if (!bookmarkId) throw new Error("Bookmark ID not found"); + + // Record the first asset + const { data: bookmark, response: getResponse1 } = await client.GET( + "/bookmarks/{bookmarkId}", + { + params: { path: { bookmarkId } }, + }, + ); + expect(getResponse1.status).toBe(200); + const firstAssetId = bookmark!.assets.find( + (a) => a.assetType === "precrawledArchive", + )?.id; + expect(firstAssetId).toBeDefined(); + + // Second upload with append + const [data2, response2] = await uploadSinglefileAsset("append"); + expect(response2?.status).toBe(200); + expect(data2?.id).toBe(bookmarkId); + + // Get the bookmark again + const { data: bookmark2, response: getResponse2 } = await client.GET( + "/bookmarks/{bookmarkId}", + { + params: { path: { bookmarkId } }, + }, + ); + expect(getResponse2.status).toBe(200); + const precrawledAssets = bookmark2!.assets.filter( + (a) => a.assetType === "precrawledArchive", + ); + expect(precrawledAssets.length).toBe(2); + expect(precrawledAssets.map((a) => a.id)).toContain(firstAssetId); + // The second asset id should be different + const secondAssetId = precrawledAssets.find( + (asset) => asset.id !== firstAssetId, + )?.id; + expect(secondAssetId).toBeDefined(); + }); + + it("should support precrawling via singlefile with ifexists=append-recrawl", async () => 
{ + // First upload + const [data, response] = await uploadSinglefileAsset("append-recrawl"); + expect(response?.status).toBe(201); + const bookmarkId = data?.id; + if (!bookmarkId) throw new Error("Bookmark ID not found"); + + // Record the first asset + const { data: bookmark, response: getResponse1 } = await client.GET( + "/bookmarks/{bookmarkId}", + { + params: { path: { bookmarkId } }, + }, + ); + expect(getResponse1.status).toBe(200); + const firstAssetId = bookmark!.assets.find( + (a) => a.assetType === "precrawledArchive", + )?.id; + expect(firstAssetId).toBeDefined(); + + // Second upload with append-recrawl + const [data2, response2] = await uploadSinglefileAsset("append-recrawl"); + expect(response2?.status).toBe(200); + expect(data2?.id).toBe(bookmarkId); + + // Get the bookmark again + const { data: bookmark2, response: getResponse2 } = await client.GET( + "/bookmarks/{bookmarkId}", + { + params: { path: { bookmarkId } }, + }, + ); + expect(getResponse2.status).toBe(200); + const precrawledAssets = bookmark2!.assets.filter( + (a) => a.assetType === "precrawledArchive", + ); + expect(precrawledAssets.length).toBe(2); + expect(precrawledAssets.map((a) => a.id)).toContain(firstAssetId); + // The second asset id should be different + const secondAssetId = precrawledAssets.find( + (asset) => asset.id !== firstAssetId, + )?.id; + expect(secondAssetId).toBeDefined(); + }); }); }); diff --git a/packages/trpc/lib/attachments.ts b/packages/trpc/lib/attachments.ts index 15cbba74..739aa8f5 100644 --- a/packages/trpc/lib/attachments.ts +++ b/packages/trpc/lib/attachments.ts @@ -55,7 +55,7 @@ export function isAllowedToAttachAsset(type: ZAssetType) { screenshot: true, assetScreenshot: true, fullPageArchive: false, - precrawledArchive: false, + precrawledArchive: true, bannerImage: true, video: false, bookmarkAsset: false, |
