diff options
| author | Mohamed Bassem <me@mbassem.com> | 2026-01-11 09:27:35 +0000 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2026-01-11 09:27:35 +0000 |
| commit | 0f9132b5a9186accd991492b73b9ef904342df29 (patch) | |
| tree | 4050d433cddc5092dc4dcfc2fae29deef4f0028f /packages | |
| parent | 0e938c14044f66f7ad0ffe3eeda5fa8969a83849 (diff) | |
| download | karakeep-0f9132b5a9186accd991492b73b9ef904342df29.tar.zst | |
feat: privacy-respecting bookmark debugger admin tool (#2373)
* fix: parallelize queue enqueues in bookmark routes
* fix: guard meilisearch client init with mutex
* feat: add bookmark debugging admin tool
* more fixes
* more fixes
* more fixes
Diffstat (limited to 'packages')
| -rw-r--r-- | packages/trpc/models/assets.ts | 25 | ||||
| -rw-r--r-- | packages/trpc/models/bookmarks.ts | 145 | ||||
| -rw-r--r-- | packages/trpc/routers/admin.test.ts | 265 | ||||
| -rw-r--r-- | packages/trpc/routers/admin.ts | 172 |
4 files changed, 592 insertions, 15 deletions
diff --git a/packages/trpc/models/assets.ts b/packages/trpc/models/assets.ts index ad114341..f97cfffb 100644 --- a/packages/trpc/models/assets.ts +++ b/packages/trpc/models/assets.ts @@ -4,7 +4,11 @@ import { z } from "zod"; import { assets } from "@karakeep/db/schema"; import { deleteAsset } from "@karakeep/shared/assetdb"; +import serverConfig from "@karakeep/shared/config"; +import { createSignedToken } from "@karakeep/shared/signedTokens"; +import { zAssetSignedTokenSchema } from "@karakeep/shared/types/assets"; import { zAssetTypesSchema } from "@karakeep/shared/types/bookmarks"; +import { getAssetUrl } from "@karakeep/shared/utils/assetUtils"; import { AuthedContext } from ".."; import { @@ -254,4 +258,25 @@ export class Asset { }); } } + + getUrl() { + return getAssetUrl(this.asset.id); + } + + static getPublicSignedAssetUrl( + assetId: string, + assetOwnerId: string, + expireAt: number, + ) { + const payload: z.infer<typeof zAssetSignedTokenSchema> = { + assetId, + userId: assetOwnerId, + }; + const signedToken = createSignedToken( + payload, + serverConfig.signingSecret(), + expireAt, + ); + return `${serverConfig.publicApiUrl}/public/assets/${assetId}?token=${signedToken}`; + } } diff --git a/packages/trpc/models/bookmarks.ts b/packages/trpc/models/bookmarks.ts index 0700246f..c8cd1f00 100644 --- a/packages/trpc/models/bookmarks.ts +++ b/packages/trpc/models/bookmarks.ts @@ -31,12 +31,7 @@ import { } from "@karakeep/db/schema"; import { SearchIndexingQueue, triggerWebhook } from "@karakeep/shared-server"; import { deleteAsset, readAsset } from "@karakeep/shared/assetdb"; -import serverConfig from "@karakeep/shared/config"; -import { - createSignedToken, - getAlignedExpiry, -} from "@karakeep/shared/signedTokens"; -import { zAssetSignedTokenSchema } from "@karakeep/shared/types/assets"; +import { getAlignedExpiry } from "@karakeep/shared/signedTokens"; import { BookmarkTypes, DEFAULT_NUM_BOOKMARKS_PER_PAGE, @@ -55,6 +50,7 @@ import { htmlToPlainText } from "@karakeep/shared/utils/htmlUtils"; import { AuthedContext } from ".."; import { mapDBAssetTypeToUserType } from "../lib/attachments"; +import { Asset } from "./assets"; import { List } from "./lists"; async function dummyDrizzleReturnType() { @@ -271,6 +267,130 @@ export class Bookmark extends BareBookmark { return new Bookmark(ctx, data); } + static async buildDebugInfo(ctx: AuthedContext, bookmarkId: string) { + // Verify the user is an admin + if (ctx.user.role !== "admin") { + throw new TRPCError({ + code: "FORBIDDEN", + message: "Admin access required", + }); + } + + const PRIVACY_REDACTED_ASSET_TYPES = new Set<AssetTypes>([ + AssetTypes.USER_UPLOADED, + AssetTypes.BOOKMARK_ASSET, + ]); + + const bookmark = await ctx.db.query.bookmarks.findFirst({ + where: eq(bookmarks.id, bookmarkId), + with: { + link: true, + text: true, + asset: true, + tagsOnBookmarks: { + with: { + tag: true, + }, + }, + assets: true, + }, + }); + + if (!bookmark) { + throw new TRPCError({ + code: "NOT_FOUND", + message: "Bookmark not found", + }); + } + + // Build link info + let linkInfo = null; + if (bookmark.link) { + const htmlContentPreview = await (async () => { + try { + const content = await Bookmark.getBookmarkHtmlContent( + bookmark.link!, + bookmark.userId, + ); + return content ? content.substring(0, 1000) : null; + } catch { + return null; + } + })(); + + linkInfo = { + url: bookmark.link.url, + crawlStatus: bookmark.link.crawlStatus ?? "pending", + crawlStatusCode: bookmark.link.crawlStatusCode, + crawledAt: bookmark.link.crawledAt, + hasHtmlContent: !!bookmark.link.htmlContent, + hasContentAsset: !!bookmark.link.contentAssetId, + htmlContentPreview, + }; + } + + // Build text info + let textInfo = null; + if (bookmark.text) { + textInfo = { + hasText: !!bookmark.text.text, + sourceUrl: bookmark.text.sourceUrl, + }; + } + + // Build asset info + let assetInfo = null; + if (bookmark.asset) { + assetInfo = { + assetType: bookmark.asset.assetType, + hasContent: !!bookmark.asset.content, + fileName: bookmark.asset.fileName, + }; + } + + // Build tags + const tags = bookmark.tagsOnBookmarks.map((t) => ({ + id: t.tag.id, + name: t.tag.name, + attachedBy: t.attachedBy, + })); + + // Build assets list with signed URLs (exclude userUploaded) + const assetsWithUrls = bookmark.assets.map((a) => { + // Generate signed token with 10 mins expiry + const expiresAt = Date.now() + 10 * 60 * 1000; // 10 mins + // Exclude userUploaded assets for privacy reasons + const url = !PRIVACY_REDACTED_ASSET_TYPES.has(a.assetType) + ? Asset.getPublicSignedAssetUrl(a.id, bookmark.userId, expiresAt) + : null; + + return { + id: a.id, + assetType: a.assetType, + size: a.size, + url, + }; + }); + + return { + id: bookmark.id, + type: bookmark.type, + source: bookmark.source, + createdAt: bookmark.createdAt, + modifiedAt: bookmark.modifiedAt, + title: bookmark.title, + summary: bookmark.summary, + taggingStatus: bookmark.taggingStatus, + summarizationStatus: bookmark.summarizationStatus, + userId: bookmark.userId, + linkInfo, + textInfo, + assetInfo, + tags, + assets: assetsWithUrls, + }; + } + static async loadMulti( ctx: AuthedContext, input: z.infer<typeof zGetBookmarksRequestSchema>, @@ -641,17 +761,12 @@ export class Bookmark extends BareBookmark { asPublicBookmark(): ZPublicBookmark { const getPublicSignedAssetUrl = (assetId: string) => { - const payload: z.infer<typeof zAssetSignedTokenSchema> = { + // Tokens will expire in 1 hour and will have a grace period of 15mins + return Asset.getPublicSignedAssetUrl( assetId, - userId: this.ctx.user.id, - }; - const signedToken = createSignedToken( - payload, - serverConfig.signingSecret(), - // Tokens will expire in 1 hour and will have a grace period of 15mins - getAlignedExpiry(/* interval */ 3600, /* grace */ 900), + this.bookmark.userId, + getAlignedExpiry(3600, 900), ); - return `${serverConfig.publicApiUrl}/public/assets/${assetId}?token=${signedToken}`; }; const getContent = ( content: ZBookmarkContent, diff --git a/packages/trpc/routers/admin.test.ts b/packages/trpc/routers/admin.test.ts new file mode 100644 index 00000000..2f80d9c0 --- /dev/null +++ b/packages/trpc/routers/admin.test.ts @@ -0,0 +1,265 @@ +import { eq } from "drizzle-orm"; +import { assert, beforeEach, describe, expect, test } from "vitest"; + +import { bookmarkLinks, users } from "@karakeep/db/schema"; +import { BookmarkTypes } from "@karakeep/shared/types/bookmarks"; + +import type { CustomTestContext } from "../testUtils"; +import { buildTestContext, getApiCaller } from "../testUtils"; + +beforeEach<CustomTestContext>(async (context) => { + const testContext = await buildTestContext(true); + Object.assign(context, testContext); +}); + +describe("Admin Routes", () => { + describe("getBookmarkDebugInfo", () => { + test<CustomTestContext>("admin can access bookmark debug info for link bookmark", async ({ + apiCallers, + db, + }) => { + // Create an admin user + const adminUser = await db + .insert(users) + .values({ + name: "Admin User", + email: "admin@test.com", + role: "admin", + }) + .returning(); + const adminApi = getApiCaller( + db, + adminUser[0].id, + adminUser[0].email, + "admin", + ); + + // Create a bookmark as a regular user + const bookmark = await apiCallers[0].bookmarks.createBookmark({ + url: "https://example.com", + type: BookmarkTypes.LINK, + }); + + // Update the bookmark link with some metadata + await db + .update(bookmarkLinks) + .set({ + crawlStatus: "success", + crawlStatusCode: 200, + crawledAt: new Date(), + htmlContent: "<html><body>Test content</body></html>", + title: "Test Title", + description: "Test Description", + }) + .where(eq(bookmarkLinks.id, bookmark.id)); + + // Admin should be able to access debug info + const debugInfo = await adminApi.admin.getBookmarkDebugInfo({ + bookmarkId: bookmark.id, + }); + + expect(debugInfo.id).toEqual(bookmark.id); + expect(debugInfo.type).toEqual(BookmarkTypes.LINK); + expect(debugInfo.linkInfo).toBeDefined(); + assert(debugInfo.linkInfo); + expect(debugInfo.linkInfo.url).toEqual("https://example.com"); + expect(debugInfo.linkInfo.crawlStatus).toEqual("success"); + expect(debugInfo.linkInfo.crawlStatusCode).toEqual(200); + expect(debugInfo.linkInfo.hasHtmlContent).toEqual(true); + expect(debugInfo.linkInfo.htmlContentPreview).toBeDefined(); + expect(debugInfo.linkInfo.htmlContentPreview).toContain("Test content"); + }); + + test<CustomTestContext>("admin can access bookmark debug info for text bookmark", async ({ + apiCallers, + db, + }) => { + // Create an admin user + const adminUser = await db + .insert(users) + .values({ + name: "Admin User", + email: "admin@test.com", + role: "admin", + }) + .returning(); + const adminApi = getApiCaller( + db, + adminUser[0].id, + adminUser[0].email, + "admin", + ); + + // Create a text bookmark + const bookmark = await apiCallers[0].bookmarks.createBookmark({ + text: "This is a test text bookmark", + type: BookmarkTypes.TEXT, + }); + + // Admin should be able to access debug info + const debugInfo = await adminApi.admin.getBookmarkDebugInfo({ + bookmarkId: bookmark.id, + }); + + expect(debugInfo.id).toEqual(bookmark.id); + expect(debugInfo.type).toEqual(BookmarkTypes.TEXT); + expect(debugInfo.textInfo).toBeDefined(); + assert(debugInfo.textInfo); + expect(debugInfo.textInfo.hasText).toEqual(true); + }); + + test<CustomTestContext>("admin can see bookmark tags in debug info", async ({ + apiCallers, + db, + }) => { + // Create an admin user + const adminUser = await db + .insert(users) + .values({ + name: "Admin User", + email: "admin@test.com", + role: "admin", + }) + .returning(); + const adminApi = getApiCaller( + db, + adminUser[0].id, + adminUser[0].email, + "admin", + ); + + // Create a bookmark with tags + const bookmark = await apiCallers[0].bookmarks.createBookmark({ + url: "https://example.com", + type: BookmarkTypes.LINK, + }); + + // Add tags to the bookmark + await apiCallers[0].bookmarks.updateTags({ + bookmarkId: bookmark.id, + attach: [{ tagName: "test-tag-1" }, { tagName: "test-tag-2" }], + detach: [], + }); + + // Admin should be able to see tags in debug info + const debugInfo = await adminApi.admin.getBookmarkDebugInfo({ + bookmarkId: bookmark.id, + }); + + expect(debugInfo.tags).toHaveLength(2); + expect(debugInfo.tags.map((t) => t.name).sort()).toEqual([ + "test-tag-1", + "test-tag-2", + ]); + expect(debugInfo.tags[0].attachedBy).toEqual("human"); + }); + + test<CustomTestContext>("non-admin user cannot access bookmark debug info", async ({ + apiCallers, + }) => { + // Create a bookmark + const bookmark = await apiCallers[0].bookmarks.createBookmark({ + url: "https://example.com", + type: BookmarkTypes.LINK, + }); + + // Non-admin user should not be able to access debug info + // The admin procedure itself will throw FORBIDDEN + await expect(() => + apiCallers[0].admin.getBookmarkDebugInfo({ bookmarkId: bookmark.id }), + ).rejects.toThrow(/FORBIDDEN/); + }); + + test<CustomTestContext>("debug info includes asset URLs with signed tokens", async ({ + apiCallers, + db, + }) => { + // Create an admin user + const adminUser = await db + .insert(users) + .values({ + name: "Admin User", + email: "admin@test.com", + role: "admin", + }) + .returning(); + const adminApi = getApiCaller( + db, + adminUser[0].id, + adminUser[0].email, + "admin", + ); + + // Create a bookmark + const bookmark = await apiCallers[0].bookmarks.createBookmark({ + url: "https://example.com", + type: BookmarkTypes.LINK, + }); + + // Get debug info + const debugInfo = await adminApi.admin.getBookmarkDebugInfo({ + bookmarkId: bookmark.id, + }); + + // Check that assets array is present + expect(debugInfo.assets).toBeDefined(); + expect(Array.isArray(debugInfo.assets)).toBe(true); + + // If there are assets, check that they have signed URLs + if (debugInfo.assets.length > 0) { + const asset = debugInfo.assets[0]; + expect(asset.url).toBeDefined(); + expect(asset.url).toContain("/api/public/assets/"); + expect(asset.url).toContain("token="); + } + }); + + test<CustomTestContext>("debug info truncates HTML content preview", async ({ + apiCallers, + db, + }) => { + // Create an admin user + const adminUser = await db + .insert(users) + .values({ + name: "Admin User", + email: "admin@test.com", + role: "admin", + }) + .returning(); + const adminApi = getApiCaller( + db, + adminUser[0].id, + adminUser[0].email, + "admin", + ); + + // Create a bookmark + const bookmark = await apiCallers[0].bookmarks.createBookmark({ + url: "https://example.com", + type: BookmarkTypes.LINK, + }); + + // Create a large HTML content + const largeContent = "<html><body>" + "x".repeat(2000) + "</body></html>"; + await db + .update(bookmarkLinks) + .set({ + htmlContent: largeContent, + }) + .where(eq(bookmarkLinks.id, bookmark.id)); + + // Get debug info + const debugInfo = await adminApi.admin.getBookmarkDebugInfo({ + bookmarkId: bookmark.id, + }); + + // Check that HTML preview is truncated to 1000 characters + assert(debugInfo.linkInfo); + expect(debugInfo.linkInfo.htmlContentPreview).toBeDefined(); + expect(debugInfo.linkInfo.htmlContentPreview!.length).toBeLessThanOrEqual( + 1000, + ); + }); + }); +}); diff --git a/packages/trpc/routers/admin.ts b/packages/trpc/routers/admin.ts index 44a51cad..3236529d 100644 --- a/packages/trpc/routers/admin.ts +++ b/packages/trpc/routers/admin.ts @@ -17,6 +17,7 @@ import { zAdminMaintenanceTaskSchema, } from "@karakeep/shared-server"; import serverConfig from "@karakeep/shared/config"; +import logger from "@karakeep/shared/logger"; import { PluginManager, PluginType } from "@karakeep/shared/plugins"; import { getSearchClient } from "@karakeep/shared/search"; import { @@ -24,9 +25,11 @@ import { updateUserSchema, zAdminCreateUserSchema, } from "@karakeep/shared/types/admin"; +import { BookmarkTypes } from "@karakeep/shared/types/bookmarks"; import { generatePasswordSalt, hashPassword } from "../auth"; import { adminProcedure, router } from "../index"; +import { Bookmark } from "../models/bookmarks"; import { User } from "../models/users"; export const adminAppRouter = router({ @@ -558,4 +561,173 @@ export const adminAppRouter = router({ queue: queueStatus, }; }), + getBookmarkDebugInfo: adminProcedure + .input(z.object({ bookmarkId: z.string() })) + .output( + z.object({ + id: z.string(), + type: z.enum([ + BookmarkTypes.LINK, + BookmarkTypes.TEXT, + BookmarkTypes.ASSET, + ]), + source: z + .enum([ + "api", + "web", + "extension", + "cli", + "mobile", + "singlefile", + "rss", + "import", + ]) + .nullable(), + createdAt: z.date(), + modifiedAt: z.date().nullable(), + title: z.string().nullable(), + summary: z.string().nullable(), + taggingStatus: z.enum(["pending", "failure", "success"]).nullable(), + summarizationStatus: z + .enum(["pending", "failure", "success"]) + .nullable(), + userId: z.string(), + linkInfo: z + .object({ + url: z.string(), + crawlStatus: z.enum(["pending", "failure", "success"]), + crawlStatusCode: z.number().nullable(), + crawledAt: z.date().nullable(), + hasHtmlContent: z.boolean(), + hasContentAsset: z.boolean(), + htmlContentPreview: z.string().nullable(), + }) + .nullable(), + textInfo: z + .object({ + hasText: z.boolean(), + sourceUrl: z.string().nullable(), + }) + .nullable(), + assetInfo: z + .object({ + assetType: z.enum(["image", "pdf"]), + hasContent: z.boolean(), + fileName: z.string().nullable(), + }) + .nullable(), + tags: z.array( + z.object({ + id: z.string(), + name: z.string(), + attachedBy: z.enum(["ai", "human"]), + }), + ), + assets: z.array( + z.object({ + id: z.string(), + assetType: z.string(), + size: z.number(), + url: z.string().nullable(), + }), + ), + }), + ) + .query(async ({ input, ctx }) => { + logger.info( + `[admin] Admin ${ctx.user.id} accessed debug info for bookmark ${input.bookmarkId}`, + ); + + return await Bookmark.buildDebugInfo(ctx, input.bookmarkId); + }), + adminRecrawlBookmark: adminProcedure + .input(z.object({ bookmarkId: z.string() })) + .mutation(async ({ input, ctx }) => { + // Verify bookmark exists and is a link + const bookmark = await ctx.db.query.bookmarks.findFirst({ + where: eq(bookmarks.id, input.bookmarkId), + }); + + if (!bookmark) { + throw new TRPCError({ + code: "NOT_FOUND", + message: "Bookmark not found", + }); + } + + if (bookmark.type !== BookmarkTypes.LINK) { + throw new TRPCError({ + code: "BAD_REQUEST", + message: "Only link bookmarks can be recrawled", + }); + } + + await LinkCrawlerQueue.enqueue({ + bookmarkId: input.bookmarkId, + }); + }), + adminReindexBookmark: adminProcedure + .input(z.object({ bookmarkId: z.string() })) + .mutation(async ({ input, ctx }) => { + // Verify bookmark exists + const bookmark = await ctx.db.query.bookmarks.findFirst({ + where: eq(bookmarks.id, input.bookmarkId), + }); + + if (!bookmark) { + throw new TRPCError({ + code: "NOT_FOUND", + message: "Bookmark not found", + }); + } + + await triggerSearchReindex(input.bookmarkId); + }), + adminRetagBookmark: adminProcedure + .input(z.object({ bookmarkId: z.string() })) + .mutation(async ({ input, ctx }) => { + // Verify bookmark exists + const bookmark = await ctx.db.query.bookmarks.findFirst({ + where: eq(bookmarks.id, input.bookmarkId), + }); + + if (!bookmark) { + throw new TRPCError({ + code: "NOT_FOUND", + message: "Bookmark not found", + }); + } + + await OpenAIQueue.enqueue({ + bookmarkId: input.bookmarkId, + type: "tag", + }); + }), + adminResummarizeBookmark: adminProcedure + .input(z.object({ bookmarkId: z.string() })) + .mutation(async ({ input, ctx }) => { + // Verify bookmark exists and is a link + const bookmark = await ctx.db.query.bookmarks.findFirst({ + where: eq(bookmarks.id, input.bookmarkId), + }); + + if (!bookmark) { + throw new TRPCError({ + code: "NOT_FOUND", + message: "Bookmark not found", + }); + } + + if (bookmark.type !== BookmarkTypes.LINK) { + throw new TRPCError({ + code: "BAD_REQUEST", + message: "Only link bookmarks can be summarized", + }); + } + + await OpenAIQueue.enqueue({ + bookmarkId: input.bookmarkId, + type: "summarize", + }); + }), }); |
