diff options
Diffstat (limited to 'packages/shared')
26 files changed, 1705 insertions, 388 deletions
diff --git a/packages/shared/config.ts b/packages/shared/config.ts index 60beae1e..cfcf1532 100644 --- a/packages/shared/config.ts +++ b/packages/shared/config.ts @@ -58,6 +58,8 @@ const allEnv = z.object({ TURNSTILE_SECRET_KEY: z.string().optional(), OPENAI_API_KEY: z.string().optional(), OPENAI_BASE_URL: z.string().url().optional(), + OPENAI_PROXY_URL: z.string().url().optional(), + OPENAI_SERVICE_TIER: z.enum(["auto", "default", "flex"]).optional(), OLLAMA_BASE_URL: z.string().url().optional(), OLLAMA_KEEP_ALIVE: z.string().optional(), INFERENCE_JOB_TIMEOUT_SEC: z.coerce.number().default(30), @@ -80,6 +82,7 @@ const allEnv = z.object({ .default("eng") .transform((val) => val.split(",")), OCR_CONFIDENCE_THRESHOLD: z.coerce.number().default(50), + OCR_USE_LLM: stringBool("false"), CRAWLER_HEADLESS_BROWSER: stringBool("true"), BROWSER_WEB_URL: z.string().optional(), BROWSER_WEBSOCKET_URL: z.string().optional(), @@ -93,10 +96,12 @@ const allEnv = z.object({ SEARCH_JOB_TIMEOUT_SEC: z.coerce.number().default(30), WEBHOOK_NUM_WORKERS: z.coerce.number().default(1), ASSET_PREPROCESSING_NUM_WORKERS: z.coerce.number().default(1), + ASSET_PREPROCESSING_JOB_TIMEOUT_SEC: z.coerce.number().default(60), RULE_ENGINE_NUM_WORKERS: z.coerce.number().default(1), CRAWLER_DOWNLOAD_BANNER_IMAGE: stringBool("true"), CRAWLER_STORE_SCREENSHOT: stringBool("true"), CRAWLER_FULL_PAGE_SCREENSHOT: stringBool("false"), + CRAWLER_STORE_PDF: stringBool("false"), CRAWLER_FULL_PAGE_ARCHIVE: stringBool("false"), CRAWLER_VIDEO_DOWNLOAD: stringBool("false"), CRAWLER_VIDEO_DOWNLOAD_MAX_SIZE: z.coerce.number().default(50), @@ -122,8 +127,11 @@ const allEnv = z.object({ INFERENCE_LANG: z.string().default("english"), WEBHOOK_TIMEOUT_SEC: z.coerce.number().default(5), WEBHOOK_RETRY_TIMES: z.coerce.number().int().min(0).default(3), + MAX_RSS_FEEDS_PER_USER: z.coerce.number().default(1000), + MAX_WEBHOOKS_PER_USER: z.coerce.number().default(100), // Build only flag SERVER_VERSION: z.string().optional(), + CHANGELOG_VERSION: z.string().optional(), DISABLE_NEW_RELEASE_CHECK: stringBool("false"), // A flag to detect if the user is running in the old separete containers setup @@ -205,6 +213,12 @@ const allEnv = z.object({ // Database configuration DB_WAL_MODE: stringBool("false"), + + // OpenTelemetry tracing configuration + OTEL_TRACING_ENABLED: stringBool("false"), + OTEL_EXPORTER_OTLP_ENDPOINT: z.string().url().optional(), + OTEL_SERVICE_NAME: z.string().default("karakeep"), + OTEL_SAMPLE_RATE: z.coerce.number().min(0).max(1).default(1.0), }); const serverConfigSchema = allEnv.transform((val, ctx) => { @@ -264,6 +278,8 @@ const serverConfigSchema = allEnv.transform((val, ctx) => { fetchTimeoutSec: val.INFERENCE_FETCH_TIMEOUT_SEC, openAIApiKey: val.OPENAI_API_KEY, openAIBaseUrl: val.OPENAI_BASE_URL, + openAIProxyUrl: val.OPENAI_PROXY_URL, + openAIServiceTier: val.OPENAI_SERVICE_TIER, ollamaBaseUrl: val.OLLAMA_BASE_URL, ollamaKeepAlive: val.OLLAMA_KEEP_ALIVE, textModel: val.INFERENCE_TEXT_MODEL, @@ -296,6 +312,7 @@ const serverConfigSchema = allEnv.transform((val, ctx) => { downloadBannerImage: val.CRAWLER_DOWNLOAD_BANNER_IMAGE, storeScreenshot: val.CRAWLER_STORE_SCREENSHOT, fullPageScreenshot: val.CRAWLER_FULL_PAGE_SCREENSHOT, + storePdf: val.CRAWLER_STORE_PDF, fullPageArchive: val.CRAWLER_FULL_PAGE_ARCHIVE, downloadVideo: val.CRAWLER_VIDEO_DOWNLOAD, maxVideoDownloadSize: val.CRAWLER_VIDEO_DOWNLOAD_MAX_SIZE, @@ -321,6 +338,7 @@ const serverConfigSchema = allEnv.transform((val, ctx) => { langs: val.OCR_LANGS, cacheDir: val.OCR_CACHE_DIR, confidenceThreshold: val.OCR_CONFIDENCE_THRESHOLD, + useLLM: val.OCR_USE_LLM, }, search: { numWorkers: val.SEARCH_NUM_WORKERS, @@ -338,12 +356,17 @@ const serverConfigSchema = allEnv.transform((val, ctx) => { assetsDir: val.ASSETS_DIR ?? path.join(val.DATA_DIR, "assets"), maxAssetSizeMb: val.MAX_ASSET_SIZE_MB, serverVersion: val.SERVER_VERSION, + changelogVersion: val.CHANGELOG_VERSION, disableNewReleaseCheck: val.DISABLE_NEW_RELEASE_CHECK, usingLegacySeparateContainers: val.USING_LEGACY_SEPARATE_CONTAINERS, webhook: { timeoutSec: val.WEBHOOK_TIMEOUT_SEC, retryTimes: val.WEBHOOK_RETRY_TIMES, numWorkers: val.WEBHOOK_NUM_WORKERS, + maxWebhooksPerUser: val.MAX_WEBHOOKS_PER_USER, + }, + feeds: { + maxRssFeedsPerUser: val.MAX_RSS_FEEDS_PER_USER, }, proxy: { httpProxy: val.CRAWLER_HTTP_PROXY, @@ -353,6 +376,7 @@ const serverConfigSchema = allEnv.transform((val, ctx) => { allowedInternalHostnames: val.CRAWLER_ALLOWED_INTERNAL_HOSTNAMES, assetPreprocessing: { numWorkers: val.ASSET_PREPROCESSING_NUM_WORKERS, + jobTimeoutSec: val.ASSET_PREPROCESSING_JOB_TIMEOUT_SEC, }, ruleEngine: { numWorkers: val.RULE_ENGINE_NUM_WORKERS, @@ -399,6 +423,12 @@ const serverConfigSchema = allEnv.transform((val, ctx) => { database: { walMode: val.DB_WAL_MODE, }, + tracing: { + enabled: val.OTEL_TRACING_ENABLED, + otlpEndpoint: val.OTEL_EXPORTER_OTLP_ENDPOINT, + serviceName: val.OTEL_SERVICE_NAME, + sampleRate: val.OTEL_SAMPLE_RATE, + }, }; if (obj.auth.emailVerificationRequired && !obj.email.smtp) { ctx.addIssue({ @@ -441,6 +471,8 @@ export const clientConfig = { inference: { isConfigured: serverConfig.inference.isConfigured, inferredTagLang: serverConfig.inference.inferredTagLang, + enableAutoTagging: serverConfig.inference.enableAutoTagging, + enableAutoSummarization: serverConfig.inference.enableAutoSummarization, }, serverVersion: serverConfig.serverVersion, disableNewReleaseCheck: serverConfig.disableNewReleaseCheck, diff --git a/packages/shared/import-export/importer.test.ts b/packages/shared/import-export/importer.test.ts index 48cd1204..f097f8d5 100644 --- a/packages/shared/import-export/importer.test.ts +++ b/packages/shared/import-export/importer.test.ts @@ -1,13 +1,14 @@ import { describe, expect, it, vi } from "vitest"; -import { importBookmarksFromFile, ParsedBookmark } from "."; +import type { StagedBookmark } from "."; +import { importBookmarksFromFile } from "."; const fakeFile = { text: vi.fn().mockResolvedValue("fake file content"), } as unknown as File; describe("importBookmarksFromFile", () => { - it("creates root list, folders and imports bookmarks with progress", async () => { + it("creates root list, folders and stages bookmarks with progress", async () => { const parsers = { pocket: vi.fn().mockReturnValue([ { @@ -61,32 +62,23 @@ describe("importBookmarksFromFile", () => { }, ); - const createdBookmarks: ParsedBookmark[] = []; - const addedToLists: { bookmarkId: string; listIds: string[] }[] = []; - const updatedTags: { bookmarkId: string; tags: string[] }[] = []; - - const createBookmark = vi.fn(async (bookmark: ParsedBookmark) => { - createdBookmarks.push(bookmark); - return { - id: `bookmark-${createdBookmarks.length}`, - alreadyExists: false, - }; - }); - - const addBookmarkToLists = vi.fn( - async (input: { bookmarkId: string; listIds: string[] }) => { - addedToLists.push(input); + const stagedBookmarks: StagedBookmark[] = []; + const stageImportedBookmarks = vi.fn( + async (input: { + importSessionId: string; + bookmarks: StagedBookmark[]; + }) => { + stagedBookmarks.push(...input.bookmarks); }, ); - const updateBookmarkTags = vi.fn( - async (input: { bookmarkId: string; tags: string[] }) => { - updatedTags.push(input); - }, + const finalizeImportStaging = vi.fn(); + const createImportSession = vi.fn( + async (_input: { name: string; rootListId: string }) => ({ + id: "session-1", + }), ); - const createImportSession = vi.fn(async () => ({ id: "session-1" })); - const progress: number[] = []; const res = await importBookmarksFromFile( { @@ -95,9 +87,8 @@ describe("importBookmarksFromFile", () => { rootListName: "Imported", deps: { createList, - createBookmark, - addBookmarkToLists, - updateBookmarkTags, + stageImportedBookmarks, + finalizeImportStaging, createImportSession, }, onProgress: (d, t) => progress.push(d / t), @@ -106,12 +97,14 @@ describe("importBookmarksFromFile", () => { ); expect(res.rootListId).toBe("Imported"); + expect(res.importSessionId).toBe("session-1"); expect(res.counts).toEqual({ - successes: 5, + successes: 0, failures: 0, alreadyExisted: 0, total: 5, // Using custom parser, no deduplication }); + // Root + all unique folders from paths expect(createdLists).toEqual([ { name: "Imported", icon: "ā¬ļø" }, @@ -122,38 +115,43 @@ describe("importBookmarksFromFile", () => { { name: "Tech", parentId: "Imported/Reading", icon: "š" }, { name: "Duplicates", parentId: "Imported/Development", icon: "š" }, ]); - // Verify we have 5 created bookmarks (no deduplication with custom parser) - expect(createdBookmarks).toHaveLength(5); - // Verify GitHub bookmark exists (will be two separate bookmarks since no deduplication) - const githubBookmarks = createdBookmarks.filter( - (bookmark) => - bookmark.content?.type === "link" && - bookmark.content.url === "https://github.com/example/repo", - ); - expect(githubBookmarks).toHaveLength(2); - // Verify text bookmark exists - const textBookmark = createdBookmarks.find( - (bookmark) => bookmark.content?.type === "text", + + // Verify 5 bookmarks were staged (in 1 batch since < 50) + expect(stagedBookmarks).toHaveLength(5); + expect(stageImportedBookmarks).toHaveBeenCalledTimes(1); + + // Verify GitHub link bookmark was staged correctly + const githubBookmark = stagedBookmarks.find( + (b) => b.url === "https://github.com/example/repo" && b.type === "link", ); + expect(githubBookmark).toBeDefined(); + if (!githubBookmark) { + throw new Error("Expected GitHub bookmark to be staged"); + } + expect(githubBookmark.title).toBe("GitHub Repository"); + expect(githubBookmark.tags).toEqual(["dev", "github"]); + expect(githubBookmark.listIds).toEqual(["Imported/Development/Projects"]); + + // Verify text bookmark was staged correctly + const textBookmark = stagedBookmarks.find((b) => b.type === "text"); expect(textBookmark).toBeDefined(); - expect(textBookmark!.archived).toBe(true); - expect(textBookmark!.notes).toBe("Additional context"); - // Verify bookmark with no path goes to root - const noCategoryBookmark = createdBookmarks.find( - (bookmark) => - bookmark.content?.type === "link" && - bookmark.content.url === "https://example.com/misc", + if (!textBookmark) { + throw new Error("Expected text bookmark to be staged"); + } + expect(textBookmark.content).toBe("Important notes about the project"); + expect(textBookmark.note).toBe("Additional context"); + expect(textBookmark.listIds).toEqual(["Imported/Personal"]); + + // Verify bookmark with empty paths gets root list ID + const noCategoryBookmark = stagedBookmarks.find( + (b) => b.url === "https://example.com/misc", ); expect(noCategoryBookmark).toBeDefined(); - // Find the corresponding list assignment for this bookmark - const noCategoryBookmarkId = `bookmark-${createdBookmarks.indexOf(noCategoryBookmark!) + 1}`; - const listAssignment = addedToLists.find( - (a) => a.bookmarkId === noCategoryBookmarkId, - ); - expect(listAssignment!.listIds).toEqual(["Imported"]); + expect(noCategoryBookmark!.listIds).toEqual(["Imported"]); + + // Verify finalizeImportStaging was called + expect(finalizeImportStaging).toHaveBeenCalledWith("session-1"); - // Verify that tags were updated for bookmarks that have tags - expect(updatedTags.length).toBeGreaterThan(0); expect(progress).toContain(0); expect(progress.at(-1)).toBe(1); }); @@ -167,9 +165,8 @@ describe("importBookmarksFromFile", () => { rootListName: "Imported", deps: { createList: vi.fn(), - createBookmark: vi.fn(), - addBookmarkToLists: vi.fn(), - updateBookmarkTags: vi.fn(), + stageImportedBookmarks: vi.fn(), + finalizeImportStaging: vi.fn(), createImportSession: vi.fn(async () => ({ id: "session-1" })), }, }, @@ -182,29 +179,29 @@ describe("importBookmarksFromFile", () => { }); }); - it("continues import when individual bookmarks fail", async () => { + it("stages all bookmarks successfully", async () => { const parsers = { pocket: vi.fn().mockReturnValue([ { - title: "Success Bookmark 1", - content: { type: "link", url: "https://example.com/success1" }, - tags: ["success"], + title: "Bookmark 1", + content: { type: "link", url: "https://example.com/1" }, + tags: ["tag1"], addDate: 100, - paths: [["Success"]], + paths: [["Category1"]], }, { - title: "Failure Bookmark", - content: { type: "link", url: "https://example.com/failure" }, - tags: ["failure"], + title: "Bookmark 2", + content: { type: "link", url: "https://example.com/2" }, + tags: ["tag2"], addDate: 200, - paths: [["Failure"]], + paths: [["Category2"]], }, { - title: "Success Bookmark 2", - content: { type: "link", url: "https://example.com/success2" }, - tags: ["success"], + title: "Bookmark 3", + content: { type: "link", url: "https://example.com/3" }, + tags: ["tag3"], addDate: 300, - paths: [["Success"]], + paths: [["Category1"]], }, ]), }; @@ -220,37 +217,23 @@ describe("importBookmarksFromFile", () => { }, ); - const createdBookmarks: ParsedBookmark[] = []; - const addedToLists: { bookmarkId: string; listIds: string[] }[] = []; - const updatedTags: { bookmarkId: string; tags: string[] }[] = []; - - const createBookmark = vi.fn(async (bookmark: ParsedBookmark) => { - // Simulate failure for the "Failure Bookmark" - if (bookmark.title === "Failure Bookmark") { - throw new Error("Simulated bookmark creation failure"); - } - - createdBookmarks.push(bookmark); - return { - id: `bookmark-${createdBookmarks.length}`, - alreadyExists: false, - }; - }); - - const addBookmarkToLists = vi.fn( - async (input: { bookmarkId: string; listIds: string[] }) => { - addedToLists.push(input); + const stagedBookmarks: StagedBookmark[] = []; + const stageImportedBookmarks = vi.fn( + async (input: { + importSessionId: string; + bookmarks: StagedBookmark[]; + }) => { + stagedBookmarks.push(...input.bookmarks); }, ); - const updateBookmarkTags = vi.fn( - async (input: { bookmarkId: string; tags: string[] }) => { - updatedTags.push(input); - }, + const finalizeImportStaging = vi.fn(); + const createImportSession = vi.fn( + async (_input: { name: string; rootListId: string }) => ({ + id: "session-1", + }), ); - const createImportSession = vi.fn(async () => ({ id: "session-1" })); - const progress: number[] = []; const res = await importBookmarksFromFile( { @@ -259,9 +242,8 @@ describe("importBookmarksFromFile", () => { rootListName: "Imported", deps: { createList, - createBookmark, - addBookmarkToLists, - updateBookmarkTags, + stageImportedBookmarks, + finalizeImportStaging, createImportSession, }, onProgress: (d, t) => progress.push(d / t), @@ -269,63 +251,57 @@ describe("importBookmarksFromFile", () => { { parsers }, ); - // Should still create the root list expect(res.rootListId).toBe("Imported"); - - // Should track both successes and failures + expect(res.importSessionId).toBe("session-1"); expect(res.counts).toEqual({ - successes: 2, // Two successful bookmarks - failures: 1, // One failed bookmark + successes: 0, + failures: 0, alreadyExisted: 0, total: 3, }); - // Should create folders for all bookmarks (including failed ones) + // Should create folders for all bookmarks expect(createdLists).toEqual([ { name: "Imported", icon: "ā¬ļø" }, - { name: "Success", parentId: "Imported", icon: "š" }, - { name: "Failure", parentId: "Imported", icon: "š" }, + { name: "Category1", parentId: "Imported", icon: "š" }, + { name: "Category2", parentId: "Imported", icon: "š" }, ]); - // Only successful bookmarks should be created - expect(createdBookmarks).toHaveLength(2); - expect(createdBookmarks.map((b) => b.title)).toEqual([ - "Success Bookmark 1", - "Success Bookmark 2", - ]); + // All bookmarks should be staged (in 1 batch since < 50) + expect(stagedBookmarks).toHaveLength(3); + expect(stageImportedBookmarks).toHaveBeenCalledTimes(1); - // Only successful bookmarks should be added to lists and have tags updated - expect(addedToLists).toHaveLength(2); - expect(updatedTags).toHaveLength(2); + // Verify finalizeImportStaging was called + expect(finalizeImportStaging).toHaveBeenCalledWith("session-1"); - // Progress should complete even with failures + // Progress should complete expect(progress).toContain(0); expect(progress.at(-1)).toBe(1); }); - it("handles failures in different stages of bookmark import", async () => { + it("stages bookmarks with different paths", async () => { const parsers = { pocket: vi.fn().mockReturnValue([ { - title: "Success Bookmark", - content: { type: "link", url: "https://example.com/success" }, - tags: ["success"], + title: "Bookmark 1", + content: { type: "link", url: "https://example.com/1" }, + tags: ["tag1"], addDate: 100, - paths: [["Success"]], + paths: [["Path1"]], }, { - title: "Fail at List Assignment", - content: { type: "link", url: "https://example.com/fail-list" }, - tags: ["fail"], + title: "Bookmark 2", + content: { type: "link", url: "https://example.com/2" }, + tags: ["tag2"], addDate: 200, - paths: [["Failure"]], + paths: [["Path2"]], }, { - title: "Fail at Tag Update", - content: { type: "link", url: "https://example.com/fail-tag" }, - tags: ["fail-tag"], + title: "Bookmark 3", + content: { type: "link", url: "https://example.com/3" }, + tags: ["tag3"], addDate: 300, - paths: [["Failure"]], + paths: [["Path2"]], }, ]), }; @@ -338,31 +314,23 @@ describe("importBookmarksFromFile", () => { }, ); - let bookmarkIdCounter = 1; - const createBookmark = vi.fn(async () => { - return { id: `bookmark-${bookmarkIdCounter++}`, alreadyExists: false }; - }); - - const addBookmarkToLists = vi.fn( - async (input: { bookmarkId: string; listIds: string[] }) => { - // Simulate failure for specific bookmark - if (input.bookmarkId === "bookmark-2") { - throw new Error("Failed to add bookmark to lists"); - } + const stagedBookmarks: StagedBookmark[] = []; + const stageImportedBookmarks = vi.fn( + async (input: { + importSessionId: string; + bookmarks: StagedBookmark[]; + }) => { + stagedBookmarks.push(...input.bookmarks); }, ); - const updateBookmarkTags = vi.fn( - async (input: { bookmarkId: string; tags: string[] }) => { - // Simulate failure for specific bookmark - if (input.bookmarkId === "bookmark-3") { - throw new Error("Failed to update bookmark tags"); - } - }, + const finalizeImportStaging = vi.fn(); + const createImportSession = vi.fn( + async (_input: { name: string; rootListId: string }) => ({ + id: "session-1", + }), ); - const createImportSession = vi.fn(async () => ({ id: "session-1" })); - const progress: number[] = []; const res = await importBookmarksFromFile( { @@ -371,9 +339,8 @@ describe("importBookmarksFromFile", () => { rootListName: "Imported", deps: { createList, - createBookmark, - addBookmarkToLists, - updateBookmarkTags, + stageImportedBookmarks, + finalizeImportStaging, createImportSession, }, onProgress: (d, t) => progress.push(d / t), @@ -383,23 +350,110 @@ describe("importBookmarksFromFile", () => { expect(res.rootListId).toBe("Imported"); expect(res.importSessionId).toBe("session-1"); - - // All bookmarks are created successfully, but 2 fail in post-processing expect(res.counts).toEqual({ - successes: 1, // Only one fully successful bookmark - failures: 2, // Two failed in post-processing steps + successes: 0, + failures: 0, alreadyExisted: 0, total: 3, }); - // All bookmarks should be created (failures happen after bookmark creation) - expect(createBookmark).toHaveBeenCalledTimes(3); + // All bookmarks should be staged (in 1 batch since < 50) + expect(stagedBookmarks).toHaveLength(3); + expect(stageImportedBookmarks).toHaveBeenCalledTimes(1); + + // Verify finalizeImportStaging was called + expect(finalizeImportStaging).toHaveBeenCalledWith("session-1"); + }); + + it("handles HTML bookmarks with empty folder names", async () => { + const htmlContent = `<!DOCTYPE NETSCAPE-Bookmark-file-1> +<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8"> +<TITLE>Bookmarks</TITLE> +<H1>Bookmarks</H1> +<DL><p> + <DT><H3 ADD_DATE="1765995928" LAST_MODIFIED="1765995928">Bluetooth Fernbedienung</H3> + <DL><p> + <DT><H3 ADD_DATE="1765995928" LAST_MODIFIED="0"></H3> + <DL><p> + <DT><A HREF="https://www.example.com/product.html" ADD_DATE="1593444456">Example Product</A> + </DL><p> + </DL><p> +</DL><p>`; + + const mockFile = { + text: vi.fn().mockResolvedValue(htmlContent), + } as unknown as File; + + const createdLists: { name: string; icon: string; parentId?: string }[] = + []; + const createList = vi.fn( + async (input: { name: string; icon: string; parentId?: string }) => { + createdLists.push(input); + return { + id: `${input.parentId ? input.parentId + "/" : ""}${input.name}`, + }; + }, + ); + + const stagedBookmarks: StagedBookmark[] = []; + const stageImportedBookmarks = vi.fn( + async (input: { + importSessionId: string; + bookmarks: StagedBookmark[]; + }) => { + stagedBookmarks.push(...input.bookmarks); + }, + ); + + const finalizeImportStaging = vi.fn(); + const createImportSession = vi.fn( + async (_input: { name: string; rootListId: string }) => ({ + id: "session-1", + }), + ); + + const res = await importBookmarksFromFile({ + file: mockFile, + source: "html", + rootListName: "HTML Import", + deps: { + createList, + stageImportedBookmarks, + finalizeImportStaging, + createImportSession, + }, + }); + + expect(res.counts).toEqual({ + successes: 0, + failures: 0, + alreadyExisted: 0, + total: 1, + }); + + // Verify that the empty folder name was replaced with "Unnamed" + expect(createdLists).toEqual([ + { name: "HTML Import", icon: "ā¬ļø" }, + { name: "Bluetooth Fernbedienung", parentId: "HTML Import", icon: "š" }, + { + name: "Unnamed", + parentId: "HTML Import/Bluetooth Fernbedienung", + icon: "š", + }, + ]); - // addBookmarkToLists should be called 3 times (but one fails) - expect(addBookmarkToLists).toHaveBeenCalledTimes(3); + // Verify the bookmark was staged with correct listIds + expect(stagedBookmarks).toHaveLength(1); + expect(stagedBookmarks[0]).toMatchObject({ + title: "Example Product", + url: "https://www.example.com/product.html", + type: "link", + tags: [], + listIds: ["HTML Import/Bluetooth Fernbedienung/Unnamed"], + }); - // updateBookmarkTags should be called 2 times (once fails at list assignment, one fails at tag update) - expect(updateBookmarkTags).toHaveBeenCalledTimes(2); + // Verify finalizeImportStaging was called + expect(finalizeImportStaging).toHaveBeenCalledWith("session-1"); }); it("parses mymind CSV export correctly", async () => { @@ -413,14 +467,22 @@ describe("importBookmarksFromFile", () => { text: vi.fn().mockResolvedValue(mymindCsv), } as unknown as File; - const createdBookmarks: ParsedBookmark[] = []; - const createBookmark = vi.fn(async (bookmark: ParsedBookmark) => { - createdBookmarks.push(bookmark); - return { - id: `bookmark-${createdBookmarks.length}`, - alreadyExists: false, - }; - }); + const stagedBookmarks: StagedBookmark[] = []; + const stageImportedBookmarks = vi.fn( + async (input: { + importSessionId: string; + bookmarks: StagedBookmark[]; + }) => { + stagedBookmarks.push(...input.bookmarks); + }, + ); + + const finalizeImportStaging = vi.fn(); + const createImportSession = vi.fn( + async (_input: { name: string; rootListId: string }) => ({ + id: "session-1", + }), + ); const res = await importBookmarksFromFile({ file: mockFile, @@ -432,52 +494,54 @@ describe("importBookmarksFromFile", () => { id: `${input.parentId ? input.parentId + "/" : ""}${input.name}`, }), ), - createBookmark, - addBookmarkToLists: vi.fn(), - updateBookmarkTags: vi.fn(), - createImportSession: vi.fn(async () => ({ id: "session-1" })), + stageImportedBookmarks, + finalizeImportStaging, + createImportSession, }, }); expect(res.counts).toEqual({ - successes: 3, + successes: 0, failures: 0, alreadyExisted: 0, total: 3, }); - // Verify first bookmark (WebPage with URL) - expect(createdBookmarks[0]).toMatchObject({ + // Verify 3 bookmarks were staged + expect(stagedBookmarks).toHaveLength(3); + + // Verify first bookmark (WebPage with URL) - mymind has no paths, so root list + expect(stagedBookmarks[0]).toMatchObject({ title: "mymind", - content: { - type: "link", - url: "https://access.mymind.com/everything", - }, + url: "https://access.mymind.com/everything", + type: "link", tags: ["Wellness", "Self-Improvement", "Psychology"], + listIds: ["mymind Import"], }); - expect(createdBookmarks[0].addDate).toBeCloseTo( - new Date("2024-12-04T23:02:10Z").getTime() / 1000, + expect(stagedBookmarks[0].sourceAddedAt).toEqual( + new Date("2024-12-04T23:02:10Z"), ); // Verify second bookmark (WebPage with note) - expect(createdBookmarks[1]).toMatchObject({ + expect(stagedBookmarks[1]).toMatchObject({ title: "Movies / TV / Anime", - content: { - type: "link", - url: "https://fmhy.pages.dev/videopiracyguide", - }, + url: "https://fmhy.pages.dev/videopiracyguide", + type: "link", tags: ["Tools", "media", "Entertainment"], - notes: "Free Media!", + note: "Free Media!", + listIds: ["mymind Import"], }); // Verify third bookmark (Note with text content) - expect(createdBookmarks[2]).toMatchObject({ + expect(stagedBookmarks[2]).toMatchObject({ title: "", - content: { - type: "text", - text: "⢠Critical Thinking\n⢠Empathy", - }, + content: "⢠Critical Thinking\n⢠Empathy", + type: "text", tags: [], + listIds: ["mymind Import"], }); + + // Verify finalizeImportStaging was called + expect(finalizeImportStaging).toHaveBeenCalledWith("session-1"); }); }); diff --git a/packages/shared/import-export/importer.ts b/packages/shared/import-export/importer.ts index b32c49c1..be24ca73 100644 --- a/packages/shared/import-export/importer.ts +++ b/packages/shared/import-export/importer.ts @@ -1,4 +1,3 @@ -import { limitConcurrency } from "../concurrency"; import { MAX_LIST_NAME_LENGTH } from "../types/lists"; import { ImportSource, ParsedBookmark, parseImportFile } from "./parsers"; @@ -9,28 +8,32 @@ export interface ImportCounts { total: number; } +export interface StagedBookmark { + type: "link" | "text" | "asset"; + url?: string; + title?: string; + content?: string; + note?: string; + tags: string[]; + listIds: string[]; + sourceAddedAt?: Date; +} + export interface ImportDeps { createList: (input: { name: string; icon: string; parentId?: string; }) => Promise<{ id: string }>; - createBookmark: ( - bookmark: ParsedBookmark, - sessionId: string, - ) => Promise<{ id: string; alreadyExists?: boolean }>; - addBookmarkToLists: (input: { - bookmarkId: string; - listIds: string[]; - }) => Promise<void>; - updateBookmarkTags: (input: { - bookmarkId: string; - tags: string[]; + stageImportedBookmarks: (input: { + importSessionId: string; + bookmarks: StagedBookmark[]; }) => Promise<void>; createImportSession: (input: { name: string; rootListId: string; }) => Promise<{ id: string }>; + finalizeImportStaging: (sessionId: string) => Promise<void>; } export interface ImportOptions { @@ -62,7 +65,7 @@ export async function importBookmarksFromFile( }, options: ImportOptions = {}, ): Promise<ImportResult> { - const { concurrencyLimit = 20, parsers } = options; + const { parsers } = options; const textContent = await file.text(); const parsedBookmarks = parsers?.[source] @@ -120,50 +123,74 @@ export async function importBookmarksFromFile( pathMap[pathKey] = folderList.id; } - let done = 0; - const importPromises = parsedBookmarks.map((bookmark) => async () => { - try { - const listIds = bookmark.paths.map( - (path) => pathMap[path.join(PATH_DELIMITER)] || rootList.id, - ); - if (listIds.length === 0) listIds.push(rootList.id); + // Prepare all bookmarks for staging + const bookmarksToStage: StagedBookmark[] = parsedBookmarks.map((bookmark) => { + // Convert paths to list IDs using pathMap + // If no paths, assign to root list + const listIds = + bookmark.paths.length === 0 + ? [rootList.id] + : bookmark.paths + .map((path) => { + if (path.length === 0) { + return rootList.id; + } + const pathKey = path.join(PATH_DELIMITER); + return pathMap[pathKey] || rootList.id; + }) + .filter((id, index, arr) => arr.indexOf(id) === index); // dedupe - const created = await deps.createBookmark(bookmark, session.id); - await deps.addBookmarkToLists({ bookmarkId: created.id, listIds }); - if (bookmark.tags && bookmark.tags.length > 0) { - await deps.updateBookmarkTags({ - bookmarkId: created.id, - tags: bookmark.tags, - }); - } + // Determine type and extract content appropriately + let type: "link" | "text" | "asset" = "link"; + let url: string | undefined; + let textContent: string | undefined; - return created; - } finally { - done += 1; - onProgress?.(done, parsedBookmarks.length); + if (bookmark.content) { + if (bookmark.content.type === "link") { + type = "link"; + url = bookmark.content.url; + } else if (bookmark.content.type === "text") { + type = "text"; + textContent = bookmark.content.text; + } } - }); - const resultsPromises = limitConcurrency(importPromises, concurrencyLimit); - const results = await Promise.allSettled(resultsPromises); + return { + type, + url, + title: bookmark.title, + content: textContent, + note: bookmark.notes, + tags: bookmark.tags ?? [], + listIds, + sourceAddedAt: bookmark.addDate + ? new Date(bookmark.addDate * 1000) + : undefined, + }; + }); - let successes = 0; - let failures = 0; - let alreadyExisted = 0; + // Stage bookmarks in batches of 50 + const BATCH_SIZE = 50; + let staged = 0; - for (const r of results) { - if (r.status === "fulfilled") { - if (r.value.alreadyExists) alreadyExisted++; - else successes++; - } else { - failures++; - } + for (let i = 0; i < bookmarksToStage.length; i += BATCH_SIZE) { + const batch = bookmarksToStage.slice(i, i + BATCH_SIZE); + await deps.stageImportedBookmarks({ + importSessionId: session.id, + bookmarks: batch, + }); + staged += batch.length; + onProgress?.(staged, parsedBookmarks.length); } + + // Finalize staging - marks session as "pending" for worker pickup + await deps.finalizeImportStaging(session.id); + return { counts: { - successes, - failures, - alreadyExisted, + successes: 0, + failures: 0, + alreadyExisted: 0, total: parsedBookmarks.length, }, rootListId: rootList.id, diff --git a/packages/shared/import-export/parsers.test.ts b/packages/shared/import-export/parsers.test.ts new file mode 100644 index 00000000..18502305 --- /dev/null +++ b/packages/shared/import-export/parsers.test.ts @@ -0,0 +1,301 @@ +import { describe, expect, it } from "vitest"; + +import { parseImportFile } from "./parsers"; + +describe("parseNetscapeBookmarkFile", () => { + it("parses a simple bookmark file with single bookmark", () => { + const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1> +<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8"> +<TITLE>Bookmarks</TITLE> +<H1>Bookmarks</H1> +<DL><p> + <DT><A HREF="https://example.com" ADD_DATE="1234567890">Example Site</A> +</DL><p>`; + + const result = parseImportFile("html", html); + + expect(result).toHaveLength(1); + expect(result[0]).toMatchObject({ + title: "Example Site", + content: { + type: "link", + url: "https://example.com", + }, + tags: [], + addDate: 1234567890, + paths: [[]], + }); + }); + + it("parses bookmarks with tags", () => { + const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1> +<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8"> +<TITLE>Bookmarks</TITLE> +<H1>Bookmarks</H1> +<DL><p> + <DT><A HREF="https://example.com" ADD_DATE="1234567890" TAGS="tag1,tag2,tag3">Example Site</A> +</DL><p>`; + + const result = parseImportFile("html", html); + + expect(result).toHaveLength(1); + expect(result[0].tags).toEqual(["tag1", "tag2", "tag3"]); + }); + + it("parses bookmarks in nested folders", () => { + const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1> +<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8"> +<TITLE>Bookmarks</TITLE> +<H1>Bookmarks</H1> +<DL><p> + <DT><H3 ADD_DATE="1234567890" LAST_MODIFIED="1234567891">Folder1</H3> + <DL><p> + <DT><H3 ADD_DATE="1234567892" LAST_MODIFIED="1234567893">Folder2</H3> + <DL><p> + <DT><A HREF="https://example.com" ADD_DATE="1234567894">Nested Bookmark</A> + </DL><p> + </DL><p> +</DL><p>`; + + const result = parseImportFile("html", html); + + expect(result).toHaveLength(1); + expect(result[0]).toMatchObject({ + title: "Nested Bookmark", + content: { + type: "link", + url: "https://example.com", + }, + paths: [["Folder1", "Folder2"]], + }); + }); + + it("handles empty folder names by replacing with 'Unnamed'", () => { + const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1> +<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8"> +<TITLE>Bookmarks</TITLE> +<H1>Bookmarks</H1> +<DL><p> + <DT><H3 ADD_DATE="1234567890" LAST_MODIFIED="1234567891">Named Folder</H3> + <DL><p> + <DT><H3 ADD_DATE="1234567892" LAST_MODIFIED="0"></H3> + <DL><p> + <DT><A HREF="https://example.com" ADD_DATE="1234567894">Bookmark</A> + </DL><p> + </DL><p> +</DL><p>`; + + const result = parseImportFile("html", html); + + expect(result).toHaveLength(1); + expect(result[0].paths).toEqual([["Named Folder", "Unnamed"]]); + }); + + it("parses multiple bookmarks in different folders", () => { + const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1> +<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8"> +<TITLE>Bookmarks</TITLE> +<H1>Bookmarks</H1> +<DL><p> + <DT><H3 ADD_DATE="1234567890">Tech</H3> + <DL><p> + <DT><A HREF="https://github.com" ADD_DATE="1234567891">GitHub</A> + <DT><A HREF="https://stackoverflow.com" ADD_DATE="1234567892">Stack Overflow</A> + </DL><p> + <DT><H3 ADD_DATE="1234567893">News</H3> + <DL><p> + <DT><A HREF="https://news.ycombinator.com" ADD_DATE="1234567894">Hacker News</A> + </DL><p> +</DL><p>`; + + const result = parseImportFile("html", html); + + expect(result).toHaveLength(3); + + expect(result[0]).toMatchObject({ + title: "GitHub", + content: { type: "link", url: "https://github.com" }, + paths: [["Tech"]], + }); + + expect(result[1]).toMatchObject({ + title: "Stack Overflow", + content: { type: "link", url: "https://stackoverflow.com" }, + paths: [["Tech"]], + }); + + expect(result[2]).toMatchObject({ + title: "Hacker News", + content: { type: "link", url: "https://news.ycombinator.com" }, + paths: [["News"]], + }); + }); + + it("parses bookmarks at root level (no folders)", () => { + const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1> +<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8"> +<TITLE>Bookmarks</TITLE> +<H1>Bookmarks</H1> +<DL><p> + <DT><A HREF="https://example1.com" ADD_DATE="1234567890">Bookmark 1</A> + <DT><A HREF="https://example2.com" ADD_DATE="1234567891">Bookmark 2</A> +</DL><p>`; + + const result = parseImportFile("html", html); + + expect(result).toHaveLength(2); + expect(result[0].paths).toEqual([[]]); + expect(result[1].paths).toEqual([[]]); + }); + + it("handles deeply nested folder structures", () => { + const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1> +<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8"> +<TITLE>Bookmarks</TITLE> +<H1>Bookmarks</H1> +<DL><p> + <DT><H3>Level1</H3> + <DL><p> + <DT><H3>Level2</H3> + <DL><p> + <DT><H3>Level3</H3> + <DL><p> + <DT><H3>Level4</H3> + <DL><p> + <DT><A HREF="https://example.com" ADD_DATE="1234567890">Deep Bookmark</A> + </DL><p> + </DL><p> + </DL><p> + </DL><p> +</DL><p>`; + + const result = parseImportFile("html", html); + + expect(result).toHaveLength(1); + expect(result[0].paths).toEqual([["Level1", "Level2", "Level3", "Level4"]]); + }); + + it("deduplicates bookmarks with the same URL", () => { + const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1> +<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8"> +<TITLE>Bookmarks</TITLE> +<H1>Bookmarks</H1> +<DL><p> + <DT><H3>Folder1</H3> + <DL><p> + <DT><A HREF="https://example.com" ADD_DATE="1234567890" TAGS="tag1">First Instance</A> + </DL><p> + <DT><H3>Folder2</H3> + <DL><p> + <DT><A HREF="https://example.com" ADD_DATE="1234567891" TAGS="tag2">Second Instance</A> + </DL><p> +</DL><p>`; + + const result = parseImportFile("html", html); + + expect(result).toHaveLength(1); + expect(result[0]).toMatchObject({ + content: { type: "link", url: "https://example.com" }, + tags: ["tag1", "tag2"], + addDate: 1234567890, // Should keep the earlier date + }); + expect(result[0].paths).toHaveLength(2); + expect(result[0].paths).toContainEqual(["Folder1"]); + expect(result[0].paths).toContainEqual(["Folder2"]); + }); + + it("merges notes from duplicate bookmarks", () => { + const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1> +<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8"> +<TITLE>Bookmarks</TITLE> +<H1>Bookmarks</H1> +<DL><p> + <DT><A HREF="https://example.com" ADD_DATE="1234567890">Bookmark</A> + <DD>First note + <DT><A HREF="https://example.com" ADD_DATE="1234567891">Bookmark</A> + <DD>Second note +</DL><p>`; + + // Note: The current parser doesn't extract DD notes, but this test + // documents the expected behavior if/when DD parsing is added + const result = parseImportFile("html", html); + + expect(result).toHaveLength(1); + expect(result[0].content).toMatchObject({ + type: "link", + url: "https://example.com", + }); + }); + + it("handles bookmarks without ADD_DATE attribute", () => { + const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1> +<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8"> +<TITLE>Bookmarks</TITLE> +<H1>Bookmarks</H1> +<DL><p> + <DT><A HREF="https://example.com">No Date Bookmark</A> +</DL><p>`; + + const result = parseImportFile("html", html); + + expect(result).toHaveLength(1); + expect(result[0].addDate).toBeUndefined(); + }); + + it("handles bookmarks without HREF attribute", () => { + const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1> +<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8"> +<TITLE>Bookmarks</TITLE> +<H1>Bookmarks</H1> +<DL><p> + <DT><A ADD_DATE="1234567890">No URL Bookmark</A> +</DL><p>`; + + const result = parseImportFile("html", html); + + expect(result).toHaveLength(1); + expect(result[0].content).toBeUndefined(); + }); + + it("handles mixed structure with folders and root-level bookmarks", () => { + const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1> +<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8"> +<TITLE>Bookmarks</TITLE> +<H1>Bookmarks</H1> +<DL><p> + <DT><A HREF="https://root1.com" ADD_DATE="1234567890">Root Bookmark 1</A> + <DT><H3>Folder</H3> + <DL><p> + <DT><A HREF="https://folder1.com" ADD_DATE="1234567891">Folder Bookmark</A> + </DL><p> + <DT><A HREF="https://root2.com" ADD_DATE="1234567892">Root Bookmark 2</A> +</DL><p>`; + + const result = parseImportFile("html", html); + + expect(result).toHaveLength(3); + expect(result[0]).toMatchObject({ + title: "Root Bookmark 1", + paths: [[]], + }); + expect(result[1]).toMatchObject({ + title: "Folder Bookmark", + paths: [["Folder"]], + }); + expect(result[2]).toMatchObject({ + title: "Root Bookmark 2", + paths: [[]], + }); + }); + + it("throws error for non-Netscape bookmark files", () => { + const html = `<html> +<head><title>Not a bookmark file</title></head> +<body>Just a regular HTML file</body> +</html>`; + + expect(() => parseImportFile("html", html)).toThrow( + "The uploaded html file does not seem to be a bookmark file", + ); + }); +}); diff --git a/packages/shared/import-export/parsers.ts b/packages/shared/import-export/parsers.ts index f4d3f862..24d85c80 100644 --- a/packages/shared/import-export/parsers.ts +++ b/packages/shared/import-export/parsers.ts @@ -1,5 +1,6 @@ // Copied from https://gist.github.com/devster31/4e8c6548fd16ffb75c02e6f24e27f9b9 +import type { AnyNode } from "domhandler"; import * as cheerio from "cheerio"; import { parse } from "csv-parse/sync"; import { z } from "zod"; @@ -10,11 +11,13 @@ import { zExportSchema } from "./exporters"; export type ImportSource = | "html" | "pocket" + | "matter" | "omnivore" | "karakeep" | "linkwarden" | "tab-session-manager" - | "mymind"; + | "mymind" + | "instapaper"; export interface ParsedBookmark { title: string; @@ -34,41 +37,58 @@ function parseNetscapeBookmarkFile(textContent: string): ParsedBookmark[] { } const $ = cheerio.load(textContent); + const bookmarks: ParsedBookmark[] = []; - return $("a") - .map(function (_index, a) { - const $a = $(a); - const addDate = $a.attr("add_date"); - let tags: string[] = []; + // Recursively traverse the bookmark hierarchy top-down + function traverseFolder( + element: cheerio.Cheerio<AnyNode>, + currentPath: string[], + ) { + element.children().each((_index, child) => { + const $child = $(child); - const tagsStr = $a.attr("tags"); - try { - tags = tagsStr && tagsStr.length > 0 ? tagsStr.split(",") : []; - } catch { - /* empty */ - } - const url = $a.attr("href"); + // Check if this is a folder (DT with H3) + const h3 = $child.children("h3").first(); + if (h3.length > 0) { + const folderName = h3.text().trim() || "Unnamed"; + const newPath = [...currentPath, folderName]; + + // Find the DL that follows this folder and recurse into it + const dl = $child.children("dl").first(); + if (dl.length > 0) { + traverseFolder(dl, newPath); + } + } else { + // Check if this is a bookmark (DT with A) + const anchor = $child.children("a").first(); + if (anchor.length > 0) { + const addDate = anchor.attr("add_date"); + const tagsStr = anchor.attr("tags"); + const tags = tagsStr && tagsStr.length > 0 ? tagsStr.split(",") : []; + const url = anchor.attr("href"); - // Build folder path by traversing up the hierarchy - const path: string[] = []; - let current = $a.parent(); - while (current && current.length > 0) { - const h3 = current.find("> h3").first(); - if (h3.length > 0) { - path.unshift(h3.text()); + bookmarks.push({ + title: anchor.text(), + content: url + ? { type: BookmarkTypes.LINK as const, url } + : undefined, + tags, + addDate: + typeof addDate === "undefined" ? undefined : parseInt(addDate), + paths: [currentPath], + }); } - current = current.parent(); } + }); + } - return { - title: $a.text(), - content: url ? { type: BookmarkTypes.LINK as const, url } : undefined, - tags, - addDate: typeof addDate === "undefined" ? undefined : parseInt(addDate), - paths: [path], - }; - }) - .get(); + // Start traversal from the root DL element + const rootDl = $("dl").first(); + if (rootDl.length > 0) { + traverseFolder(rootDl, []); + } + + return bookmarks; } function parsePocketBookmarkFile(textContent: string): ParsedBookmark[] { @@ -95,6 +115,52 @@ function parsePocketBookmarkFile(textContent: string): ParsedBookmark[] { }); } +function parseMatterBookmarkFile(textContent: string): ParsedBookmark[] { + const zMatterRecordSchema = z.object({ + Title: z.string(), + Author: z.string(), + Publisher: z.string(), + URL: z.string(), + Tags: z + .string() + .transform((tags) => (tags.length > 0 ? tags.split(";") : [])), + "Word Count": z.string(), + "In Queue": z.string().transform((inQueue) => inQueue === "False"), + Favorited: z.string(), + Read: z.string(), + Highlight_Count: z.string(), + "Last Interaction Date": z + .string() + .transform((date) => Date.parse(date) / 1000), + "File Id": z.string(), + }); + + const zMatterExportSchema = z.array(zMatterRecordSchema); + + const records = parse(textContent, { + columns: true, + skip_empty_lines: true, + }); + + const parsed = zMatterExportSchema.safeParse(records); + if (!parsed.success) { + throw new Error( + `The uploaded CSV file contains an invalid Matter bookmark file: ${parsed.error.toString()}`, + ); + } + + return parsed.data.map((record) => { + return { + title: record.Title, + content: { type: BookmarkTypes.LINK as const, url: record.URL }, + tags: record.Tags, + addDate: record["Last Interaction Date"], + archived: record["In Queue"], + paths: [], // TODO + }; + }); +} + function parseKarakeepBookmarkFile(textContent: string): ParsedBookmark[] { const parsed = zExportSchema.safeParse(JSON.parse(textContent)); if (!parsed.success) { @@ -292,6 +358,64 @@ function parseMymindBookmarkFile(textContent: string): ParsedBookmark[] { }); } +function parseInstapaperBookmarkFile(textContent: string): ParsedBookmark[] { + const zInstapaperRecordScheme = z.object({ + URL: z.string(), + Title: z.string(), + Selection: z.string(), + Folder: z.string(), + Timestamp: z.string(), + Tags: z.string(), + }); + + const zInstapaperExportScheme = z.array(zInstapaperRecordScheme); + + const record = parse(textContent, { + columns: true, + skip_empty_lines: true, + }); + + const parsed = zInstapaperExportScheme.safeParse(record); + + if (!parsed.success) { + throw new Error( + `CSV file contains an invalid instapaper bookmark file: ${parsed.error.toString()}`, + ); + } + + return parsed.data.map((record) => { + let content: ParsedBookmark["content"]; + if (record.URL && record.URL.trim().length > 0) { + content = { type: BookmarkTypes.LINK as const, url: record.URL.trim() }; + } else if (record.Selection && record.Selection.trim().length > 0) { + content = { + type: BookmarkTypes.TEXT as const, + text: record.Selection.trim(), + }; + } + + const addDate = parseInt(record.Timestamp); + + let tags: string[] = []; + try { + const parsedTags = JSON.parse(record.Tags); + if (Array.isArray(parsedTags)) { + tags = parsedTags.map((tag) => tag.toString().trim()); + } + } catch { + tags = []; + } + + return { + title: record.Title || "", + content, + addDate, + tags, + paths: [], // TODO + }; + }); +} + function deduplicateBookmarks(bookmarks: ParsedBookmark[]): ParsedBookmark[] { const deduplicatedBookmarksMap = new Map<string, ParsedBookmark>(); const textBookmarks: ParsedBookmark[] = []; @@ -345,6 +469,9 @@ export function parseImportFile( case "pocket": result = parsePocketBookmarkFile(textContent); break; + case "matter": + result = parseMatterBookmarkFile(textContent); + break; case "karakeep": result = parseKarakeepBookmarkFile(textContent); break; @@ -360,6 +487,9 @@ export function parseImportFile( case "mymind": result = parseMymindBookmarkFile(textContent); break; + case "instapaper": + result = parseInstapaperBookmarkFile(textContent); + break; } return deduplicateBookmarks(result); } diff --git a/packages/shared/index.ts b/packages/shared/index.ts index e69de29b..cb0ff5c3 100644 --- a/packages/shared/index.ts +++ b/packages/shared/index.ts @@ -0,0 +1 @@ +export {}; diff --git a/packages/shared/inference.ts b/packages/shared/inference.ts index 7689f4f4..61a621ac 100644 --- a/packages/shared/inference.ts +++ b/packages/shared/inference.ts @@ -1,6 +1,7 @@ import { Ollama } from "ollama"; import OpenAI from "openai"; import { zodResponseFormat } from "openai/helpers/zod"; +import * as undici from "undici"; import { z } from "zod"; import { zodToJsonSchema } from "zod-to-json-schema"; @@ -51,26 +52,49 @@ const mapInferenceOutputSchema = < return opts[type]; }; +export interface OpenAIInferenceConfig { + apiKey: string; + baseURL?: string; + proxyUrl?: string; + serviceTier?: typeof serverConfig.inference.openAIServiceTier; + textModel: string; + imageModel: string; + contextLength: number; + maxOutputTokens: number; + useMaxCompletionTokens: boolean; + outputSchema: "structured" | "json" | "plain"; +} + export class InferenceClientFactory { static build(): InferenceClient | null { if (serverConfig.inference.openAIApiKey) { - return new OpenAIInferenceClient(); + return OpenAIInferenceClient.fromConfig(); } if (serverConfig.inference.ollamaBaseUrl) { - return new OllamaInferenceClient(); + return OllamaInferenceClient.fromConfig(); } return null; } } -class OpenAIInferenceClient implements InferenceClient { +export class OpenAIInferenceClient implements InferenceClient { openAI: OpenAI; + private config: OpenAIInferenceConfig; + + constructor(config: OpenAIInferenceConfig) { + this.config = config; + + const fetchOptions = config.proxyUrl + ? { + dispatcher: new undici.ProxyAgent(config.proxyUrl), + } + : undefined; - constructor() { this.openAI = new OpenAI({ - apiKey: serverConfig.inference.openAIApiKey, - baseURL: serverConfig.inference.openAIBaseUrl, + apiKey: config.apiKey, + baseURL: config.baseURL, + ...(fetchOptions ? { fetchOptions } : {}), defaultHeaders: { "X-Title": "Karakeep", "HTTP-Referer": "https://karakeep.app", @@ -78,6 +102,21 @@ class OpenAIInferenceClient implements InferenceClient { }); } + static fromConfig(): OpenAIInferenceClient { + return new OpenAIInferenceClient({ + apiKey: serverConfig.inference.openAIApiKey!, + baseURL: serverConfig.inference.openAIBaseUrl, + proxyUrl: serverConfig.inference.openAIProxyUrl, + serviceTier: serverConfig.inference.openAIServiceTier, + textModel: serverConfig.inference.textModel, + imageModel: serverConfig.inference.imageModel, + contextLength: serverConfig.inference.contextLength, + maxOutputTokens: serverConfig.inference.maxOutputTokens, + useMaxCompletionTokens: serverConfig.inference.useMaxCompletionTokens, + outputSchema: serverConfig.inference.outputSchema, + }); + } + async inferFromText( prompt: string, _opts: Partial<InferenceOptions>, @@ -89,10 +128,13 @@ class OpenAIInferenceClient implements InferenceClient { const chatCompletion = await this.openAI.chat.completions.create( { messages: [{ role: "user", content: prompt }], - model: serverConfig.inference.textModel, - ...(serverConfig.inference.useMaxCompletionTokens - ? { max_completion_tokens: serverConfig.inference.maxOutputTokens } - : { max_tokens: serverConfig.inference.maxOutputTokens }), + model: this.config.textModel, + ...(this.config.serviceTier + ? { service_tier: this.config.serviceTier } + : {}), + ...(this.config.useMaxCompletionTokens + ? { max_completion_tokens: this.config.maxOutputTokens } + : { max_tokens: this.config.maxOutputTokens }), response_format: mapInferenceOutputSchema( { structured: optsWithDefaults.schema @@ -101,7 +143,7 @@ class OpenAIInferenceClient implements InferenceClient { json: { type: "json_object" }, plain: undefined, }, - serverConfig.inference.outputSchema, + this.config.outputSchema, ), }, { @@ -128,10 +170,13 @@ class OpenAIInferenceClient implements InferenceClient { }; const chatCompletion = await this.openAI.chat.completions.create( { - model: serverConfig.inference.imageModel, - ...(serverConfig.inference.useMaxCompletionTokens - ? { max_completion_tokens: serverConfig.inference.maxOutputTokens } - : { max_tokens: serverConfig.inference.maxOutputTokens }), + model: this.config.imageModel, + ...(this.config.serviceTier + ? { service_tier: this.config.serviceTier } + : {}), + ...(this.config.useMaxCompletionTokens + ? { max_completion_tokens: this.config.maxOutputTokens } + : { max_tokens: this.config.maxOutputTokens }), response_format: mapInferenceOutputSchema( { structured: optsWithDefaults.schema @@ -140,7 +185,7 @@ class OpenAIInferenceClient implements InferenceClient { json: { type: "json_object" }, plain: undefined, }, - serverConfig.inference.outputSchema, + this.config.outputSchema, ), messages: [ { @@ -185,16 +230,40 @@ class OpenAIInferenceClient implements InferenceClient { } } +export interface OllamaInferenceConfig { + baseUrl: string; + textModel: string; + imageModel: string; + contextLength: number; + maxOutputTokens: number; + keepAlive?: string; + outputSchema: "structured" | "json" | "plain"; +} + class OllamaInferenceClient implements InferenceClient { ollama: Ollama; + private config: OllamaInferenceConfig; - constructor() { + constructor(config: OllamaInferenceConfig) { + this.config = config; this.ollama = new Ollama({ - host: serverConfig.inference.ollamaBaseUrl, + host: config.baseUrl, fetch: customFetch, // Use the custom fetch with configurable timeout }); } + static fromConfig(): OllamaInferenceClient { + return new OllamaInferenceClient({ + baseUrl: serverConfig.inference.ollamaBaseUrl!, + textModel: serverConfig.inference.textModel, + imageModel: serverConfig.inference.imageModel, + contextLength: serverConfig.inference.contextLength, + maxOutputTokens: serverConfig.inference.maxOutputTokens, + keepAlive: serverConfig.inference.ollamaKeepAlive, + outputSchema: serverConfig.inference.outputSchema, + }); + } + async runModel( model: string, prompt: string, @@ -213,7 +282,7 @@ class OllamaInferenceClient implements InferenceClient { this.ollama.abort(); }; } - const chatCompletion = await this.ollama.chat({ + const chatCompletion = await this.ollama.generate({ model: model, format: mapInferenceOutputSchema( { @@ -223,24 +292,23 @@ class OllamaInferenceClient implements InferenceClient { json: "json", plain: undefined, }, - serverConfig.inference.outputSchema, + this.config.outputSchema, ), stream: true, - keep_alive: serverConfig.inference.ollamaKeepAlive, + keep_alive: this.config.keepAlive, options: { - num_ctx: serverConfig.inference.contextLength, - num_predict: serverConfig.inference.maxOutputTokens, + num_ctx: this.config.contextLength, + num_predict: this.config.maxOutputTokens, }, - messages: [ - { role: "user", content: prompt, images: image ? [image] : undefined }, - ], + prompt: prompt, + images: image ? [image] : undefined, }); let totalTokens = 0; let response = ""; try { for await (const part of chatCompletion) { - response += part.message.content; + response += part.response; if (!isNaN(part.eval_count)) { totalTokens += part.eval_count; } @@ -277,7 +345,7 @@ class OllamaInferenceClient implements InferenceClient { ..._opts, }; return await this.runModel( - serverConfig.inference.textModel, + this.config.textModel, prompt, optsWithDefaults, undefined, @@ -295,7 +363,7 @@ class OllamaInferenceClient implements InferenceClient { ..._opts, }; return await this.runModel( - serverConfig.inference.imageModel, + this.config.imageModel, prompt, optsWithDefaults, image, diff --git a/packages/shared/logger.ts b/packages/shared/logger.ts index efe78ff3..f3c5d45d 100644 --- a/packages/shared/logger.ts +++ b/packages/shared/logger.ts @@ -14,4 +14,16 @@ const logger = winston.createLogger({ transports: [new winston.transports.Console()], }); +export function throttledLogger(periodMs: number) { + let lastLogTime = 0; + + return (level: string, message: string) => { + const now = Date.now(); + if (now - lastLogTime >= periodMs) { + lastLogTime = now; + logger.log(level, message); + } + }; +} + export default logger; diff --git a/packages/shared/prompts.server.ts b/packages/shared/prompts.server.ts new file mode 100644 index 00000000..c53f4190 --- /dev/null +++ b/packages/shared/prompts.server.ts @@ -0,0 +1,88 @@ +import type { Tiktoken } from "js-tiktoken"; + +import type { ZTagStyle } from "./types/users"; +import { constructSummaryPrompt, constructTextTaggingPrompt } from "./prompts"; + +let encoding: Tiktoken | null = null; + +/** + * Lazy load the encoding to avoid loading the tiktoken data into memory + * until it's actually needed + */ +async function getEncodingInstance(): Promise<Tiktoken> { + if (!encoding) { + // Dynamic import to lazy load the tiktoken module + const { getEncoding } = await import("js-tiktoken"); + encoding = getEncoding("o200k_base"); + } + return encoding; +} + +async function calculateNumTokens(text: string): Promise<number> { + const enc = await getEncodingInstance(); + return enc.encode(text).length; +} + +async function truncateContent( + content: string, + length: number, +): Promise<string> { + const enc = await getEncodingInstance(); + const tokens = enc.encode(content); + if (tokens.length <= length) { + return content; + } + const truncatedTokens = tokens.slice(0, length); + return enc.decode(truncatedTokens); +} + +/** + * Remove duplicate whitespaces to avoid tokenization issues + */ +function preprocessContent(content: string) { + return content.replace(/(\s){10,}/g, "$1"); +} + +export async function buildTextPrompt( + lang: string, + customPrompts: string[], + content: string, + contextLength: number, + tagStyle: ZTagStyle, + curatedTags?: string[], +): Promise<string> { + content = preprocessContent(content); + const promptTemplate = constructTextTaggingPrompt( + lang, + customPrompts, + "", + tagStyle, + curatedTags, + ); + const promptSize = await calculateNumTokens(promptTemplate); + const available = Math.max(0, contextLength - promptSize); + const truncatedContent = + available === 0 ? "" : await truncateContent(content, available); + return constructTextTaggingPrompt( + lang, + customPrompts, + truncatedContent, + tagStyle, + curatedTags, + ); +} + +export async function buildSummaryPrompt( + lang: string, + customPrompts: string[], + content: string, + contextLength: number, +): Promise<string> { + content = preprocessContent(content); + const promptTemplate = constructSummaryPrompt(lang, customPrompts, ""); + const promptSize = await calculateNumTokens(promptTemplate); + const available = Math.max(0, contextLength - promptSize); + const truncatedContent = + available === 0 ? "" : await truncateContent(content, available); + return constructSummaryPrompt(lang, customPrompts, truncatedContent); +} diff --git a/packages/shared/prompts.ts b/packages/shared/prompts.ts index 5a6a705e..6c5c02c4 100644 --- a/packages/shared/prompts.ts +++ b/packages/shared/prompts.ts @@ -1,19 +1,5 @@ -import type { Tiktoken } from "js-tiktoken"; - -let encoding: Tiktoken | null = null; - -/** - * Lazy load the encoding to avoid loading the tiktoken data into memory - * until it's actually needed - */ -async function getEncodingInstance(): Promise<Tiktoken> { - if (!encoding) { - // Dynamic import to lazy load the tiktoken module - const { getEncoding } = await import("js-tiktoken"); - encoding = getEncoding("o200k_base"); - } - return encoding; -} +import type { ZTagStyle } from "./types/users"; +import { getCuratedTagsPrompt, getTagStylePrompt } from "./utils/tag"; /** * Remove duplicate whitespaces to avoid tokenization issues @@ -22,33 +8,25 @@ function preprocessContent(content: string) { return content.replace(/(\s){10,}/g, "$1"); } -async function calculateNumTokens(text: string): Promise<number> { - const enc = await getEncodingInstance(); - return enc.encode(text).length; -} - -async function truncateContent( - content: string, - length: number, -): Promise<string> { - const enc = await getEncodingInstance(); - const tokens = enc.encode(content); - if (tokens.length <= length) { - return content; - } - const truncatedTokens = tokens.slice(0, length); - return enc.decode(truncatedTokens); -} +export function buildImagePrompt( + lang: string, + customPrompts: string[], + tagStyle: ZTagStyle, + curatedTags?: string[], +) { + const tagStyleInstruction = getTagStylePrompt(tagStyle); + const curatedInstruction = getCuratedTagsPrompt(curatedTags); -export function buildImagePrompt(lang: string, customPrompts: string[]) { return ` -You are an expert whose responsibility is to help with automatic text tagging for a read-it-later app. -Please analyze the attached image and suggest relevant tags that describe its key themes, topics, and main ideas. The rules are: +You are an expert whose responsibility is to help with automatic text tagging for a read-it-later/bookmarking app. +Analyze the attached image and suggest relevant tags that describe its key themes, topics, and main ideas. The rules are: - Aim for a variety of tags, including broad categories, specific keywords, and potential sub-genres. - The tags must be in ${lang}. - If the tag is not generic enough, don't include it. - Aim for 10-15 tags. - If there are no good tags, don't emit any. +${curatedInstruction} +${tagStyleInstruction} ${customPrompts && customPrompts.map((p) => `- ${p}`).join("\n")} You must respond in valid JSON with the key "tags" and the value is list of tags. Don't wrap the response in a markdown code.`; } @@ -56,20 +34,29 @@ You must respond in valid JSON with the key "tags" and the value is list of tags /** * Construct tagging prompt for text content */ -function constructTextTaggingPrompt( +export function constructTextTaggingPrompt( lang: string, customPrompts: string[], content: string, + tagStyle: ZTagStyle, + curatedTags?: string[], ): string { + const tagStyleInstruction = getTagStylePrompt(tagStyle); + const curatedInstruction = getCuratedTagsPrompt(curatedTags); + return ` -You are an expert whose responsibility is to help with automatic tagging for a read-it-later app. -Please analyze the TEXT_CONTENT below and suggest relevant tags that describe its key themes, topics, and main ideas. The rules are: +You are an expert whose responsibility is to help with automatic tagging for a read-it-later/bookmarking app. +Analyze the TEXT_CONTENT below and suggest relevant tags that describe its key themes, topics, and main ideas. The rules are: - Aim for a variety of tags, including broad categories, specific keywords, and potential sub-genres. - The tags must be in ${lang}. - If the tag is not generic enough, don't include it. -- The content can include text for cookie consent and privacy policy, ignore those while tagging. +- Do NOT generate tags related to: + - An error page (404, 403, blocked, not found, dns errors) + - Boilerplate content (cookie consent, login walls, GDPR notices) - Aim for 3-5 tags. - If there are no good tags, leave the array empty. +${curatedInstruction} +${tagStyleInstruction} ${customPrompts && customPrompts.map((p) => `- ${p}`).join("\n")} <TEXT_CONTENT> @@ -81,7 +68,7 @@ You must respond in JSON with the key "tags" and the value is an array of string /** * Construct summary prompt */ -function constructSummaryPrompt( +export function constructSummaryPrompt( lang: string, customPrompts: string[], content: string, @@ -101,46 +88,18 @@ export function buildTextPromptUntruncated( lang: string, customPrompts: string[], content: string, + tagStyle: ZTagStyle, + curatedTags?: string[], ): string { return constructTextTaggingPrompt( lang, customPrompts, preprocessContent(content), + tagStyle, + curatedTags, ); } -export async function buildTextPrompt( - lang: string, - customPrompts: string[], - content: string, - contextLength: number, -): Promise<string> { - content = preprocessContent(content); - const promptTemplate = constructTextTaggingPrompt(lang, customPrompts, ""); - const promptSize = await calculateNumTokens(promptTemplate); - const truncatedContent = await truncateContent( - content, - contextLength - promptSize, - ); - return constructTextTaggingPrompt(lang, customPrompts, truncatedContent); -} - -export async function buildSummaryPrompt( - lang: string, - customPrompts: string[], - content: string, - contextLength: number, -): Promise<string> { - content = preprocessContent(content); - const promptTemplate = constructSummaryPrompt(lang, customPrompts, ""); - const promptSize = await calculateNumTokens(promptTemplate); - const truncatedContent = await truncateContent( - content, - contextLength - promptSize, - ); - return constructSummaryPrompt(lang, customPrompts, truncatedContent); -} - /** * Build summary prompt without truncation (for previews/UI) */ @@ -155,3 +114,19 @@ export function buildSummaryPromptUntruncated( preprocessContent(content), ); } + +/** + * Build OCR prompt for extracting text from images using LLM + */ +export function buildOCRPrompt(): string { + return `You are an OCR (Optical Character Recognition) expert. Your task is to extract ALL text from this image. + +Rules: +- Extract every piece of text visible in the image, including titles, body text, captions, labels, watermarks, and any other textual content. +- Preserve the original structure and formatting as much as possible (e.g., paragraphs, lists, headings). +- If text appears in multiple columns, read from left to right, top to bottom. +- If text is partially obscured or unclear, make your best attempt and indicate uncertainty with [unclear] if needed. +- Do not add any commentary, explanations, or descriptions of non-text elements. +- If there is no text in the image, respond with an empty string. +- Output ONLY the extracted text, nothing else.`; +} diff --git a/packages/shared/queueing.ts b/packages/shared/queueing.ts index 0dd6ed6b..d1f4bcef 100644 --- a/packages/shared/queueing.ts +++ b/packages/shared/queueing.ts @@ -2,6 +2,21 @@ import { ZodType } from "zod"; import { PluginManager, PluginType } from "./plugins"; +/** + * Special error that indicates a job should be retried after a delay + * without counting against the retry attempts limit. + * Useful for handling rate limiting scenarios. + */ +export class QueueRetryAfterError extends Error { + constructor( + message: string, + public readonly delayMs: number, + ) { + super(message); + this.name = "QueueRetryAfterError"; + } +} + export interface EnqueueOptions { idempotencyKey?: string; priority?: number; @@ -48,6 +63,7 @@ export interface RunnerOptions<T> { export interface Queue<T> { opts: QueueOptions; + ensureInit(): Promise<void>; name(): string; enqueue(payload: T, options?: EnqueueOptions): Promise<string | undefined>; stats(): Promise<{ diff --git a/packages/shared/search.ts b/packages/shared/search.ts index d23ab29f..651b5245 100644 --- a/packages/shared/search.ts +++ b/packages/shared/search.ts @@ -60,9 +60,20 @@ export interface SearchResponse { processingTimeMs: number; } +export interface IndexingOptions { + /** + * Whether to batch requests. Defaults to true. + * Set to false to bypass batching for improved reliability (e.g., on retries). + */ + batch?: boolean; +} + export interface SearchIndexClient { - addDocuments(documents: BookmarkSearchDocument[]): Promise<void>; - deleteDocuments(ids: string[]): Promise<void>; + addDocuments( + documents: BookmarkSearchDocument[], + options?: IndexingOptions, + ): Promise<void>; + deleteDocuments(ids: string[], options?: IndexingOptions): Promise<void>; search(options: SearchOptions): Promise<SearchResponse>; clearIndex(): Promise<void>; } diff --git a/packages/shared/searchQueryParser.test.ts b/packages/shared/searchQueryParser.test.ts index aa11433f..37275284 100644 --- a/packages/shared/searchQueryParser.test.ts +++ b/packages/shared/searchQueryParser.test.ts @@ -332,7 +332,176 @@ describe("Search Query Parser", () => { inverse: true, }, }); + expect(parseSearchQuery("source:rss")).toEqual({ + result: "full", + text: "", + matcher: { + type: "source", + source: "rss", + inverse: false, + }, + }); + expect(parseSearchQuery("-source:rss")).toEqual({ + result: "full", + text: "", + matcher: { + type: "source", + source: "rss", + inverse: true, + }, + }); + expect(parseSearchQuery("source:web")).toEqual({ + result: "full", + text: "", + matcher: { + type: "source", + source: "web", + inverse: false, + }, + }); + expect(parseSearchQuery("-source:web")).toEqual({ + result: "full", + text: "", + matcher: { + type: "source", + source: "web", + inverse: true, + }, + }); + }); + test("! negation alias for -", () => { + // ! should work exactly like - for negation + expect(parseSearchQuery("!is:archived")).toEqual({ + result: "full", + text: "", + matcher: { + type: "archived", + archived: false, + }, + }); + expect(parseSearchQuery("!is:fav")).toEqual({ + result: "full", + text: "", + matcher: { + type: "favourited", + favourited: false, + }, + }); + expect(parseSearchQuery("!#my-tag")).toEqual({ + result: "full", + text: "", + matcher: { + type: "tagName", + tagName: "my-tag", + inverse: true, + }, + }); + expect(parseSearchQuery("!tag:my-tag")).toEqual({ + result: "full", + text: "", + matcher: { + type: "tagName", + tagName: "my-tag", + inverse: true, + }, + }); + expect(parseSearchQuery("!url:example.com")).toEqual({ + result: "full", + text: "", + matcher: { + type: "url", + url: "example.com", + inverse: true, + }, + }); + expect(parseSearchQuery("!list:my-list")).toEqual({ + result: "full", + text: "", + matcher: { + type: "listName", + listName: "my-list", + inverse: true, + }, + }); + expect(parseSearchQuery("!is:link")).toEqual({ + result: "full", + text: "", + matcher: { + type: "type", + typeName: BookmarkTypes.LINK, + inverse: true, + }, + }); + // Combined with complex queries + expect(parseSearchQuery("is:fav !is:archived")).toEqual({ + result: "full", + text: "", + matcher: { + type: "and", + matchers: [ + { + type: "favourited", + favourited: true, + }, + { + type: "archived", + archived: false, + }, + ], + }, + }); }); + + test("tag: qualifier alias for #", () => { + // tag: should work exactly like # + expect(parseSearchQuery("tag:my-tag")).toEqual({ + result: "full", + text: "", + matcher: { + type: "tagName", + tagName: "my-tag", + inverse: false, + }, + }); + expect(parseSearchQuery("-tag:my-tag")).toEqual({ + result: "full", + text: "", + matcher: { + type: "tagName", + tagName: "my-tag", + inverse: true, + }, + }); + expect(parseSearchQuery('tag:"my tag"')).toEqual({ + result: "full", + text: "", + matcher: { + type: "tagName", + tagName: "my tag", + inverse: false, + }, + }); + expect(parseSearchQuery('-tag:"my tag"')).toEqual({ + result: "full", + text: "", + matcher: { + type: "tagName", + tagName: "my tag", + inverse: true, + }, + }); + // Tags starting with qualifiers should be treated correctly + expect(parseSearchQuery("tag:android")).toEqual({ + result: "full", + text: "", + matcher: { + type: "tagName", + tagName: "android", + inverse: false, + }, + }); + }); + test("date queries", () => { expect(parseSearchQuery("after:2023-10-12")).toEqual({ result: "full", diff --git a/packages/shared/searchQueryParser.ts b/packages/shared/searchQueryParser.ts index 7447593a..7eb3b185 100644 --- a/packages/shared/searchQueryParser.ts +++ b/packages/shared/searchQueryParser.ts @@ -16,7 +16,7 @@ import { } from "typescript-parsec"; import { z } from "zod"; -import { BookmarkTypes } from "./types/bookmarks"; +import { BookmarkTypes, zBookmarkSourceSchema } from "./types/bookmarks"; import { Matcher } from "./types/search"; import { parseRelativeDate } from "./utils/relativeDateUtils"; @@ -33,6 +33,7 @@ enum TokenType { Space = "SPACE", Hash = "HASH", Minus = "MINUS", + Exclamation = "EXCLAMATION", } // Rules are in order of priority @@ -41,7 +42,10 @@ const lexerRules: [RegExp, TokenType][] = [ [/^\s+or/i, TokenType.Or], [/^#/, TokenType.Hash], - [/^(is|url|list|after|before|age|feed|title):/, TokenType.Qualifier], + [ + /^(is|url|list|after|before|age|feed|title|tag|source):/, + TokenType.Qualifier, + ], [/^"([^"]+)"/, TokenType.StringLiteral], @@ -49,6 +53,7 @@ const lexerRules: [RegExp, TokenType][] = [ [/^\)/, TokenType.RParen], [/^\s+/, TokenType.Space], [/^-/, TokenType.Minus], + [/^!/, TokenType.Exclamation], // This needs to be last as it matches a lot of stuff [/^[^ )(]+/, TokenType.Ident], @@ -116,7 +121,10 @@ const EXP = rule<TokenType, TextAndMatcher>(); MATCHER.setPattern( alt_sc( apply( - seq(opt(str("-")), kright(str("is:"), tok(TokenType.Ident))), + seq( + opt(alt(str("-"), str("!"))), + kright(str("is:"), tok(TokenType.Ident)), + ), ([minus, ident]) => { switch (ident.text) { case "fav": @@ -182,7 +190,7 @@ MATCHER.setPattern( ), apply( seq( - opt(str("-")), + opt(alt(str("-"), str("!"))), alt(tok(TokenType.Qualifier), tok(TokenType.Hash)), alt( apply(tok(TokenType.Ident), (tok) => { @@ -206,6 +214,7 @@ MATCHER.setPattern( matcher: { type: "title", title: ident, inverse: !!minus }, }; case "#": + case "tag:": return { text: "", matcher: { type: "tagName", tagName: ident, inverse: !!minus }, @@ -224,6 +233,23 @@ MATCHER.setPattern( inverse: !!minus, }, }; + case "source:": { + const parsed = zBookmarkSourceSchema.safeParse(ident); + if (!parsed.success) { + return { + text: (minus?.text ?? "") + qualifier.text + ident, + matcher: undefined, + }; + } + return { + text: "", + matcher: { + type: "source", + source: parsed.data, + inverse: !!minus, + }, + }; + } case "after:": try { return { diff --git a/packages/shared/types/bookmarks.ts b/packages/shared/types/bookmarks.ts index cbaa4574..2f32bd51 100644 --- a/packages/shared/types/bookmarks.ts +++ b/packages/shared/types/bookmarks.ts @@ -1,7 +1,7 @@ import { z } from "zod"; import { zCursorV2 } from "./pagination"; -import { zBookmarkTagSchema } from "./tags"; +import { zAttachedByEnumSchema, zBookmarkTagSchema } from "./tags"; export const MAX_BOOKMARK_TITLE_LENGTH = 1000; @@ -18,6 +18,7 @@ export type ZSortOrder = z.infer<typeof zSortOrder>; export const zAssetTypesSchema = z.enum([ "linkHtmlContent", "screenshot", + "pdf", "assetScreenshot", "bannerImage", "fullPageArchive", @@ -25,6 +26,7 @@ export const zAssetTypesSchema = z.enum([ "bookmarkAsset", "precrawledArchive", "userUploaded", + "avatar", "unknown", ]); export type ZAssetType = z.infer<typeof zAssetTypesSchema>; @@ -43,6 +45,7 @@ export const zBookmarkedLinkSchema = z.object({ imageUrl: z.string().nullish(), imageAssetId: z.string().nullish(), screenshotAssetId: z.string().nullish(), + pdfAssetId: z.string().nullish(), fullPageArchiveAssetId: z.string().nullish(), precrawledArchiveAssetId: z.string().nullish(), videoAssetId: z.string().nullish(), @@ -50,6 +53,7 @@ export const zBookmarkedLinkSchema = z.object({ htmlContent: z.string().nullish(), contentAssetId: z.string().nullish(), crawledAt: z.date().nullish(), + crawlStatus: z.enum(["success", "failure", "pending"]).nullish(), author: z.string().nullish(), publisher: z.string().nullish(), datePublished: z.date().nullish(), @@ -160,6 +164,7 @@ export const zNewBookmarkRequestSchema = z // A mechanism to prioritize crawling of bookmarks depending on whether // they were created by a user interaction or by a bulk import. crawlPriority: z.enum(["low", "normal"]).optional(), + // Deprecated importSessionId: z.string().optional(), source: zBookmarkSourceSchema.optional(), }) @@ -248,6 +253,7 @@ export const zManipulatedTagSchema = z // At least one of the two must be set tagId: z.string().optional(), // If the tag already exists and we know its id we should pass it tagName: z.string().optional(), + attachedBy: zAttachedByEnumSchema.optional().default("human"), }) .refine((val) => !!val.tagId || !!val.tagName, { message: "You must provide either a tagId or a tagName", diff --git a/packages/shared/types/config.ts b/packages/shared/types/config.ts new file mode 100644 index 00000000..bd4310f9 --- /dev/null +++ b/packages/shared/types/config.ts @@ -0,0 +1,29 @@ +import { z } from "zod"; + +export const zClientConfigSchema = z.object({ + publicUrl: z.string(), + publicApiUrl: z.string(), + demoMode: z + .object({ + email: z.string().optional(), + password: z.string().optional(), + }) + .optional(), + auth: z.object({ + disableSignups: z.boolean(), + disablePasswordAuth: z.boolean(), + }), + turnstile: z + .object({ + siteKey: z.string(), + }) + .nullable(), + inference: z.object({ + isConfigured: z.boolean(), + inferredTagLang: z.string(), + enableAutoTagging: z.boolean(), + enableAutoSummarization: z.boolean(), + }), + serverVersion: z.string().optional(), + disableNewReleaseCheck: z.boolean(), +}); diff --git a/packages/shared/types/importSessions.ts b/packages/shared/types/importSessions.ts index 0c1edd03..44022a74 100644 --- a/packages/shared/types/importSessions.ts +++ b/packages/shared/types/importSessions.ts @@ -1,8 +1,10 @@ import { z } from "zod"; export const zImportSessionStatusSchema = z.enum([ + "staging", "pending", - "in_progress", + "running", + "paused", "completed", "failed", ]); @@ -24,13 +26,13 @@ export const zImportSessionSchema = z.object({ userId: z.string(), message: z.string().nullable(), rootListId: z.string().nullable(), + status: zImportSessionStatusSchema, createdAt: z.date(), modifiedAt: z.date().nullable(), }); export type ZImportSession = z.infer<typeof zImportSessionSchema>; export const zImportSessionWithStatsSchema = zImportSessionSchema.extend({ - status: z.enum(["pending", "in_progress", "completed", "failed"]), totalBookmarks: z.number(), completedBookmarks: z.number(), failedBookmarks: z.number(), diff --git a/packages/shared/types/readers.ts b/packages/shared/types/readers.ts new file mode 100644 index 00000000..117dd51b --- /dev/null +++ b/packages/shared/types/readers.ts @@ -0,0 +1,59 @@ +import { z } from "zod"; + +import { ZReaderFontFamily, zReaderFontFamilySchema } from "./users"; + +export const READER_DEFAULTS = { + fontSize: 18, + lineHeight: 1.6, + fontFamily: "serif" as const, +} as const; + +export const READER_FONT_FAMILIES: Record<ZReaderFontFamily, string> = { + serif: "ui-serif, Georgia, Cambria, serif", + sans: "ui-sans-serif, system-ui, sans-serif", + mono: "ui-monospace, Menlo, Monaco, monospace", +} as const; + +// Setting constraints for UI controls +export const READER_SETTING_CONSTRAINTS = { + fontSize: { min: 12, max: 24, step: 1 }, + lineHeight: { min: 1.2, max: 2.5, step: 0.1 }, +} as const; + +// Formatting functions for display +export function formatFontSize(value: number): string { + return `${value}px`; +} + +export function formatLineHeight(value: number): string { + return value.toFixed(1); +} + +export function formatFontFamily( + value: ZReaderFontFamily, + t?: (key: string) => string, +): string { + if (t) { + return t(`settings.info.reader_settings.${value}`); + } + // Fallback labels when no translation function provided + switch (value) { + case "serif": + return "Serif"; + case "sans": + return "Sans Serif"; + case "mono": + return "Monospace"; + } +} + +export const zReaderSettings = z.object({ + fontSize: z.number().int().min(12).max(24), + lineHeight: z.number().min(1.2).max(2.5), + fontFamily: zReaderFontFamilySchema, +}); + +export type ReaderSettings = z.infer<typeof zReaderSettings>; + +export const zReaderSettingsPartial = zReaderSettings.partial(); +export type ReaderSettingsPartial = z.infer<typeof zReaderSettingsPartial>; diff --git a/packages/shared/types/rules.ts b/packages/shared/types/rules.ts index 92300b3c..fd99c266 100644 --- a/packages/shared/types/rules.ts +++ b/packages/shared/types/rules.ts @@ -54,6 +54,21 @@ const zUrlContainsCondition = z.object({ str: z.string(), }); +const zUrlDoesNotContainCondition = z.object({ + type: z.literal("urlDoesNotContain"), + str: z.string(), +}); + +const zTitleContainsCondition = z.object({ + type: z.literal("titleContains"), + str: z.string(), +}); + +const zTitleDoesNotContainCondition = z.object({ + type: z.literal("titleDoesNotContain"), + str: z.string(), +}); + const zImportedFromFeedCondition = z.object({ type: z.literal("importedFromFeed"), feedId: z.string(), @@ -80,6 +95,9 @@ const zIsArchivedCondition = z.object({ const nonRecursiveCondition = z.discriminatedUnion("type", [ zAlwaysTrueCondition, zUrlContainsCondition, + zUrlDoesNotContainCondition, + zTitleContainsCondition, + zTitleDoesNotContainCondition, zImportedFromFeedCondition, zBookmarkTypeIsCondition, zHasTagCondition, @@ -98,6 +116,9 @@ export const zRuleEngineConditionSchema: z.ZodType<RuleEngineCondition> = z.discriminatedUnion("type", [ zAlwaysTrueCondition, zUrlContainsCondition, + zUrlDoesNotContainCondition, + zTitleContainsCondition, + zTitleDoesNotContainCondition, zImportedFromFeedCondition, zBookmarkTypeIsCondition, zHasTagCondition, @@ -227,6 +248,7 @@ const ruleValidaitorFn = ( case "isArchived": return true; case "urlContains": + case "urlDoesNotContain": if (condition.str.length == 0) { ctx.addIssue({ code: "custom", @@ -236,6 +258,17 @@ const ruleValidaitorFn = ( return false; } return true; + case "titleContains": + case "titleDoesNotContain": + if (condition.str.length == 0) { + ctx.addIssue({ + code: "custom", + message: "You must specify a title for this condition type", + path: ["condition", "str"], + }); + return false; + } + return true; case "hasTag": if (condition.tagId.length == 0) { ctx.addIssue({ diff --git a/packages/shared/types/search.ts b/packages/shared/types/search.ts index c29270b8..b653d883 100644 --- a/packages/shared/types/search.ts +++ b/packages/shared/types/search.ts @@ -1,6 +1,6 @@ import { z } from "zod"; -import { BookmarkTypes } from "./bookmarks"; +import { BookmarkTypes, zBookmarkSourceSchema } from "./bookmarks"; const zTagNameMatcher = z.object({ type: z.literal("tagName"), @@ -88,6 +88,12 @@ const zBrokenLinksMatcher = z.object({ brokenLinks: z.boolean(), }); +const zSourceMatcher = z.object({ + type: z.literal("source"), + source: zBookmarkSourceSchema, + inverse: z.boolean(), +}); + const zNonRecursiveMatcher = z.union([ zTagNameMatcher, zListNameMatcher, @@ -103,6 +109,7 @@ const zNonRecursiveMatcher = z.union([ zTypeMatcher, zRssFeedNameMatcher, zBrokenLinksMatcher, + zSourceMatcher, ]); type NonRecursiveMatcher = z.infer<typeof zNonRecursiveMatcher>; @@ -127,6 +134,7 @@ export const zMatcherSchema: z.ZodType<Matcher> = z.lazy(() => { zTypeMatcher, zRssFeedNameMatcher, zBrokenLinksMatcher, + zSourceMatcher, z.object({ type: z.literal("and"), matchers: z.array(zMatcherSchema), diff --git a/packages/shared/types/tags.ts b/packages/shared/types/tags.ts index 91ad1d96..7ce70477 100644 --- a/packages/shared/types/tags.ts +++ b/packages/shared/types/tags.ts @@ -47,6 +47,7 @@ export const zTagCursorSchema = z.object({ export const zTagListRequestSchema = z.object({ nameContains: z.string().optional(), + ids: z.array(z.string()).optional(), attachedBy: z.enum([...zAttachedByEnumSchema.options, "none"]).optional(), sortBy: z.enum(["name", "usage", "relevance"]).optional().default("usage"), cursor: zTagCursorSchema.nullish().default({ page: 0 }), diff --git a/packages/shared/types/users.ts b/packages/shared/types/users.ts index 9f020d52..df4697f0 100644 --- a/packages/shared/types/users.ts +++ b/packages/shared/types/users.ts @@ -5,6 +5,17 @@ import { zBookmarkSourceSchema } from "./bookmarks"; export const PASSWORD_MIN_LENGTH = 8; export const PASSWORD_MAX_LENGTH = 100; +export const zTagStyleSchema = z.enum([ + "lowercase-hyphens", + "lowercase-spaces", + "lowercase-underscores", + "titlecase-spaces", + "titlecase-hyphens", + "camelCase", + "as-generated", +]); +export type ZTagStyle = z.infer<typeof zTagStyleSchema>; + export const zSignUpSchema = z .object({ name: z.string().min(1, { message: "Name can't be empty" }), @@ -38,6 +49,7 @@ export const zWhoAmIResponseSchema = z.object({ id: z.string(), name: z.string().nullish(), email: z.string().nullish(), + image: z.string().nullish(), localUser: z.boolean(), }); @@ -102,6 +114,76 @@ export const zUserStatsResponseSchema = z.object({ ), }); +export const zWrappedStatsResponseSchema = z.object({ + year: z.number(), + totalBookmarks: z.number(), + totalFavorites: z.number(), + totalArchived: z.number(), + totalHighlights: z.number(), + totalTags: z.number(), + totalLists: z.number(), + + firstBookmark: z + .object({ + id: z.string(), + title: z.string().nullable(), + createdAt: z.date(), + type: z.enum(["link", "text", "asset"]), + }) + .nullable(), + + mostActiveDay: z + .object({ + date: z.string(), + count: z.number(), + }) + .nullable(), + + topDomains: z + .array( + z.object({ + domain: z.string(), + count: z.number(), + }), + ) + .max(5), + + topTags: z + .array( + z.object({ + name: z.string(), + count: z.number(), + }), + ) + .max(5), + + bookmarksByType: z.object({ + link: z.number(), + text: z.number(), + asset: z.number(), + }), + + bookmarksBySource: z.array( + z.object({ + source: zBookmarkSourceSchema.nullable(), + count: z.number(), + }), + ), + + monthlyActivity: z.array( + z.object({ + month: z.number(), + count: z.number(), + }), + ), + + peakHour: z.number(), + peakDayOfWeek: z.number(), +}); + +export const zReaderFontFamilySchema = z.enum(["serif", "sans", "mono"]); +export type ZReaderFontFamily = z.infer<typeof zReaderFontFamilySchema>; + export const zUserSettingsSchema = z.object({ bookmarkClickAction: z.enum([ "open_original_link", @@ -112,6 +194,16 @@ export const zUserSettingsSchema = z.object({ backupsEnabled: z.boolean(), backupsFrequency: z.enum(["daily", "weekly"]), backupsRetentionDays: z.number().int().min(1).max(365), + // Reader settings (nullable = opt-in, null means use client default) + readerFontSize: z.number().int().min(12).max(24).nullable(), + readerLineHeight: z.number().min(1.2).max(2.5).nullable(), + readerFontFamily: zReaderFontFamilySchema.nullable(), + // AI settings (nullable = opt-in, null means use server default) + autoTaggingEnabled: z.boolean().nullable(), + autoSummarizationEnabled: z.boolean().nullable(), + tagStyle: zTagStyleSchema, + curatedTagIds: z.array(z.string()).nullable(), + inferredTagLang: z.string().nullable(), }); export type ZUserSettings = z.infer<typeof zUserSettingsSchema>; @@ -123,6 +215,14 @@ export const zUpdateUserSettingsSchema = zUserSettingsSchema.partial().pick({ backupsEnabled: true, backupsFrequency: true, backupsRetentionDays: true, + readerFontSize: true, + readerLineHeight: true, + readerFontFamily: true, + autoTaggingEnabled: true, + autoSummarizationEnabled: true, + tagStyle: true, + curatedTagIds: true, + inferredTagLang: true, }); export const zUpdateBackupSettingsSchema = zUpdateUserSettingsSchema.pick({ diff --git a/packages/shared/utils/bookmarkUtils.ts b/packages/shared/utils/bookmarkUtils.ts index 9d4659b1..c9587c6c 100644 --- a/packages/shared/utils/bookmarkUtils.ts +++ b/packages/shared/utils/bookmarkUtils.ts @@ -28,9 +28,13 @@ export function getBookmarkLinkImageUrl(bookmark: ZBookmarkedLink) { } export function isBookmarkStillCrawling(bookmark: ZBookmark) { - return ( - bookmark.content.type == BookmarkTypes.LINK && !bookmark.content.crawledAt - ); + if (bookmark.content.type != BookmarkTypes.LINK) { + return false; + } + if (bookmark.content.crawlStatus) { + return bookmark.content.crawlStatus === "pending"; + } + return !bookmark.content.crawledAt; } export function isBookmarkStillTagging(bookmark: ZBookmark) { diff --git a/packages/shared/utils/redirectUrl.test.ts b/packages/shared/utils/redirectUrl.test.ts new file mode 100644 index 00000000..97d52cf2 --- /dev/null +++ b/packages/shared/utils/redirectUrl.test.ts @@ -0,0 +1,89 @@ +import { describe, expect, it } from "vitest"; + +import { isMobileAppRedirect, validateRedirectUrl } from "./redirectUrl"; + +describe("validateRedirectUrl", () => { + it("should return undefined for null input", () => { + expect(validateRedirectUrl(null)).toBe(undefined); + }); + + it("should return undefined for undefined input", () => { + expect(validateRedirectUrl(undefined)).toBe(undefined); + }); + + it("should return undefined for empty string", () => { + expect(validateRedirectUrl("")).toBe(undefined); + }); + + it("should allow relative paths starting with '/'", () => { + expect(validateRedirectUrl("/")).toBe("/"); + expect(validateRedirectUrl("/dashboard")).toBe("/dashboard"); + expect(validateRedirectUrl("/settings/profile")).toBe("/settings/profile"); + expect(validateRedirectUrl("/path?query=value")).toBe("/path?query=value"); + expect(validateRedirectUrl("/path#hash")).toBe("/path#hash"); + }); + + it("should reject protocol-relative URLs (//)", () => { + expect(validateRedirectUrl("//evil.com")).toBe(undefined); + expect(validateRedirectUrl("//evil.com/path")).toBe(undefined); + }); + + it("should allow karakeep:// scheme for mobile app", () => { + expect(validateRedirectUrl("karakeep://")).toBe("karakeep://"); + expect(validateRedirectUrl("karakeep://callback")).toBe( + "karakeep://callback", + ); + expect(validateRedirectUrl("karakeep://callback/path")).toBe( + "karakeep://callback/path", + ); + expect(validateRedirectUrl("karakeep://callback?param=value")).toBe( + "karakeep://callback?param=value", + ); + }); + + it("should reject http:// scheme", () => { + expect(validateRedirectUrl("http://example.com")).toBe(undefined); + expect(validateRedirectUrl("http://localhost:3000")).toBe(undefined); + }); + + it("should reject https:// scheme", () => { + expect(validateRedirectUrl("https://example.com")).toBe(undefined); + expect(validateRedirectUrl("https://evil.com/phishing")).toBe(undefined); + }); + + it("should reject javascript: scheme", () => { + expect(validateRedirectUrl("javascript:alert(1)")).toBe(undefined); + }); + + it("should reject data: scheme", () => { + expect( + validateRedirectUrl("data:text/html,<script>alert(1)</script>"), + ).toBe(undefined); + }); + + it("should reject other custom schemes", () => { + expect(validateRedirectUrl("file:///etc/passwd")).toBe(undefined); + expect(validateRedirectUrl("ftp://example.com")).toBe(undefined); + expect(validateRedirectUrl("mailto:test@example.com")).toBe(undefined); + }); + + it("should reject paths not starting with /", () => { + expect(validateRedirectUrl("dashboard")).toBe(undefined); + expect(validateRedirectUrl("path/to/page")).toBe(undefined); + }); +}); + +describe("isMobileAppRedirect", () => { + it("should return true for karakeep:// URLs", () => { + expect(isMobileAppRedirect("karakeep://")).toBe(true); + expect(isMobileAppRedirect("karakeep://callback")).toBe(true); + expect(isMobileAppRedirect("karakeep://callback/path")).toBe(true); + }); + + it("should return false for other URLs", () => { + expect(isMobileAppRedirect("/")).toBe(false); + expect(isMobileAppRedirect("/dashboard")).toBe(false); + expect(isMobileAppRedirect("https://example.com")).toBe(false); + expect(isMobileAppRedirect("http://localhost")).toBe(false); + }); +}); diff --git a/packages/shared/utils/redirectUrl.ts b/packages/shared/utils/redirectUrl.ts new file mode 100644 index 00000000..c2adffc0 --- /dev/null +++ b/packages/shared/utils/redirectUrl.ts @@ -0,0 +1,35 @@ +/** + * Validates a redirect URL to prevent open redirect attacks. + * Only allows: + * - Relative paths starting with "/" (but not "//" to prevent protocol-relative URLs) + * - The karakeep:// scheme for the mobile app + * + * @returns The validated URL if valid, otherwise undefined. + */ +export function validateRedirectUrl( + url: string | null | undefined, +): string | undefined { + if (!url) { + return undefined; + } + + // Allow relative paths starting with "/" but not "//" (protocol-relative URLs) + if (url.startsWith("/") && !url.startsWith("//")) { + return url; + } + + // Allow karakeep:// scheme for mobile app deep links + if (url.startsWith("karakeep://")) { + return url; + } + + // Reject all other schemes (http, https, javascript, data, etc.) + return undefined; +} + +/** + * Checks if the redirect URL is a mobile app deep link. + */ +export function isMobileAppRedirect(url: string): boolean { + return url.startsWith("karakeep://"); +} diff --git a/packages/shared/utils/tag.ts b/packages/shared/utils/tag.ts index 8e1bd105..b69b817e 100644 --- a/packages/shared/utils/tag.ts +++ b/packages/shared/utils/tag.ts @@ -1,6 +1,37 @@ +import type { ZTagStyle } from "../types/users"; + /** * Ensures exactly ONE leading # */ export function normalizeTagName(raw: string): string { return raw.trim().replace(/^#+/, ""); // strip every leading # } + +export type TagStyle = ZTagStyle; + +export function getTagStylePrompt(style: TagStyle): string { + switch (style) { + case "lowercase-hyphens": + return "- Use lowercase letters with hyphens between words (e.g., 'machine-learning', 'web-development')"; + case "lowercase-spaces": + return "- Use lowercase letters with spaces between words (e.g., 'machine learning', 'web development')"; + case "lowercase-underscores": + return "- Use lowercase letters with underscores between words (e.g., 'machine_learning', 'web_development')"; + case "titlecase-spaces": + return "- Use title case with spaces between words (e.g., 'Machine Learning', 'Web Development')"; + case "titlecase-hyphens": + return "- Use title case with hyphens between words (e.g., 'Machine-Learning', 'Web-Development')"; + case "camelCase": + return "- Use camelCase format (e.g., 'machineLearning', 'webDevelopment')"; + case "as-generated": + default: + return ""; + } +} + +export function getCuratedTagsPrompt(curatedTags?: string[]): string { + if (curatedTags && curatedTags.length > 0) { + return `- ONLY use tags from this predefined list: [${curatedTags.join(", ")}]. Do not create any new tags outside this list. If no tags fit, don't emit any.`; + } + return ""; +} |
