aboutsummaryrefslogtreecommitdiffstats
path: root/packages/shared
diff options
context:
space:
mode:
Diffstat (limited to 'packages/shared')
-rw-r--r--packages/shared/config.ts32
-rw-r--r--packages/shared/import-export/importer.test.ts454
-rw-r--r--packages/shared/import-export/importer.ts123
-rw-r--r--packages/shared/import-export/parsers.test.ts301
-rw-r--r--packages/shared/import-export/parsers.ts190
-rw-r--r--packages/shared/index.ts1
-rw-r--r--packages/shared/inference.ts126
-rw-r--r--packages/shared/logger.ts12
-rw-r--r--packages/shared/prompts.server.ts88
-rw-r--r--packages/shared/prompts.ts121
-rw-r--r--packages/shared/queueing.ts16
-rw-r--r--packages/shared/search.ts15
-rw-r--r--packages/shared/searchQueryParser.test.ts169
-rw-r--r--packages/shared/searchQueryParser.ts34
-rw-r--r--packages/shared/types/bookmarks.ts8
-rw-r--r--packages/shared/types/config.ts29
-rw-r--r--packages/shared/types/importSessions.ts6
-rw-r--r--packages/shared/types/readers.ts59
-rw-r--r--packages/shared/types/rules.ts33
-rw-r--r--packages/shared/types/search.ts10
-rw-r--r--packages/shared/types/tags.ts1
-rw-r--r--packages/shared/types/users.ts100
-rw-r--r--packages/shared/utils/bookmarkUtils.ts10
-rw-r--r--packages/shared/utils/redirectUrl.test.ts89
-rw-r--r--packages/shared/utils/redirectUrl.ts35
-rw-r--r--packages/shared/utils/tag.ts31
26 files changed, 1705 insertions, 388 deletions
diff --git a/packages/shared/config.ts b/packages/shared/config.ts
index 60beae1e..cfcf1532 100644
--- a/packages/shared/config.ts
+++ b/packages/shared/config.ts
@@ -58,6 +58,8 @@ const allEnv = z.object({
TURNSTILE_SECRET_KEY: z.string().optional(),
OPENAI_API_KEY: z.string().optional(),
OPENAI_BASE_URL: z.string().url().optional(),
+ OPENAI_PROXY_URL: z.string().url().optional(),
+ OPENAI_SERVICE_TIER: z.enum(["auto", "default", "flex"]).optional(),
OLLAMA_BASE_URL: z.string().url().optional(),
OLLAMA_KEEP_ALIVE: z.string().optional(),
INFERENCE_JOB_TIMEOUT_SEC: z.coerce.number().default(30),
@@ -80,6 +82,7 @@ const allEnv = z.object({
.default("eng")
.transform((val) => val.split(",")),
OCR_CONFIDENCE_THRESHOLD: z.coerce.number().default(50),
+ OCR_USE_LLM: stringBool("false"),
CRAWLER_HEADLESS_BROWSER: stringBool("true"),
BROWSER_WEB_URL: z.string().optional(),
BROWSER_WEBSOCKET_URL: z.string().optional(),
@@ -93,10 +96,12 @@ const allEnv = z.object({
SEARCH_JOB_TIMEOUT_SEC: z.coerce.number().default(30),
WEBHOOK_NUM_WORKERS: z.coerce.number().default(1),
ASSET_PREPROCESSING_NUM_WORKERS: z.coerce.number().default(1),
+ ASSET_PREPROCESSING_JOB_TIMEOUT_SEC: z.coerce.number().default(60),
RULE_ENGINE_NUM_WORKERS: z.coerce.number().default(1),
CRAWLER_DOWNLOAD_BANNER_IMAGE: stringBool("true"),
CRAWLER_STORE_SCREENSHOT: stringBool("true"),
CRAWLER_FULL_PAGE_SCREENSHOT: stringBool("false"),
+ CRAWLER_STORE_PDF: stringBool("false"),
CRAWLER_FULL_PAGE_ARCHIVE: stringBool("false"),
CRAWLER_VIDEO_DOWNLOAD: stringBool("false"),
CRAWLER_VIDEO_DOWNLOAD_MAX_SIZE: z.coerce.number().default(50),
@@ -122,8 +127,11 @@ const allEnv = z.object({
INFERENCE_LANG: z.string().default("english"),
WEBHOOK_TIMEOUT_SEC: z.coerce.number().default(5),
WEBHOOK_RETRY_TIMES: z.coerce.number().int().min(0).default(3),
+ MAX_RSS_FEEDS_PER_USER: z.coerce.number().default(1000),
+ MAX_WEBHOOKS_PER_USER: z.coerce.number().default(100),
// Build only flag
SERVER_VERSION: z.string().optional(),
+ CHANGELOG_VERSION: z.string().optional(),
DISABLE_NEW_RELEASE_CHECK: stringBool("false"),
// A flag to detect if the user is running in the old separete containers setup
@@ -205,6 +213,12 @@ const allEnv = z.object({
// Database configuration
DB_WAL_MODE: stringBool("false"),
+
+ // OpenTelemetry tracing configuration
+ OTEL_TRACING_ENABLED: stringBool("false"),
+ OTEL_EXPORTER_OTLP_ENDPOINT: z.string().url().optional(),
+ OTEL_SERVICE_NAME: z.string().default("karakeep"),
+ OTEL_SAMPLE_RATE: z.coerce.number().min(0).max(1).default(1.0),
});
const serverConfigSchema = allEnv.transform((val, ctx) => {
@@ -264,6 +278,8 @@ const serverConfigSchema = allEnv.transform((val, ctx) => {
fetchTimeoutSec: val.INFERENCE_FETCH_TIMEOUT_SEC,
openAIApiKey: val.OPENAI_API_KEY,
openAIBaseUrl: val.OPENAI_BASE_URL,
+ openAIProxyUrl: val.OPENAI_PROXY_URL,
+ openAIServiceTier: val.OPENAI_SERVICE_TIER,
ollamaBaseUrl: val.OLLAMA_BASE_URL,
ollamaKeepAlive: val.OLLAMA_KEEP_ALIVE,
textModel: val.INFERENCE_TEXT_MODEL,
@@ -296,6 +312,7 @@ const serverConfigSchema = allEnv.transform((val, ctx) => {
downloadBannerImage: val.CRAWLER_DOWNLOAD_BANNER_IMAGE,
storeScreenshot: val.CRAWLER_STORE_SCREENSHOT,
fullPageScreenshot: val.CRAWLER_FULL_PAGE_SCREENSHOT,
+ storePdf: val.CRAWLER_STORE_PDF,
fullPageArchive: val.CRAWLER_FULL_PAGE_ARCHIVE,
downloadVideo: val.CRAWLER_VIDEO_DOWNLOAD,
maxVideoDownloadSize: val.CRAWLER_VIDEO_DOWNLOAD_MAX_SIZE,
@@ -321,6 +338,7 @@ const serverConfigSchema = allEnv.transform((val, ctx) => {
langs: val.OCR_LANGS,
cacheDir: val.OCR_CACHE_DIR,
confidenceThreshold: val.OCR_CONFIDENCE_THRESHOLD,
+ useLLM: val.OCR_USE_LLM,
},
search: {
numWorkers: val.SEARCH_NUM_WORKERS,
@@ -338,12 +356,17 @@ const serverConfigSchema = allEnv.transform((val, ctx) => {
assetsDir: val.ASSETS_DIR ?? path.join(val.DATA_DIR, "assets"),
maxAssetSizeMb: val.MAX_ASSET_SIZE_MB,
serverVersion: val.SERVER_VERSION,
+ changelogVersion: val.CHANGELOG_VERSION,
disableNewReleaseCheck: val.DISABLE_NEW_RELEASE_CHECK,
usingLegacySeparateContainers: val.USING_LEGACY_SEPARATE_CONTAINERS,
webhook: {
timeoutSec: val.WEBHOOK_TIMEOUT_SEC,
retryTimes: val.WEBHOOK_RETRY_TIMES,
numWorkers: val.WEBHOOK_NUM_WORKERS,
+ maxWebhooksPerUser: val.MAX_WEBHOOKS_PER_USER,
+ },
+ feeds: {
+ maxRssFeedsPerUser: val.MAX_RSS_FEEDS_PER_USER,
},
proxy: {
httpProxy: val.CRAWLER_HTTP_PROXY,
@@ -353,6 +376,7 @@ const serverConfigSchema = allEnv.transform((val, ctx) => {
allowedInternalHostnames: val.CRAWLER_ALLOWED_INTERNAL_HOSTNAMES,
assetPreprocessing: {
numWorkers: val.ASSET_PREPROCESSING_NUM_WORKERS,
+ jobTimeoutSec: val.ASSET_PREPROCESSING_JOB_TIMEOUT_SEC,
},
ruleEngine: {
numWorkers: val.RULE_ENGINE_NUM_WORKERS,
@@ -399,6 +423,12 @@ const serverConfigSchema = allEnv.transform((val, ctx) => {
database: {
walMode: val.DB_WAL_MODE,
},
+ tracing: {
+ enabled: val.OTEL_TRACING_ENABLED,
+ otlpEndpoint: val.OTEL_EXPORTER_OTLP_ENDPOINT,
+ serviceName: val.OTEL_SERVICE_NAME,
+ sampleRate: val.OTEL_SAMPLE_RATE,
+ },
};
if (obj.auth.emailVerificationRequired && !obj.email.smtp) {
ctx.addIssue({
@@ -441,6 +471,8 @@ export const clientConfig = {
inference: {
isConfigured: serverConfig.inference.isConfigured,
inferredTagLang: serverConfig.inference.inferredTagLang,
+ enableAutoTagging: serverConfig.inference.enableAutoTagging,
+ enableAutoSummarization: serverConfig.inference.enableAutoSummarization,
},
serverVersion: serverConfig.serverVersion,
disableNewReleaseCheck: serverConfig.disableNewReleaseCheck,
diff --git a/packages/shared/import-export/importer.test.ts b/packages/shared/import-export/importer.test.ts
index 48cd1204..f097f8d5 100644
--- a/packages/shared/import-export/importer.test.ts
+++ b/packages/shared/import-export/importer.test.ts
@@ -1,13 +1,14 @@
import { describe, expect, it, vi } from "vitest";
-import { importBookmarksFromFile, ParsedBookmark } from ".";
+import type { StagedBookmark } from ".";
+import { importBookmarksFromFile } from ".";
const fakeFile = {
text: vi.fn().mockResolvedValue("fake file content"),
} as unknown as File;
describe("importBookmarksFromFile", () => {
- it("creates root list, folders and imports bookmarks with progress", async () => {
+ it("creates root list, folders and stages bookmarks with progress", async () => {
const parsers = {
pocket: vi.fn().mockReturnValue([
{
@@ -61,32 +62,23 @@ describe("importBookmarksFromFile", () => {
},
);
- const createdBookmarks: ParsedBookmark[] = [];
- const addedToLists: { bookmarkId: string; listIds: string[] }[] = [];
- const updatedTags: { bookmarkId: string; tags: string[] }[] = [];
-
- const createBookmark = vi.fn(async (bookmark: ParsedBookmark) => {
- createdBookmarks.push(bookmark);
- return {
- id: `bookmark-${createdBookmarks.length}`,
- alreadyExists: false,
- };
- });
-
- const addBookmarkToLists = vi.fn(
- async (input: { bookmarkId: string; listIds: string[] }) => {
- addedToLists.push(input);
+ const stagedBookmarks: StagedBookmark[] = [];
+ const stageImportedBookmarks = vi.fn(
+ async (input: {
+ importSessionId: string;
+ bookmarks: StagedBookmark[];
+ }) => {
+ stagedBookmarks.push(...input.bookmarks);
},
);
- const updateBookmarkTags = vi.fn(
- async (input: { bookmarkId: string; tags: string[] }) => {
- updatedTags.push(input);
- },
+ const finalizeImportStaging = vi.fn();
+ const createImportSession = vi.fn(
+ async (_input: { name: string; rootListId: string }) => ({
+ id: "session-1",
+ }),
);
- const createImportSession = vi.fn(async () => ({ id: "session-1" }));
-
const progress: number[] = [];
const res = await importBookmarksFromFile(
{
@@ -95,9 +87,8 @@ describe("importBookmarksFromFile", () => {
rootListName: "Imported",
deps: {
createList,
- createBookmark,
- addBookmarkToLists,
- updateBookmarkTags,
+ stageImportedBookmarks,
+ finalizeImportStaging,
createImportSession,
},
onProgress: (d, t) => progress.push(d / t),
@@ -106,12 +97,14 @@ describe("importBookmarksFromFile", () => {
);
expect(res.rootListId).toBe("Imported");
+ expect(res.importSessionId).toBe("session-1");
expect(res.counts).toEqual({
- successes: 5,
+ successes: 0,
failures: 0,
alreadyExisted: 0,
total: 5, // Using custom parser, no deduplication
});
+
// Root + all unique folders from paths
expect(createdLists).toEqual([
{ name: "Imported", icon: "ā¬†ļø" },
@@ -122,38 +115,43 @@ describe("importBookmarksFromFile", () => {
{ name: "Tech", parentId: "Imported/Reading", icon: "šŸ“" },
{ name: "Duplicates", parentId: "Imported/Development", icon: "šŸ“" },
]);
- // Verify we have 5 created bookmarks (no deduplication with custom parser)
- expect(createdBookmarks).toHaveLength(5);
- // Verify GitHub bookmark exists (will be two separate bookmarks since no deduplication)
- const githubBookmarks = createdBookmarks.filter(
- (bookmark) =>
- bookmark.content?.type === "link" &&
- bookmark.content.url === "https://github.com/example/repo",
- );
- expect(githubBookmarks).toHaveLength(2);
- // Verify text bookmark exists
- const textBookmark = createdBookmarks.find(
- (bookmark) => bookmark.content?.type === "text",
+
+ // Verify 5 bookmarks were staged (in 1 batch since < 50)
+ expect(stagedBookmarks).toHaveLength(5);
+ expect(stageImportedBookmarks).toHaveBeenCalledTimes(1);
+
+ // Verify GitHub link bookmark was staged correctly
+ const githubBookmark = stagedBookmarks.find(
+ (b) => b.url === "https://github.com/example/repo" && b.type === "link",
);
+ expect(githubBookmark).toBeDefined();
+ if (!githubBookmark) {
+ throw new Error("Expected GitHub bookmark to be staged");
+ }
+ expect(githubBookmark.title).toBe("GitHub Repository");
+ expect(githubBookmark.tags).toEqual(["dev", "github"]);
+ expect(githubBookmark.listIds).toEqual(["Imported/Development/Projects"]);
+
+ // Verify text bookmark was staged correctly
+ const textBookmark = stagedBookmarks.find((b) => b.type === "text");
expect(textBookmark).toBeDefined();
- expect(textBookmark!.archived).toBe(true);
- expect(textBookmark!.notes).toBe("Additional context");
- // Verify bookmark with no path goes to root
- const noCategoryBookmark = createdBookmarks.find(
- (bookmark) =>
- bookmark.content?.type === "link" &&
- bookmark.content.url === "https://example.com/misc",
+ if (!textBookmark) {
+ throw new Error("Expected text bookmark to be staged");
+ }
+ expect(textBookmark.content).toBe("Important notes about the project");
+ expect(textBookmark.note).toBe("Additional context");
+ expect(textBookmark.listIds).toEqual(["Imported/Personal"]);
+
+ // Verify bookmark with empty paths gets root list ID
+ const noCategoryBookmark = stagedBookmarks.find(
+ (b) => b.url === "https://example.com/misc",
);
expect(noCategoryBookmark).toBeDefined();
- // Find the corresponding list assignment for this bookmark
- const noCategoryBookmarkId = `bookmark-${createdBookmarks.indexOf(noCategoryBookmark!) + 1}`;
- const listAssignment = addedToLists.find(
- (a) => a.bookmarkId === noCategoryBookmarkId,
- );
- expect(listAssignment!.listIds).toEqual(["Imported"]);
+ expect(noCategoryBookmark!.listIds).toEqual(["Imported"]);
+
+ // Verify finalizeImportStaging was called
+ expect(finalizeImportStaging).toHaveBeenCalledWith("session-1");
- // Verify that tags were updated for bookmarks that have tags
- expect(updatedTags.length).toBeGreaterThan(0);
expect(progress).toContain(0);
expect(progress.at(-1)).toBe(1);
});
@@ -167,9 +165,8 @@ describe("importBookmarksFromFile", () => {
rootListName: "Imported",
deps: {
createList: vi.fn(),
- createBookmark: vi.fn(),
- addBookmarkToLists: vi.fn(),
- updateBookmarkTags: vi.fn(),
+ stageImportedBookmarks: vi.fn(),
+ finalizeImportStaging: vi.fn(),
createImportSession: vi.fn(async () => ({ id: "session-1" })),
},
},
@@ -182,29 +179,29 @@ describe("importBookmarksFromFile", () => {
});
});
- it("continues import when individual bookmarks fail", async () => {
+ it("stages all bookmarks successfully", async () => {
const parsers = {
pocket: vi.fn().mockReturnValue([
{
- title: "Success Bookmark 1",
- content: { type: "link", url: "https://example.com/success1" },
- tags: ["success"],
+ title: "Bookmark 1",
+ content: { type: "link", url: "https://example.com/1" },
+ tags: ["tag1"],
addDate: 100,
- paths: [["Success"]],
+ paths: [["Category1"]],
},
{
- title: "Failure Bookmark",
- content: { type: "link", url: "https://example.com/failure" },
- tags: ["failure"],
+ title: "Bookmark 2",
+ content: { type: "link", url: "https://example.com/2" },
+ tags: ["tag2"],
addDate: 200,
- paths: [["Failure"]],
+ paths: [["Category2"]],
},
{
- title: "Success Bookmark 2",
- content: { type: "link", url: "https://example.com/success2" },
- tags: ["success"],
+ title: "Bookmark 3",
+ content: { type: "link", url: "https://example.com/3" },
+ tags: ["tag3"],
addDate: 300,
- paths: [["Success"]],
+ paths: [["Category1"]],
},
]),
};
@@ -220,37 +217,23 @@ describe("importBookmarksFromFile", () => {
},
);
- const createdBookmarks: ParsedBookmark[] = [];
- const addedToLists: { bookmarkId: string; listIds: string[] }[] = [];
- const updatedTags: { bookmarkId: string; tags: string[] }[] = [];
-
- const createBookmark = vi.fn(async (bookmark: ParsedBookmark) => {
- // Simulate failure for the "Failure Bookmark"
- if (bookmark.title === "Failure Bookmark") {
- throw new Error("Simulated bookmark creation failure");
- }
-
- createdBookmarks.push(bookmark);
- return {
- id: `bookmark-${createdBookmarks.length}`,
- alreadyExists: false,
- };
- });
-
- const addBookmarkToLists = vi.fn(
- async (input: { bookmarkId: string; listIds: string[] }) => {
- addedToLists.push(input);
+ const stagedBookmarks: StagedBookmark[] = [];
+ const stageImportedBookmarks = vi.fn(
+ async (input: {
+ importSessionId: string;
+ bookmarks: StagedBookmark[];
+ }) => {
+ stagedBookmarks.push(...input.bookmarks);
},
);
- const updateBookmarkTags = vi.fn(
- async (input: { bookmarkId: string; tags: string[] }) => {
- updatedTags.push(input);
- },
+ const finalizeImportStaging = vi.fn();
+ const createImportSession = vi.fn(
+ async (_input: { name: string; rootListId: string }) => ({
+ id: "session-1",
+ }),
);
- const createImportSession = vi.fn(async () => ({ id: "session-1" }));
-
const progress: number[] = [];
const res = await importBookmarksFromFile(
{
@@ -259,9 +242,8 @@ describe("importBookmarksFromFile", () => {
rootListName: "Imported",
deps: {
createList,
- createBookmark,
- addBookmarkToLists,
- updateBookmarkTags,
+ stageImportedBookmarks,
+ finalizeImportStaging,
createImportSession,
},
onProgress: (d, t) => progress.push(d / t),
@@ -269,63 +251,57 @@ describe("importBookmarksFromFile", () => {
{ parsers },
);
- // Should still create the root list
expect(res.rootListId).toBe("Imported");
-
- // Should track both successes and failures
+ expect(res.importSessionId).toBe("session-1");
expect(res.counts).toEqual({
- successes: 2, // Two successful bookmarks
- failures: 1, // One failed bookmark
+ successes: 0,
+ failures: 0,
alreadyExisted: 0,
total: 3,
});
- // Should create folders for all bookmarks (including failed ones)
+ // Should create folders for all bookmarks
expect(createdLists).toEqual([
{ name: "Imported", icon: "ā¬†ļø" },
- { name: "Success", parentId: "Imported", icon: "šŸ“" },
- { name: "Failure", parentId: "Imported", icon: "šŸ“" },
+ { name: "Category1", parentId: "Imported", icon: "šŸ“" },
+ { name: "Category2", parentId: "Imported", icon: "šŸ“" },
]);
- // Only successful bookmarks should be created
- expect(createdBookmarks).toHaveLength(2);
- expect(createdBookmarks.map((b) => b.title)).toEqual([
- "Success Bookmark 1",
- "Success Bookmark 2",
- ]);
+ // All bookmarks should be staged (in 1 batch since < 50)
+ expect(stagedBookmarks).toHaveLength(3);
+ expect(stageImportedBookmarks).toHaveBeenCalledTimes(1);
- // Only successful bookmarks should be added to lists and have tags updated
- expect(addedToLists).toHaveLength(2);
- expect(updatedTags).toHaveLength(2);
+ // Verify finalizeImportStaging was called
+ expect(finalizeImportStaging).toHaveBeenCalledWith("session-1");
- // Progress should complete even with failures
+ // Progress should complete
expect(progress).toContain(0);
expect(progress.at(-1)).toBe(1);
});
- it("handles failures in different stages of bookmark import", async () => {
+ it("stages bookmarks with different paths", async () => {
const parsers = {
pocket: vi.fn().mockReturnValue([
{
- title: "Success Bookmark",
- content: { type: "link", url: "https://example.com/success" },
- tags: ["success"],
+ title: "Bookmark 1",
+ content: { type: "link", url: "https://example.com/1" },
+ tags: ["tag1"],
addDate: 100,
- paths: [["Success"]],
+ paths: [["Path1"]],
},
{
- title: "Fail at List Assignment",
- content: { type: "link", url: "https://example.com/fail-list" },
- tags: ["fail"],
+ title: "Bookmark 2",
+ content: { type: "link", url: "https://example.com/2" },
+ tags: ["tag2"],
addDate: 200,
- paths: [["Failure"]],
+ paths: [["Path2"]],
},
{
- title: "Fail at Tag Update",
- content: { type: "link", url: "https://example.com/fail-tag" },
- tags: ["fail-tag"],
+ title: "Bookmark 3",
+ content: { type: "link", url: "https://example.com/3" },
+ tags: ["tag3"],
addDate: 300,
- paths: [["Failure"]],
+ paths: [["Path2"]],
},
]),
};
@@ -338,31 +314,23 @@ describe("importBookmarksFromFile", () => {
},
);
- let bookmarkIdCounter = 1;
- const createBookmark = vi.fn(async () => {
- return { id: `bookmark-${bookmarkIdCounter++}`, alreadyExists: false };
- });
-
- const addBookmarkToLists = vi.fn(
- async (input: { bookmarkId: string; listIds: string[] }) => {
- // Simulate failure for specific bookmark
- if (input.bookmarkId === "bookmark-2") {
- throw new Error("Failed to add bookmark to lists");
- }
+ const stagedBookmarks: StagedBookmark[] = [];
+ const stageImportedBookmarks = vi.fn(
+ async (input: {
+ importSessionId: string;
+ bookmarks: StagedBookmark[];
+ }) => {
+ stagedBookmarks.push(...input.bookmarks);
},
);
- const updateBookmarkTags = vi.fn(
- async (input: { bookmarkId: string; tags: string[] }) => {
- // Simulate failure for specific bookmark
- if (input.bookmarkId === "bookmark-3") {
- throw new Error("Failed to update bookmark tags");
- }
- },
+ const finalizeImportStaging = vi.fn();
+ const createImportSession = vi.fn(
+ async (_input: { name: string; rootListId: string }) => ({
+ id: "session-1",
+ }),
);
- const createImportSession = vi.fn(async () => ({ id: "session-1" }));
-
const progress: number[] = [];
const res = await importBookmarksFromFile(
{
@@ -371,9 +339,8 @@ describe("importBookmarksFromFile", () => {
rootListName: "Imported",
deps: {
createList,
- createBookmark,
- addBookmarkToLists,
- updateBookmarkTags,
+ stageImportedBookmarks,
+ finalizeImportStaging,
createImportSession,
},
onProgress: (d, t) => progress.push(d / t),
@@ -383,23 +350,110 @@ describe("importBookmarksFromFile", () => {
expect(res.rootListId).toBe("Imported");
expect(res.importSessionId).toBe("session-1");
-
- // All bookmarks are created successfully, but 2 fail in post-processing
expect(res.counts).toEqual({
- successes: 1, // Only one fully successful bookmark
- failures: 2, // Two failed in post-processing steps
+ successes: 0,
+ failures: 0,
alreadyExisted: 0,
total: 3,
});
- // All bookmarks should be created (failures happen after bookmark creation)
- expect(createBookmark).toHaveBeenCalledTimes(3);
+ // All bookmarks should be staged (in 1 batch since < 50)
+ expect(stagedBookmarks).toHaveLength(3);
+ expect(stageImportedBookmarks).toHaveBeenCalledTimes(1);
+
+ // Verify finalizeImportStaging was called
+ expect(finalizeImportStaging).toHaveBeenCalledWith("session-1");
+ });
+
+ it("handles HTML bookmarks with empty folder names", async () => {
+ const htmlContent = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><H3 ADD_DATE="1765995928" LAST_MODIFIED="1765995928">Bluetooth Fernbedienung</H3>
+ <DL><p>
+ <DT><H3 ADD_DATE="1765995928" LAST_MODIFIED="0"></H3>
+ <DL><p>
+ <DT><A HREF="https://www.example.com/product.html" ADD_DATE="1593444456">Example Product</A>
+ </DL><p>
+ </DL><p>
+</DL><p>`;
+
+ const mockFile = {
+ text: vi.fn().mockResolvedValue(htmlContent),
+ } as unknown as File;
+
+ const createdLists: { name: string; icon: string; parentId?: string }[] =
+ [];
+ const createList = vi.fn(
+ async (input: { name: string; icon: string; parentId?: string }) => {
+ createdLists.push(input);
+ return {
+ id: `${input.parentId ? input.parentId + "/" : ""}${input.name}`,
+ };
+ },
+ );
+
+ const stagedBookmarks: StagedBookmark[] = [];
+ const stageImportedBookmarks = vi.fn(
+ async (input: {
+ importSessionId: string;
+ bookmarks: StagedBookmark[];
+ }) => {
+ stagedBookmarks.push(...input.bookmarks);
+ },
+ );
+
+ const finalizeImportStaging = vi.fn();
+ const createImportSession = vi.fn(
+ async (_input: { name: string; rootListId: string }) => ({
+ id: "session-1",
+ }),
+ );
+
+ const res = await importBookmarksFromFile({
+ file: mockFile,
+ source: "html",
+ rootListName: "HTML Import",
+ deps: {
+ createList,
+ stageImportedBookmarks,
+ finalizeImportStaging,
+ createImportSession,
+ },
+ });
+
+ expect(res.counts).toEqual({
+ successes: 0,
+ failures: 0,
+ alreadyExisted: 0,
+ total: 1,
+ });
+
+ // Verify that the empty folder name was replaced with "Unnamed"
+ expect(createdLists).toEqual([
+ { name: "HTML Import", icon: "ā¬†ļø" },
+ { name: "Bluetooth Fernbedienung", parentId: "HTML Import", icon: "šŸ“" },
+ {
+ name: "Unnamed",
+ parentId: "HTML Import/Bluetooth Fernbedienung",
+ icon: "šŸ“",
+ },
+ ]);
- // addBookmarkToLists should be called 3 times (but one fails)
- expect(addBookmarkToLists).toHaveBeenCalledTimes(3);
+ // Verify the bookmark was staged with correct listIds
+ expect(stagedBookmarks).toHaveLength(1);
+ expect(stagedBookmarks[0]).toMatchObject({
+ title: "Example Product",
+ url: "https://www.example.com/product.html",
+ type: "link",
+ tags: [],
+ listIds: ["HTML Import/Bluetooth Fernbedienung/Unnamed"],
+ });
- // updateBookmarkTags should be called 2 times (once fails at list assignment, one fails at tag update)
- expect(updateBookmarkTags).toHaveBeenCalledTimes(2);
+ // Verify finalizeImportStaging was called
+ expect(finalizeImportStaging).toHaveBeenCalledWith("session-1");
});
it("parses mymind CSV export correctly", async () => {
@@ -413,14 +467,22 @@ describe("importBookmarksFromFile", () => {
text: vi.fn().mockResolvedValue(mymindCsv),
} as unknown as File;
- const createdBookmarks: ParsedBookmark[] = [];
- const createBookmark = vi.fn(async (bookmark: ParsedBookmark) => {
- createdBookmarks.push(bookmark);
- return {
- id: `bookmark-${createdBookmarks.length}`,
- alreadyExists: false,
- };
- });
+ const stagedBookmarks: StagedBookmark[] = [];
+ const stageImportedBookmarks = vi.fn(
+ async (input: {
+ importSessionId: string;
+ bookmarks: StagedBookmark[];
+ }) => {
+ stagedBookmarks.push(...input.bookmarks);
+ },
+ );
+
+ const finalizeImportStaging = vi.fn();
+ const createImportSession = vi.fn(
+ async (_input: { name: string; rootListId: string }) => ({
+ id: "session-1",
+ }),
+ );
const res = await importBookmarksFromFile({
file: mockFile,
@@ -432,52 +494,54 @@ describe("importBookmarksFromFile", () => {
id: `${input.parentId ? input.parentId + "/" : ""}${input.name}`,
}),
),
- createBookmark,
- addBookmarkToLists: vi.fn(),
- updateBookmarkTags: vi.fn(),
- createImportSession: vi.fn(async () => ({ id: "session-1" })),
+ stageImportedBookmarks,
+ finalizeImportStaging,
+ createImportSession,
},
});
expect(res.counts).toEqual({
- successes: 3,
+ successes: 0,
failures: 0,
alreadyExisted: 0,
total: 3,
});
- // Verify first bookmark (WebPage with URL)
- expect(createdBookmarks[0]).toMatchObject({
+ // Verify 3 bookmarks were staged
+ expect(stagedBookmarks).toHaveLength(3);
+
+ // Verify first bookmark (WebPage with URL) - mymind has no paths, so root list
+ expect(stagedBookmarks[0]).toMatchObject({
title: "mymind",
- content: {
- type: "link",
- url: "https://access.mymind.com/everything",
- },
+ url: "https://access.mymind.com/everything",
+ type: "link",
tags: ["Wellness", "Self-Improvement", "Psychology"],
+ listIds: ["mymind Import"],
});
- expect(createdBookmarks[0].addDate).toBeCloseTo(
- new Date("2024-12-04T23:02:10Z").getTime() / 1000,
+ expect(stagedBookmarks[0].sourceAddedAt).toEqual(
+ new Date("2024-12-04T23:02:10Z"),
);
// Verify second bookmark (WebPage with note)
- expect(createdBookmarks[1]).toMatchObject({
+ expect(stagedBookmarks[1]).toMatchObject({
title: "Movies / TV / Anime",
- content: {
- type: "link",
- url: "https://fmhy.pages.dev/videopiracyguide",
- },
+ url: "https://fmhy.pages.dev/videopiracyguide",
+ type: "link",
tags: ["Tools", "media", "Entertainment"],
- notes: "Free Media!",
+ note: "Free Media!",
+ listIds: ["mymind Import"],
});
// Verify third bookmark (Note with text content)
- expect(createdBookmarks[2]).toMatchObject({
+ expect(stagedBookmarks[2]).toMatchObject({
title: "",
- content: {
- type: "text",
- text: "• Critical Thinking\n• Empathy",
- },
+ content: "• Critical Thinking\n• Empathy",
+ type: "text",
tags: [],
+ listIds: ["mymind Import"],
});
+
+ // Verify finalizeImportStaging was called
+ expect(finalizeImportStaging).toHaveBeenCalledWith("session-1");
});
});
diff --git a/packages/shared/import-export/importer.ts b/packages/shared/import-export/importer.ts
index b32c49c1..be24ca73 100644
--- a/packages/shared/import-export/importer.ts
+++ b/packages/shared/import-export/importer.ts
@@ -1,4 +1,3 @@
-import { limitConcurrency } from "../concurrency";
import { MAX_LIST_NAME_LENGTH } from "../types/lists";
import { ImportSource, ParsedBookmark, parseImportFile } from "./parsers";
@@ -9,28 +8,32 @@ export interface ImportCounts {
total: number;
}
+export interface StagedBookmark {
+ type: "link" | "text" | "asset";
+ url?: string;
+ title?: string;
+ content?: string;
+ note?: string;
+ tags: string[];
+ listIds: string[];
+ sourceAddedAt?: Date;
+}
+
export interface ImportDeps {
createList: (input: {
name: string;
icon: string;
parentId?: string;
}) => Promise<{ id: string }>;
- createBookmark: (
- bookmark: ParsedBookmark,
- sessionId: string,
- ) => Promise<{ id: string; alreadyExists?: boolean }>;
- addBookmarkToLists: (input: {
- bookmarkId: string;
- listIds: string[];
- }) => Promise<void>;
- updateBookmarkTags: (input: {
- bookmarkId: string;
- tags: string[];
+ stageImportedBookmarks: (input: {
+ importSessionId: string;
+ bookmarks: StagedBookmark[];
}) => Promise<void>;
createImportSession: (input: {
name: string;
rootListId: string;
}) => Promise<{ id: string }>;
+ finalizeImportStaging: (sessionId: string) => Promise<void>;
}
export interface ImportOptions {
@@ -62,7 +65,7 @@ export async function importBookmarksFromFile(
},
options: ImportOptions = {},
): Promise<ImportResult> {
- const { concurrencyLimit = 20, parsers } = options;
+ const { parsers } = options;
const textContent = await file.text();
const parsedBookmarks = parsers?.[source]
@@ -120,50 +123,74 @@ export async function importBookmarksFromFile(
pathMap[pathKey] = folderList.id;
}
- let done = 0;
- const importPromises = parsedBookmarks.map((bookmark) => async () => {
- try {
- const listIds = bookmark.paths.map(
- (path) => pathMap[path.join(PATH_DELIMITER)] || rootList.id,
- );
- if (listIds.length === 0) listIds.push(rootList.id);
+ // Prepare all bookmarks for staging
+ const bookmarksToStage: StagedBookmark[] = parsedBookmarks.map((bookmark) => {
+ // Convert paths to list IDs using pathMap
+ // If no paths, assign to root list
+ const listIds =
+ bookmark.paths.length === 0
+ ? [rootList.id]
+ : bookmark.paths
+ .map((path) => {
+ if (path.length === 0) {
+ return rootList.id;
+ }
+ const pathKey = path.join(PATH_DELIMITER);
+ return pathMap[pathKey] || rootList.id;
+ })
+ .filter((id, index, arr) => arr.indexOf(id) === index); // dedupe
- const created = await deps.createBookmark(bookmark, session.id);
- await deps.addBookmarkToLists({ bookmarkId: created.id, listIds });
- if (bookmark.tags && bookmark.tags.length > 0) {
- await deps.updateBookmarkTags({
- bookmarkId: created.id,
- tags: bookmark.tags,
- });
- }
+ // Determine type and extract content appropriately
+ let type: "link" | "text" | "asset" = "link";
+ let url: string | undefined;
+ let textContent: string | undefined;
- return created;
- } finally {
- done += 1;
- onProgress?.(done, parsedBookmarks.length);
+ if (bookmark.content) {
+ if (bookmark.content.type === "link") {
+ type = "link";
+ url = bookmark.content.url;
+ } else if (bookmark.content.type === "text") {
+ type = "text";
+ textContent = bookmark.content.text;
+ }
}
- });
- const resultsPromises = limitConcurrency(importPromises, concurrencyLimit);
- const results = await Promise.allSettled(resultsPromises);
+ return {
+ type,
+ url,
+ title: bookmark.title,
+ content: textContent,
+ note: bookmark.notes,
+ tags: bookmark.tags ?? [],
+ listIds,
+ sourceAddedAt: bookmark.addDate
+ ? new Date(bookmark.addDate * 1000)
+ : undefined,
+ };
+ });
- let successes = 0;
- let failures = 0;
- let alreadyExisted = 0;
+ // Stage bookmarks in batches of 50
+ const BATCH_SIZE = 50;
+ let staged = 0;
- for (const r of results) {
- if (r.status === "fulfilled") {
- if (r.value.alreadyExists) alreadyExisted++;
- else successes++;
- } else {
- failures++;
- }
+ for (let i = 0; i < bookmarksToStage.length; i += BATCH_SIZE) {
+ const batch = bookmarksToStage.slice(i, i + BATCH_SIZE);
+ await deps.stageImportedBookmarks({
+ importSessionId: session.id,
+ bookmarks: batch,
+ });
+ staged += batch.length;
+ onProgress?.(staged, parsedBookmarks.length);
}
+
+ // Finalize staging - marks session as "pending" for worker pickup
+ await deps.finalizeImportStaging(session.id);
+
return {
counts: {
- successes,
- failures,
- alreadyExisted,
+ successes: 0,
+ failures: 0,
+ alreadyExisted: 0,
total: parsedBookmarks.length,
},
rootListId: rootList.id,
diff --git a/packages/shared/import-export/parsers.test.ts b/packages/shared/import-export/parsers.test.ts
new file mode 100644
index 00000000..18502305
--- /dev/null
+++ b/packages/shared/import-export/parsers.test.ts
@@ -0,0 +1,301 @@
+import { describe, expect, it } from "vitest";
+
+import { parseImportFile } from "./parsers";
+
+describe("parseNetscapeBookmarkFile", () => {
+ it("parses a simple bookmark file with single bookmark", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><A HREF="https://example.com" ADD_DATE="1234567890">Example Site</A>
+</DL><p>`;
+
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(1);
+ expect(result[0]).toMatchObject({
+ title: "Example Site",
+ content: {
+ type: "link",
+ url: "https://example.com",
+ },
+ tags: [],
+ addDate: 1234567890,
+ paths: [[]],
+ });
+ });
+
+ it("parses bookmarks with tags", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><A HREF="https://example.com" ADD_DATE="1234567890" TAGS="tag1,tag2,tag3">Example Site</A>
+</DL><p>`;
+
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(1);
+ expect(result[0].tags).toEqual(["tag1", "tag2", "tag3"]);
+ });
+
+ it("parses bookmarks in nested folders", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><H3 ADD_DATE="1234567890" LAST_MODIFIED="1234567891">Folder1</H3>
+ <DL><p>
+ <DT><H3 ADD_DATE="1234567892" LAST_MODIFIED="1234567893">Folder2</H3>
+ <DL><p>
+ <DT><A HREF="https://example.com" ADD_DATE="1234567894">Nested Bookmark</A>
+ </DL><p>
+ </DL><p>
+</DL><p>`;
+
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(1);
+ expect(result[0]).toMatchObject({
+ title: "Nested Bookmark",
+ content: {
+ type: "link",
+ url: "https://example.com",
+ },
+ paths: [["Folder1", "Folder2"]],
+ });
+ });
+
+ it("handles empty folder names by replacing with 'Unnamed'", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><H3 ADD_DATE="1234567890" LAST_MODIFIED="1234567891">Named Folder</H3>
+ <DL><p>
+ <DT><H3 ADD_DATE="1234567892" LAST_MODIFIED="0"></H3>
+ <DL><p>
+ <DT><A HREF="https://example.com" ADD_DATE="1234567894">Bookmark</A>
+ </DL><p>
+ </DL><p>
+</DL><p>`;
+
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(1);
+ expect(result[0].paths).toEqual([["Named Folder", "Unnamed"]]);
+ });
+
+ it("parses multiple bookmarks in different folders", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><H3 ADD_DATE="1234567890">Tech</H3>
+ <DL><p>
+ <DT><A HREF="https://github.com" ADD_DATE="1234567891">GitHub</A>
+ <DT><A HREF="https://stackoverflow.com" ADD_DATE="1234567892">Stack Overflow</A>
+ </DL><p>
+ <DT><H3 ADD_DATE="1234567893">News</H3>
+ <DL><p>
+ <DT><A HREF="https://news.ycombinator.com" ADD_DATE="1234567894">Hacker News</A>
+ </DL><p>
+</DL><p>`;
+
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(3);
+
+ expect(result[0]).toMatchObject({
+ title: "GitHub",
+ content: { type: "link", url: "https://github.com" },
+ paths: [["Tech"]],
+ });
+
+ expect(result[1]).toMatchObject({
+ title: "Stack Overflow",
+ content: { type: "link", url: "https://stackoverflow.com" },
+ paths: [["Tech"]],
+ });
+
+ expect(result[2]).toMatchObject({
+ title: "Hacker News",
+ content: { type: "link", url: "https://news.ycombinator.com" },
+ paths: [["News"]],
+ });
+ });
+
+ it("parses bookmarks at root level (no folders)", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><A HREF="https://example1.com" ADD_DATE="1234567890">Bookmark 1</A>
+ <DT><A HREF="https://example2.com" ADD_DATE="1234567891">Bookmark 2</A>
+</DL><p>`;
+
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(2);
+ expect(result[0].paths).toEqual([[]]);
+ expect(result[1].paths).toEqual([[]]);
+ });
+
+ it("handles deeply nested folder structures", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><H3>Level1</H3>
+ <DL><p>
+ <DT><H3>Level2</H3>
+ <DL><p>
+ <DT><H3>Level3</H3>
+ <DL><p>
+ <DT><H3>Level4</H3>
+ <DL><p>
+ <DT><A HREF="https://example.com" ADD_DATE="1234567890">Deep Bookmark</A>
+ </DL><p>
+ </DL><p>
+ </DL><p>
+ </DL><p>
+</DL><p>`;
+
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(1);
+ expect(result[0].paths).toEqual([["Level1", "Level2", "Level3", "Level4"]]);
+ });
+
+ it("deduplicates bookmarks with the same URL", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><H3>Folder1</H3>
+ <DL><p>
+ <DT><A HREF="https://example.com" ADD_DATE="1234567890" TAGS="tag1">First Instance</A>
+ </DL><p>
+ <DT><H3>Folder2</H3>
+ <DL><p>
+ <DT><A HREF="https://example.com" ADD_DATE="1234567891" TAGS="tag2">Second Instance</A>
+ </DL><p>
+</DL><p>`;
+
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(1);
+ expect(result[0]).toMatchObject({
+ content: { type: "link", url: "https://example.com" },
+ tags: ["tag1", "tag2"],
+ addDate: 1234567890, // Should keep the earlier date
+ });
+ expect(result[0].paths).toHaveLength(2);
+ expect(result[0].paths).toContainEqual(["Folder1"]);
+ expect(result[0].paths).toContainEqual(["Folder2"]);
+ });
+
+ it("merges notes from duplicate bookmarks", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><A HREF="https://example.com" ADD_DATE="1234567890">Bookmark</A>
+ <DD>First note
+ <DT><A HREF="https://example.com" ADD_DATE="1234567891">Bookmark</A>
+ <DD>Second note
+</DL><p>`;
+
+ // Note: The current parser doesn't extract DD notes, but this test
+ // documents the expected behavior if/when DD parsing is added
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(1);
+ expect(result[0].content).toMatchObject({
+ type: "link",
+ url: "https://example.com",
+ });
+ });
+
+ it("handles bookmarks without ADD_DATE attribute", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><A HREF="https://example.com">No Date Bookmark</A>
+</DL><p>`;
+
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(1);
+ expect(result[0].addDate).toBeUndefined();
+ });
+
+ it("handles bookmarks without HREF attribute", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><A ADD_DATE="1234567890">No URL Bookmark</A>
+</DL><p>`;
+
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(1);
+ expect(result[0].content).toBeUndefined();
+ });
+
+ it("handles mixed structure with folders and root-level bookmarks", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><A HREF="https://root1.com" ADD_DATE="1234567890">Root Bookmark 1</A>
+ <DT><H3>Folder</H3>
+ <DL><p>
+ <DT><A HREF="https://folder1.com" ADD_DATE="1234567891">Folder Bookmark</A>
+ </DL><p>
+ <DT><A HREF="https://root2.com" ADD_DATE="1234567892">Root Bookmark 2</A>
+</DL><p>`;
+
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(3);
+ expect(result[0]).toMatchObject({
+ title: "Root Bookmark 1",
+ paths: [[]],
+ });
+ expect(result[1]).toMatchObject({
+ title: "Folder Bookmark",
+ paths: [["Folder"]],
+ });
+ expect(result[2]).toMatchObject({
+ title: "Root Bookmark 2",
+ paths: [[]],
+ });
+ });
+
+ it("throws error for non-Netscape bookmark files", () => {
+ const html = `<html>
+<head><title>Not a bookmark file</title></head>
+<body>Just a regular HTML file</body>
+</html>`;
+
+ expect(() => parseImportFile("html", html)).toThrow(
+ "The uploaded html file does not seem to be a bookmark file",
+ );
+ });
+});
diff --git a/packages/shared/import-export/parsers.ts b/packages/shared/import-export/parsers.ts
index f4d3f862..24d85c80 100644
--- a/packages/shared/import-export/parsers.ts
+++ b/packages/shared/import-export/parsers.ts
@@ -1,5 +1,6 @@
// Copied from https://gist.github.com/devster31/4e8c6548fd16ffb75c02e6f24e27f9b9
+import type { AnyNode } from "domhandler";
import * as cheerio from "cheerio";
import { parse } from "csv-parse/sync";
import { z } from "zod";
@@ -10,11 +11,13 @@ import { zExportSchema } from "./exporters";
export type ImportSource =
| "html"
| "pocket"
+ | "matter"
| "omnivore"
| "karakeep"
| "linkwarden"
| "tab-session-manager"
- | "mymind";
+ | "mymind"
+ | "instapaper";
export interface ParsedBookmark {
title: string;
@@ -34,41 +37,58 @@ function parseNetscapeBookmarkFile(textContent: string): ParsedBookmark[] {
}
const $ = cheerio.load(textContent);
+ const bookmarks: ParsedBookmark[] = [];
- return $("a")
- .map(function (_index, a) {
- const $a = $(a);
- const addDate = $a.attr("add_date");
- let tags: string[] = [];
+ // Recursively traverse the bookmark hierarchy top-down
+ function traverseFolder(
+ element: cheerio.Cheerio<AnyNode>,
+ currentPath: string[],
+ ) {
+ element.children().each((_index, child) => {
+ const $child = $(child);
- const tagsStr = $a.attr("tags");
- try {
- tags = tagsStr && tagsStr.length > 0 ? tagsStr.split(",") : [];
- } catch {
- /* empty */
- }
- const url = $a.attr("href");
+ // Check if this is a folder (DT with H3)
+ const h3 = $child.children("h3").first();
+ if (h3.length > 0) {
+ const folderName = h3.text().trim() || "Unnamed";
+ const newPath = [...currentPath, folderName];
+
+ // Find the DL that follows this folder and recurse into it
+ const dl = $child.children("dl").first();
+ if (dl.length > 0) {
+ traverseFolder(dl, newPath);
+ }
+ } else {
+ // Check if this is a bookmark (DT with A)
+ const anchor = $child.children("a").first();
+ if (anchor.length > 0) {
+ const addDate = anchor.attr("add_date");
+ const tagsStr = anchor.attr("tags");
+ const tags = tagsStr && tagsStr.length > 0 ? tagsStr.split(",") : [];
+ const url = anchor.attr("href");
- // Build folder path by traversing up the hierarchy
- const path: string[] = [];
- let current = $a.parent();
- while (current && current.length > 0) {
- const h3 = current.find("> h3").first();
- if (h3.length > 0) {
- path.unshift(h3.text());
+ bookmarks.push({
+ title: anchor.text(),
+ content: url
+ ? { type: BookmarkTypes.LINK as const, url }
+ : undefined,
+ tags,
+ addDate:
+ typeof addDate === "undefined" ? undefined : parseInt(addDate),
+ paths: [currentPath],
+ });
}
- current = current.parent();
}
+ });
+ }
- return {
- title: $a.text(),
- content: url ? { type: BookmarkTypes.LINK as const, url } : undefined,
- tags,
- addDate: typeof addDate === "undefined" ? undefined : parseInt(addDate),
- paths: [path],
- };
- })
- .get();
+ // Start traversal from the root DL element
+ const rootDl = $("dl").first();
+ if (rootDl.length > 0) {
+ traverseFolder(rootDl, []);
+ }
+
+ return bookmarks;
}
function parsePocketBookmarkFile(textContent: string): ParsedBookmark[] {
@@ -95,6 +115,52 @@ function parsePocketBookmarkFile(textContent: string): ParsedBookmark[] {
});
}
+function parseMatterBookmarkFile(textContent: string): ParsedBookmark[] {
+ const zMatterRecordSchema = z.object({
+ Title: z.string(),
+ Author: z.string(),
+ Publisher: z.string(),
+ URL: z.string(),
+ Tags: z
+ .string()
+ .transform((tags) => (tags.length > 0 ? tags.split(";") : [])),
+ "Word Count": z.string(),
+ "In Queue": z.string().transform((inQueue) => inQueue === "False"),
+ Favorited: z.string(),
+ Read: z.string(),
+ Highlight_Count: z.string(),
+ "Last Interaction Date": z
+ .string()
+ .transform((date) => Date.parse(date) / 1000),
+ "File Id": z.string(),
+ });
+
+ const zMatterExportSchema = z.array(zMatterRecordSchema);
+
+ const records = parse(textContent, {
+ columns: true,
+ skip_empty_lines: true,
+ });
+
+ const parsed = zMatterExportSchema.safeParse(records);
+ if (!parsed.success) {
+ throw new Error(
+ `The uploaded CSV file contains an invalid Matter bookmark file: ${parsed.error.toString()}`,
+ );
+ }
+
+ return parsed.data.map((record) => {
+ return {
+ title: record.Title,
+ content: { type: BookmarkTypes.LINK as const, url: record.URL },
+ tags: record.Tags,
+ addDate: record["Last Interaction Date"],
+ archived: record["In Queue"],
+ paths: [], // TODO
+ };
+ });
+}
+
function parseKarakeepBookmarkFile(textContent: string): ParsedBookmark[] {
const parsed = zExportSchema.safeParse(JSON.parse(textContent));
if (!parsed.success) {
@@ -292,6 +358,64 @@ function parseMymindBookmarkFile(textContent: string): ParsedBookmark[] {
});
}
+function parseInstapaperBookmarkFile(textContent: string): ParsedBookmark[] {
+ const zInstapaperRecordScheme = z.object({
+ URL: z.string(),
+ Title: z.string(),
+ Selection: z.string(),
+ Folder: z.string(),
+ Timestamp: z.string(),
+ Tags: z.string(),
+ });
+
+ const zInstapaperExportScheme = z.array(zInstapaperRecordScheme);
+
+ const record = parse(textContent, {
+ columns: true,
+ skip_empty_lines: true,
+ });
+
+ const parsed = zInstapaperExportScheme.safeParse(record);
+
+ if (!parsed.success) {
+ throw new Error(
+ `CSV file contains an invalid instapaper bookmark file: ${parsed.error.toString()}`,
+ );
+ }
+
+ return parsed.data.map((record) => {
+ let content: ParsedBookmark["content"];
+ if (record.URL && record.URL.trim().length > 0) {
+ content = { type: BookmarkTypes.LINK as const, url: record.URL.trim() };
+ } else if (record.Selection && record.Selection.trim().length > 0) {
+ content = {
+ type: BookmarkTypes.TEXT as const,
+ text: record.Selection.trim(),
+ };
+ }
+
+ const addDate = parseInt(record.Timestamp);
+
+ let tags: string[] = [];
+ try {
+ const parsedTags = JSON.parse(record.Tags);
+ if (Array.isArray(parsedTags)) {
+ tags = parsedTags.map((tag) => tag.toString().trim());
+ }
+ } catch {
+ tags = [];
+ }
+
+ return {
+ title: record.Title || "",
+ content,
+ addDate,
+ tags,
+ paths: [], // TODO
+ };
+ });
+}
+
function deduplicateBookmarks(bookmarks: ParsedBookmark[]): ParsedBookmark[] {
const deduplicatedBookmarksMap = new Map<string, ParsedBookmark>();
const textBookmarks: ParsedBookmark[] = [];
@@ -345,6 +469,9 @@ export function parseImportFile(
case "pocket":
result = parsePocketBookmarkFile(textContent);
break;
+ case "matter":
+ result = parseMatterBookmarkFile(textContent);
+ break;
case "karakeep":
result = parseKarakeepBookmarkFile(textContent);
break;
@@ -360,6 +487,9 @@ export function parseImportFile(
case "mymind":
result = parseMymindBookmarkFile(textContent);
break;
+ case "instapaper":
+ result = parseInstapaperBookmarkFile(textContent);
+ break;
}
return deduplicateBookmarks(result);
}
diff --git a/packages/shared/index.ts b/packages/shared/index.ts
index e69de29b..cb0ff5c3 100644
--- a/packages/shared/index.ts
+++ b/packages/shared/index.ts
@@ -0,0 +1 @@
+export {};
diff --git a/packages/shared/inference.ts b/packages/shared/inference.ts
index 7689f4f4..61a621ac 100644
--- a/packages/shared/inference.ts
+++ b/packages/shared/inference.ts
@@ -1,6 +1,7 @@
import { Ollama } from "ollama";
import OpenAI from "openai";
import { zodResponseFormat } from "openai/helpers/zod";
+import * as undici from "undici";
import { z } from "zod";
import { zodToJsonSchema } from "zod-to-json-schema";
@@ -51,26 +52,49 @@ const mapInferenceOutputSchema = <
return opts[type];
};
+export interface OpenAIInferenceConfig {
+ apiKey: string;
+ baseURL?: string;
+ proxyUrl?: string;
+ serviceTier?: typeof serverConfig.inference.openAIServiceTier;
+ textModel: string;
+ imageModel: string;
+ contextLength: number;
+ maxOutputTokens: number;
+ useMaxCompletionTokens: boolean;
+ outputSchema: "structured" | "json" | "plain";
+}
+
export class InferenceClientFactory {
static build(): InferenceClient | null {
if (serverConfig.inference.openAIApiKey) {
- return new OpenAIInferenceClient();
+ return OpenAIInferenceClient.fromConfig();
}
if (serverConfig.inference.ollamaBaseUrl) {
- return new OllamaInferenceClient();
+ return OllamaInferenceClient.fromConfig();
}
return null;
}
}
-class OpenAIInferenceClient implements InferenceClient {
+export class OpenAIInferenceClient implements InferenceClient {
openAI: OpenAI;
+ private config: OpenAIInferenceConfig;
+
+ constructor(config: OpenAIInferenceConfig) {
+ this.config = config;
+
+ const fetchOptions = config.proxyUrl
+ ? {
+ dispatcher: new undici.ProxyAgent(config.proxyUrl),
+ }
+ : undefined;
- constructor() {
this.openAI = new OpenAI({
- apiKey: serverConfig.inference.openAIApiKey,
- baseURL: serverConfig.inference.openAIBaseUrl,
+ apiKey: config.apiKey,
+ baseURL: config.baseURL,
+ ...(fetchOptions ? { fetchOptions } : {}),
defaultHeaders: {
"X-Title": "Karakeep",
"HTTP-Referer": "https://karakeep.app",
@@ -78,6 +102,21 @@ class OpenAIInferenceClient implements InferenceClient {
});
}
+ static fromConfig(): OpenAIInferenceClient {
+ return new OpenAIInferenceClient({
+ apiKey: serverConfig.inference.openAIApiKey!,
+ baseURL: serverConfig.inference.openAIBaseUrl,
+ proxyUrl: serverConfig.inference.openAIProxyUrl,
+ serviceTier: serverConfig.inference.openAIServiceTier,
+ textModel: serverConfig.inference.textModel,
+ imageModel: serverConfig.inference.imageModel,
+ contextLength: serverConfig.inference.contextLength,
+ maxOutputTokens: serverConfig.inference.maxOutputTokens,
+ useMaxCompletionTokens: serverConfig.inference.useMaxCompletionTokens,
+ outputSchema: serverConfig.inference.outputSchema,
+ });
+ }
+
async inferFromText(
prompt: string,
_opts: Partial<InferenceOptions>,
@@ -89,10 +128,13 @@ class OpenAIInferenceClient implements InferenceClient {
const chatCompletion = await this.openAI.chat.completions.create(
{
messages: [{ role: "user", content: prompt }],
- model: serverConfig.inference.textModel,
- ...(serverConfig.inference.useMaxCompletionTokens
- ? { max_completion_tokens: serverConfig.inference.maxOutputTokens }
- : { max_tokens: serverConfig.inference.maxOutputTokens }),
+ model: this.config.textModel,
+ ...(this.config.serviceTier
+ ? { service_tier: this.config.serviceTier }
+ : {}),
+ ...(this.config.useMaxCompletionTokens
+ ? { max_completion_tokens: this.config.maxOutputTokens }
+ : { max_tokens: this.config.maxOutputTokens }),
response_format: mapInferenceOutputSchema(
{
structured: optsWithDefaults.schema
@@ -101,7 +143,7 @@ class OpenAIInferenceClient implements InferenceClient {
json: { type: "json_object" },
plain: undefined,
},
- serverConfig.inference.outputSchema,
+ this.config.outputSchema,
),
},
{
@@ -128,10 +170,13 @@ class OpenAIInferenceClient implements InferenceClient {
};
const chatCompletion = await this.openAI.chat.completions.create(
{
- model: serverConfig.inference.imageModel,
- ...(serverConfig.inference.useMaxCompletionTokens
- ? { max_completion_tokens: serverConfig.inference.maxOutputTokens }
- : { max_tokens: serverConfig.inference.maxOutputTokens }),
+ model: this.config.imageModel,
+ ...(this.config.serviceTier
+ ? { service_tier: this.config.serviceTier }
+ : {}),
+ ...(this.config.useMaxCompletionTokens
+ ? { max_completion_tokens: this.config.maxOutputTokens }
+ : { max_tokens: this.config.maxOutputTokens }),
response_format: mapInferenceOutputSchema(
{
structured: optsWithDefaults.schema
@@ -140,7 +185,7 @@ class OpenAIInferenceClient implements InferenceClient {
json: { type: "json_object" },
plain: undefined,
},
- serverConfig.inference.outputSchema,
+ this.config.outputSchema,
),
messages: [
{
@@ -185,16 +230,40 @@ class OpenAIInferenceClient implements InferenceClient {
}
}
+export interface OllamaInferenceConfig {
+ baseUrl: string;
+ textModel: string;
+ imageModel: string;
+ contextLength: number;
+ maxOutputTokens: number;
+ keepAlive?: string;
+ outputSchema: "structured" | "json" | "plain";
+}
+
class OllamaInferenceClient implements InferenceClient {
ollama: Ollama;
+ private config: OllamaInferenceConfig;
- constructor() {
+ constructor(config: OllamaInferenceConfig) {
+ this.config = config;
this.ollama = new Ollama({
- host: serverConfig.inference.ollamaBaseUrl,
+ host: config.baseUrl,
fetch: customFetch, // Use the custom fetch with configurable timeout
});
}
+ static fromConfig(): OllamaInferenceClient {
+ return new OllamaInferenceClient({
+ baseUrl: serverConfig.inference.ollamaBaseUrl!,
+ textModel: serverConfig.inference.textModel,
+ imageModel: serverConfig.inference.imageModel,
+ contextLength: serverConfig.inference.contextLength,
+ maxOutputTokens: serverConfig.inference.maxOutputTokens,
+ keepAlive: serverConfig.inference.ollamaKeepAlive,
+ outputSchema: serverConfig.inference.outputSchema,
+ });
+ }
+
async runModel(
model: string,
prompt: string,
@@ -213,7 +282,7 @@ class OllamaInferenceClient implements InferenceClient {
this.ollama.abort();
};
}
- const chatCompletion = await this.ollama.chat({
+ const chatCompletion = await this.ollama.generate({
model: model,
format: mapInferenceOutputSchema(
{
@@ -223,24 +292,23 @@ class OllamaInferenceClient implements InferenceClient {
json: "json",
plain: undefined,
},
- serverConfig.inference.outputSchema,
+ this.config.outputSchema,
),
stream: true,
- keep_alive: serverConfig.inference.ollamaKeepAlive,
+ keep_alive: this.config.keepAlive,
options: {
- num_ctx: serverConfig.inference.contextLength,
- num_predict: serverConfig.inference.maxOutputTokens,
+ num_ctx: this.config.contextLength,
+ num_predict: this.config.maxOutputTokens,
},
- messages: [
- { role: "user", content: prompt, images: image ? [image] : undefined },
- ],
+ prompt: prompt,
+ images: image ? [image] : undefined,
});
let totalTokens = 0;
let response = "";
try {
for await (const part of chatCompletion) {
- response += part.message.content;
+ response += part.response;
if (!isNaN(part.eval_count)) {
totalTokens += part.eval_count;
}
@@ -277,7 +345,7 @@ class OllamaInferenceClient implements InferenceClient {
..._opts,
};
return await this.runModel(
- serverConfig.inference.textModel,
+ this.config.textModel,
prompt,
optsWithDefaults,
undefined,
@@ -295,7 +363,7 @@ class OllamaInferenceClient implements InferenceClient {
..._opts,
};
return await this.runModel(
- serverConfig.inference.imageModel,
+ this.config.imageModel,
prompt,
optsWithDefaults,
image,
diff --git a/packages/shared/logger.ts b/packages/shared/logger.ts
index efe78ff3..f3c5d45d 100644
--- a/packages/shared/logger.ts
+++ b/packages/shared/logger.ts
@@ -14,4 +14,16 @@ const logger = winston.createLogger({
transports: [new winston.transports.Console()],
});
+export function throttledLogger(periodMs: number) {
+ let lastLogTime = 0;
+
+ return (level: string, message: string) => {
+ const now = Date.now();
+ if (now - lastLogTime >= periodMs) {
+ lastLogTime = now;
+ logger.log(level, message);
+ }
+ };
+}
+
export default logger;
diff --git a/packages/shared/prompts.server.ts b/packages/shared/prompts.server.ts
new file mode 100644
index 00000000..c53f4190
--- /dev/null
+++ b/packages/shared/prompts.server.ts
@@ -0,0 +1,88 @@
+import type { Tiktoken } from "js-tiktoken";
+
+import type { ZTagStyle } from "./types/users";
+import { constructSummaryPrompt, constructTextTaggingPrompt } from "./prompts";
+
+let encoding: Tiktoken | null = null;
+
+/**
+ * Lazy load the encoding to avoid loading the tiktoken data into memory
+ * until it's actually needed
+ */
+async function getEncodingInstance(): Promise<Tiktoken> {
+ if (!encoding) {
+ // Dynamic import to lazy load the tiktoken module
+ const { getEncoding } = await import("js-tiktoken");
+ encoding = getEncoding("o200k_base");
+ }
+ return encoding;
+}
+
+async function calculateNumTokens(text: string): Promise<number> {
+ const enc = await getEncodingInstance();
+ return enc.encode(text).length;
+}
+
+async function truncateContent(
+ content: string,
+ length: number,
+): Promise<string> {
+ const enc = await getEncodingInstance();
+ const tokens = enc.encode(content);
+ if (tokens.length <= length) {
+ return content;
+ }
+ const truncatedTokens = tokens.slice(0, length);
+ return enc.decode(truncatedTokens);
+}
+
+/**
+ * Remove duplicate whitespaces to avoid tokenization issues
+ */
+function preprocessContent(content: string) {
+ return content.replace(/(\s){10,}/g, "$1");
+}
+
+export async function buildTextPrompt(
+ lang: string,
+ customPrompts: string[],
+ content: string,
+ contextLength: number,
+ tagStyle: ZTagStyle,
+ curatedTags?: string[],
+): Promise<string> {
+ content = preprocessContent(content);
+ const promptTemplate = constructTextTaggingPrompt(
+ lang,
+ customPrompts,
+ "",
+ tagStyle,
+ curatedTags,
+ );
+ const promptSize = await calculateNumTokens(promptTemplate);
+ const available = Math.max(0, contextLength - promptSize);
+ const truncatedContent =
+ available === 0 ? "" : await truncateContent(content, available);
+ return constructTextTaggingPrompt(
+ lang,
+ customPrompts,
+ truncatedContent,
+ tagStyle,
+ curatedTags,
+ );
+}
+
+export async function buildSummaryPrompt(
+ lang: string,
+ customPrompts: string[],
+ content: string,
+ contextLength: number,
+): Promise<string> {
+ content = preprocessContent(content);
+ const promptTemplate = constructSummaryPrompt(lang, customPrompts, "");
+ const promptSize = await calculateNumTokens(promptTemplate);
+ const available = Math.max(0, contextLength - promptSize);
+ const truncatedContent =
+ available === 0 ? "" : await truncateContent(content, available);
+ return constructSummaryPrompt(lang, customPrompts, truncatedContent);
+}
diff --git a/packages/shared/prompts.ts b/packages/shared/prompts.ts
index 5a6a705e..6c5c02c4 100644
--- a/packages/shared/prompts.ts
+++ b/packages/shared/prompts.ts
@@ -1,19 +1,5 @@
-import type { Tiktoken } from "js-tiktoken";
-
-let encoding: Tiktoken | null = null;
-
-/**
- * Lazy load the encoding to avoid loading the tiktoken data into memory
- * until it's actually needed
- */
-async function getEncodingInstance(): Promise<Tiktoken> {
- if (!encoding) {
- // Dynamic import to lazy load the tiktoken module
- const { getEncoding } = await import("js-tiktoken");
- encoding = getEncoding("o200k_base");
- }
- return encoding;
-}
+import type { ZTagStyle } from "./types/users";
+import { getCuratedTagsPrompt, getTagStylePrompt } from "./utils/tag";
/**
* Remove duplicate whitespaces to avoid tokenization issues
@@ -22,33 +8,25 @@ function preprocessContent(content: string) {
return content.replace(/(\s){10,}/g, "$1");
}
-async function calculateNumTokens(text: string): Promise<number> {
- const enc = await getEncodingInstance();
- return enc.encode(text).length;
-}
-
-async function truncateContent(
- content: string,
- length: number,
-): Promise<string> {
- const enc = await getEncodingInstance();
- const tokens = enc.encode(content);
- if (tokens.length <= length) {
- return content;
- }
- const truncatedTokens = tokens.slice(0, length);
- return enc.decode(truncatedTokens);
-}
+export function buildImagePrompt(
+ lang: string,
+ customPrompts: string[],
+ tagStyle: ZTagStyle,
+ curatedTags?: string[],
+) {
+ const tagStyleInstruction = getTagStylePrompt(tagStyle);
+ const curatedInstruction = getCuratedTagsPrompt(curatedTags);
-export function buildImagePrompt(lang: string, customPrompts: string[]) {
return `
-You are an expert whose responsibility is to help with automatic text tagging for a read-it-later app.
-Please analyze the attached image and suggest relevant tags that describe its key themes, topics, and main ideas. The rules are:
+You are an expert whose responsibility is to help with automatic text tagging for a read-it-later/bookmarking app.
+Analyze the attached image and suggest relevant tags that describe its key themes, topics, and main ideas. The rules are:
- Aim for a variety of tags, including broad categories, specific keywords, and potential sub-genres.
- The tags must be in ${lang}.
- If the tag is not generic enough, don't include it.
- Aim for 10-15 tags.
- If there are no good tags, don't emit any.
+${curatedInstruction}
+${tagStyleInstruction}
${customPrompts && customPrompts.map((p) => `- ${p}`).join("\n")}
You must respond in valid JSON with the key "tags" and the value is list of tags. Don't wrap the response in a markdown code.`;
}
@@ -56,20 +34,29 @@ You must respond in valid JSON with the key "tags" and the value is list of tags
/**
* Construct tagging prompt for text content
*/
-function constructTextTaggingPrompt(
+export function constructTextTaggingPrompt(
lang: string,
customPrompts: string[],
content: string,
+ tagStyle: ZTagStyle,
+ curatedTags?: string[],
): string {
+ const tagStyleInstruction = getTagStylePrompt(tagStyle);
+ const curatedInstruction = getCuratedTagsPrompt(curatedTags);
+
return `
-You are an expert whose responsibility is to help with automatic tagging for a read-it-later app.
-Please analyze the TEXT_CONTENT below and suggest relevant tags that describe its key themes, topics, and main ideas. The rules are:
+You are an expert whose responsibility is to help with automatic tagging for a read-it-later/bookmarking app.
+Analyze the TEXT_CONTENT below and suggest relevant tags that describe its key themes, topics, and main ideas. The rules are:
- Aim for a variety of tags, including broad categories, specific keywords, and potential sub-genres.
- The tags must be in ${lang}.
- If the tag is not generic enough, don't include it.
-- The content can include text for cookie consent and privacy policy, ignore those while tagging.
+- Do NOT generate tags related to:
+ - An error page (404, 403, blocked, not found, dns errors)
+ - Boilerplate content (cookie consent, login walls, GDPR notices)
- Aim for 3-5 tags.
- If there are no good tags, leave the array empty.
+${curatedInstruction}
+${tagStyleInstruction}
${customPrompts && customPrompts.map((p) => `- ${p}`).join("\n")}
<TEXT_CONTENT>
@@ -81,7 +68,7 @@ You must respond in JSON with the key "tags" and the value is an array of string
/**
* Construct summary prompt
*/
-function constructSummaryPrompt(
+export function constructSummaryPrompt(
lang: string,
customPrompts: string[],
content: string,
@@ -101,46 +88,18 @@ export function buildTextPromptUntruncated(
lang: string,
customPrompts: string[],
content: string,
+ tagStyle: ZTagStyle,
+ curatedTags?: string[],
): string {
return constructTextTaggingPrompt(
lang,
customPrompts,
preprocessContent(content),
+ tagStyle,
+ curatedTags,
);
}
-export async function buildTextPrompt(
- lang: string,
- customPrompts: string[],
- content: string,
- contextLength: number,
-): Promise<string> {
- content = preprocessContent(content);
- const promptTemplate = constructTextTaggingPrompt(lang, customPrompts, "");
- const promptSize = await calculateNumTokens(promptTemplate);
- const truncatedContent = await truncateContent(
- content,
- contextLength - promptSize,
- );
- return constructTextTaggingPrompt(lang, customPrompts, truncatedContent);
-}
-
-export async function buildSummaryPrompt(
- lang: string,
- customPrompts: string[],
- content: string,
- contextLength: number,
-): Promise<string> {
- content = preprocessContent(content);
- const promptTemplate = constructSummaryPrompt(lang, customPrompts, "");
- const promptSize = await calculateNumTokens(promptTemplate);
- const truncatedContent = await truncateContent(
- content,
- contextLength - promptSize,
- );
- return constructSummaryPrompt(lang, customPrompts, truncatedContent);
-}
-
/**
* Build summary prompt without truncation (for previews/UI)
*/
@@ -155,3 +114,19 @@ export function buildSummaryPromptUntruncated(
preprocessContent(content),
);
}
+
+/**
+ * Build OCR prompt for extracting text from images using LLM
+ */
+export function buildOCRPrompt(): string {
+ return `You are an OCR (Optical Character Recognition) expert. Your task is to extract ALL text from this image.
+
+Rules:
+- Extract every piece of text visible in the image, including titles, body text, captions, labels, watermarks, and any other textual content.
+- Preserve the original structure and formatting as much as possible (e.g., paragraphs, lists, headings).
+- If text appears in multiple columns, read from left to right, top to bottom.
+- If text is partially obscured or unclear, make your best attempt and indicate uncertainty with [unclear] if needed.
+- Do not add any commentary, explanations, or descriptions of non-text elements.
+- If there is no text in the image, respond with an empty string.
+- Output ONLY the extracted text, nothing else.`;
+}
diff --git a/packages/shared/queueing.ts b/packages/shared/queueing.ts
index 0dd6ed6b..d1f4bcef 100644
--- a/packages/shared/queueing.ts
+++ b/packages/shared/queueing.ts
@@ -2,6 +2,21 @@ import { ZodType } from "zod";
import { PluginManager, PluginType } from "./plugins";
+/**
+ * Special error that indicates a job should be retried after a delay
+ * without counting against the retry attempts limit.
+ * Useful for rate-limiting scenarios (e.g. honoring a server-provided retry delay).
+ */
+export class QueueRetryAfterError extends Error {
+ constructor(
+ message: string,
+ public readonly delayMs: number,
+ ) {
+ super(message);
+ this.name = "QueueRetryAfterError";
+ }
+}
+
export interface EnqueueOptions {
idempotencyKey?: string;
priority?: number;
@@ -48,6 +63,7 @@ export interface RunnerOptions<T> {
export interface Queue<T> {
opts: QueueOptions;
+ ensureInit(): Promise<void>;
name(): string;
enqueue(payload: T, options?: EnqueueOptions): Promise<string | undefined>;
stats(): Promise<{
diff --git a/packages/shared/search.ts b/packages/shared/search.ts
index d23ab29f..651b5245 100644
--- a/packages/shared/search.ts
+++ b/packages/shared/search.ts
@@ -60,9 +60,20 @@ export interface SearchResponse {
processingTimeMs: number;
}
+export interface IndexingOptions {
+ /**
+ * Whether to batch requests. Defaults to true.
+ * Set to false to bypass batching for improved reliability (e.g., on retries).
+ */
+ batch?: boolean;
+}
+
export interface SearchIndexClient {
- addDocuments(documents: BookmarkSearchDocument[]): Promise<void>;
- deleteDocuments(ids: string[]): Promise<void>;
+ addDocuments(
+ documents: BookmarkSearchDocument[],
+ options?: IndexingOptions,
+ ): Promise<void>;
+ deleteDocuments(ids: string[], options?: IndexingOptions): Promise<void>;
search(options: SearchOptions): Promise<SearchResponse>;
clearIndex(): Promise<void>;
}
diff --git a/packages/shared/searchQueryParser.test.ts b/packages/shared/searchQueryParser.test.ts
index aa11433f..37275284 100644
--- a/packages/shared/searchQueryParser.test.ts
+++ b/packages/shared/searchQueryParser.test.ts
@@ -332,7 +332,176 @@ describe("Search Query Parser", () => {
inverse: true,
},
});
+ expect(parseSearchQuery("source:rss")).toEqual({
+ result: "full",
+ text: "",
+ matcher: {
+ type: "source",
+ source: "rss",
+ inverse: false,
+ },
+ });
+ expect(parseSearchQuery("-source:rss")).toEqual({
+ result: "full",
+ text: "",
+ matcher: {
+ type: "source",
+ source: "rss",
+ inverse: true,
+ },
+ });
+ expect(parseSearchQuery("source:web")).toEqual({
+ result: "full",
+ text: "",
+ matcher: {
+ type: "source",
+ source: "web",
+ inverse: false,
+ },
+ });
+ expect(parseSearchQuery("-source:web")).toEqual({
+ result: "full",
+ text: "",
+ matcher: {
+ type: "source",
+ source: "web",
+ inverse: true,
+ },
+ });
+ });
+ test("! negation alias for -", () => {
+ // ! should work exactly like - for negation
+ expect(parseSearchQuery("!is:archived")).toEqual({
+ result: "full",
+ text: "",
+ matcher: {
+ type: "archived",
+ archived: false,
+ },
+ });
+ expect(parseSearchQuery("!is:fav")).toEqual({
+ result: "full",
+ text: "",
+ matcher: {
+ type: "favourited",
+ favourited: false,
+ },
+ });
+ expect(parseSearchQuery("!#my-tag")).toEqual({
+ result: "full",
+ text: "",
+ matcher: {
+ type: "tagName",
+ tagName: "my-tag",
+ inverse: true,
+ },
+ });
+ expect(parseSearchQuery("!tag:my-tag")).toEqual({
+ result: "full",
+ text: "",
+ matcher: {
+ type: "tagName",
+ tagName: "my-tag",
+ inverse: true,
+ },
+ });
+ expect(parseSearchQuery("!url:example.com")).toEqual({
+ result: "full",
+ text: "",
+ matcher: {
+ type: "url",
+ url: "example.com",
+ inverse: true,
+ },
+ });
+ expect(parseSearchQuery("!list:my-list")).toEqual({
+ result: "full",
+ text: "",
+ matcher: {
+ type: "listName",
+ listName: "my-list",
+ inverse: true,
+ },
+ });
+ expect(parseSearchQuery("!is:link")).toEqual({
+ result: "full",
+ text: "",
+ matcher: {
+ type: "type",
+ typeName: BookmarkTypes.LINK,
+ inverse: true,
+ },
+ });
+ // Combined with complex queries
+ expect(parseSearchQuery("is:fav !is:archived")).toEqual({
+ result: "full",
+ text: "",
+ matcher: {
+ type: "and",
+ matchers: [
+ {
+ type: "favourited",
+ favourited: true,
+ },
+ {
+ type: "archived",
+ archived: false,
+ },
+ ],
+ },
+ });
});
+
+ test("tag: qualifier alias for #", () => {
+ // tag: should work exactly like #
+ expect(parseSearchQuery("tag:my-tag")).toEqual({
+ result: "full",
+ text: "",
+ matcher: {
+ type: "tagName",
+ tagName: "my-tag",
+ inverse: false,
+ },
+ });
+ expect(parseSearchQuery("-tag:my-tag")).toEqual({
+ result: "full",
+ text: "",
+ matcher: {
+ type: "tagName",
+ tagName: "my-tag",
+ inverse: true,
+ },
+ });
+ expect(parseSearchQuery('tag:"my tag"')).toEqual({
+ result: "full",
+ text: "",
+ matcher: {
+ type: "tagName",
+ tagName: "my tag",
+ inverse: false,
+ },
+ });
+ expect(parseSearchQuery('-tag:"my tag"')).toEqual({
+ result: "full",
+ text: "",
+ matcher: {
+ type: "tagName",
+ tagName: "my tag",
+ inverse: true,
+ },
+ });
+ // Tag values whose prefix collides with a reserved word (e.g. "and" in "android") should still parse as plain tag names
+ expect(parseSearchQuery("tag:android")).toEqual({
+ result: "full",
+ text: "",
+ matcher: {
+ type: "tagName",
+ tagName: "android",
+ inverse: false,
+ },
+ });
+ });
+
test("date queries", () => {
expect(parseSearchQuery("after:2023-10-12")).toEqual({
result: "full",
diff --git a/packages/shared/searchQueryParser.ts b/packages/shared/searchQueryParser.ts
index 7447593a..7eb3b185 100644
--- a/packages/shared/searchQueryParser.ts
+++ b/packages/shared/searchQueryParser.ts
@@ -16,7 +16,7 @@ import {
} from "typescript-parsec";
import { z } from "zod";
-import { BookmarkTypes } from "./types/bookmarks";
+import { BookmarkTypes, zBookmarkSourceSchema } from "./types/bookmarks";
import { Matcher } from "./types/search";
import { parseRelativeDate } from "./utils/relativeDateUtils";
@@ -33,6 +33,7 @@ enum TokenType {
Space = "SPACE",
Hash = "HASH",
Minus = "MINUS",
+ Exclamation = "EXCLAMATION",
}
// Rules are in order of priority
@@ -41,7 +42,10 @@ const lexerRules: [RegExp, TokenType][] = [
[/^\s+or/i, TokenType.Or],
[/^#/, TokenType.Hash],
- [/^(is|url|list|after|before|age|feed|title):/, TokenType.Qualifier],
+ [
+ /^(is|url|list|after|before|age|feed|title|tag|source):/,
+ TokenType.Qualifier,
+ ],
[/^"([^"]+)"/, TokenType.StringLiteral],
@@ -49,6 +53,7 @@ const lexerRules: [RegExp, TokenType][] = [
[/^\)/, TokenType.RParen],
[/^\s+/, TokenType.Space],
[/^-/, TokenType.Minus],
+ [/^!/, TokenType.Exclamation],
// This needs to be last as it matches a lot of stuff
[/^[^ )(]+/, TokenType.Ident],
@@ -116,7 +121,10 @@ const EXP = rule<TokenType, TextAndMatcher>();
MATCHER.setPattern(
alt_sc(
apply(
- seq(opt(str("-")), kright(str("is:"), tok(TokenType.Ident))),
+ seq(
+ opt(alt(str("-"), str("!"))),
+ kright(str("is:"), tok(TokenType.Ident)),
+ ),
([minus, ident]) => {
switch (ident.text) {
case "fav":
@@ -182,7 +190,7 @@ MATCHER.setPattern(
),
apply(
seq(
- opt(str("-")),
+ opt(alt(str("-"), str("!"))),
alt(tok(TokenType.Qualifier), tok(TokenType.Hash)),
alt(
apply(tok(TokenType.Ident), (tok) => {
@@ -206,6 +214,7 @@ MATCHER.setPattern(
matcher: { type: "title", title: ident, inverse: !!minus },
};
case "#":
+ case "tag:":
return {
text: "",
matcher: { type: "tagName", tagName: ident, inverse: !!minus },
@@ -224,6 +233,23 @@ MATCHER.setPattern(
inverse: !!minus,
},
};
+ case "source:": {
+ const parsed = zBookmarkSourceSchema.safeParse(ident);
+ if (!parsed.success) {
+ return {
+ text: (minus?.text ?? "") + qualifier.text + ident,
+ matcher: undefined,
+ };
+ }
+ return {
+ text: "",
+ matcher: {
+ type: "source",
+ source: parsed.data,
+ inverse: !!minus,
+ },
+ };
+ }
case "after:":
try {
return {
diff --git a/packages/shared/types/bookmarks.ts b/packages/shared/types/bookmarks.ts
index cbaa4574..2f32bd51 100644
--- a/packages/shared/types/bookmarks.ts
+++ b/packages/shared/types/bookmarks.ts
@@ -1,7 +1,7 @@
import { z } from "zod";
import { zCursorV2 } from "./pagination";
-import { zBookmarkTagSchema } from "./tags";
+import { zAttachedByEnumSchema, zBookmarkTagSchema } from "./tags";
export const MAX_BOOKMARK_TITLE_LENGTH = 1000;
@@ -18,6 +18,7 @@ export type ZSortOrder = z.infer<typeof zSortOrder>;
export const zAssetTypesSchema = z.enum([
"linkHtmlContent",
"screenshot",
+ "pdf",
"assetScreenshot",
"bannerImage",
"fullPageArchive",
@@ -25,6 +26,7 @@ export const zAssetTypesSchema = z.enum([
"bookmarkAsset",
"precrawledArchive",
"userUploaded",
+ "avatar",
"unknown",
]);
export type ZAssetType = z.infer<typeof zAssetTypesSchema>;
@@ -43,6 +45,7 @@ export const zBookmarkedLinkSchema = z.object({
imageUrl: z.string().nullish(),
imageAssetId: z.string().nullish(),
screenshotAssetId: z.string().nullish(),
+ pdfAssetId: z.string().nullish(),
fullPageArchiveAssetId: z.string().nullish(),
precrawledArchiveAssetId: z.string().nullish(),
videoAssetId: z.string().nullish(),
@@ -50,6 +53,7 @@ export const zBookmarkedLinkSchema = z.object({
htmlContent: z.string().nullish(),
contentAssetId: z.string().nullish(),
crawledAt: z.date().nullish(),
+ crawlStatus: z.enum(["success", "failure", "pending"]).nullish(),
author: z.string().nullish(),
publisher: z.string().nullish(),
datePublished: z.date().nullish(),
@@ -160,6 +164,7 @@ export const zNewBookmarkRequestSchema = z
// A mechanism to prioritize crawling of bookmarks depending on whether
// they were created by a user interaction or by a bulk import.
crawlPriority: z.enum(["low", "normal"]).optional(),
+ // Deprecated; retained for backward compatibility with existing clients.
importSessionId: z.string().optional(),
source: zBookmarkSourceSchema.optional(),
})
@@ -248,6 +253,7 @@ export const zManipulatedTagSchema = z
// At least one of the two must be set
tagId: z.string().optional(), // If the tag already exists and we know its id we should pass it
tagName: z.string().optional(),
+ attachedBy: zAttachedByEnumSchema.optional().default("human"),
})
.refine((val) => !!val.tagId || !!val.tagName, {
message: "You must provide either a tagId or a tagName",
diff --git a/packages/shared/types/config.ts b/packages/shared/types/config.ts
new file mode 100644
index 00000000..bd4310f9
--- /dev/null
+++ b/packages/shared/types/config.ts
@@ -0,0 +1,29 @@
+import { z } from "zod";
+
+export const zClientConfigSchema = z.object({
+ publicUrl: z.string(),
+ publicApiUrl: z.string(),
+ demoMode: z
+ .object({
+ email: z.string().optional(),
+ password: z.string().optional(),
+ })
+ .optional(),
+ auth: z.object({
+ disableSignups: z.boolean(),
+ disablePasswordAuth: z.boolean(),
+ }),
+ turnstile: z
+ .object({
+ siteKey: z.string(),
+ })
+ .nullable(),
+ inference: z.object({
+ isConfigured: z.boolean(),
+ inferredTagLang: z.string(),
+ enableAutoTagging: z.boolean(),
+ enableAutoSummarization: z.boolean(),
+ }),
+ serverVersion: z.string().optional(),
+ disableNewReleaseCheck: z.boolean(),
+});
diff --git a/packages/shared/types/importSessions.ts b/packages/shared/types/importSessions.ts
index 0c1edd03..44022a74 100644
--- a/packages/shared/types/importSessions.ts
+++ b/packages/shared/types/importSessions.ts
@@ -1,8 +1,10 @@
import { z } from "zod";
export const zImportSessionStatusSchema = z.enum([
+ "staging",
"pending",
- "in_progress",
+ "running",
+ "paused",
"completed",
"failed",
]);
@@ -24,13 +26,13 @@ export const zImportSessionSchema = z.object({
userId: z.string(),
message: z.string().nullable(),
rootListId: z.string().nullable(),
+ status: zImportSessionStatusSchema,
createdAt: z.date(),
modifiedAt: z.date().nullable(),
});
export type ZImportSession = z.infer<typeof zImportSessionSchema>;
export const zImportSessionWithStatsSchema = zImportSessionSchema.extend({
- status: z.enum(["pending", "in_progress", "completed", "failed"]),
totalBookmarks: z.number(),
completedBookmarks: z.number(),
failedBookmarks: z.number(),
diff --git a/packages/shared/types/readers.ts b/packages/shared/types/readers.ts
new file mode 100644
index 00000000..117dd51b
--- /dev/null
+++ b/packages/shared/types/readers.ts
@@ -0,0 +1,59 @@
+import { z } from "zod";
+
+import { ZReaderFontFamily, zReaderFontFamilySchema } from "./users";
+
+export const READER_DEFAULTS = {
+ fontSize: 18,
+ lineHeight: 1.6,
+ fontFamily: "serif" as const,
+} as const;
+
+export const READER_FONT_FAMILIES: Record<ZReaderFontFamily, string> = {
+ serif: "ui-serif, Georgia, Cambria, serif",
+ sans: "ui-sans-serif, system-ui, sans-serif",
+ mono: "ui-monospace, Menlo, Monaco, monospace",
+} as const;
+
+// Setting constraints for UI controls
+export const READER_SETTING_CONSTRAINTS = {
+ fontSize: { min: 12, max: 24, step: 1 },
+ lineHeight: { min: 1.2, max: 2.5, step: 0.1 },
+} as const;
+
+// Formatting functions for display
+export function formatFontSize(value: number): string {
+ return `${value}px`;
+}
+
+export function formatLineHeight(value: number): string {
+ return value.toFixed(1);
+}
+
+export function formatFontFamily(
+ value: ZReaderFontFamily,
+ t?: (key: string) => string,
+): string {
+ if (t) {
+ return t(`settings.info.reader_settings.${value}`);
+ }
+ // Fallback labels when no translation function provided
+ switch (value) {
+ case "serif":
+ return "Serif";
+ case "sans":
+ return "Sans Serif";
+ case "mono":
+ return "Monospace";
+ }
+}
+
+export const zReaderSettings = z.object({
+ fontSize: z.number().int().min(12).max(24),
+ lineHeight: z.number().min(1.2).max(2.5),
+ fontFamily: zReaderFontFamilySchema,
+});
+
+export type ReaderSettings = z.infer<typeof zReaderSettings>;
+
+export const zReaderSettingsPartial = zReaderSettings.partial();
+export type ReaderSettingsPartial = z.infer<typeof zReaderSettingsPartial>;
diff --git a/packages/shared/types/rules.ts b/packages/shared/types/rules.ts
index 92300b3c..fd99c266 100644
--- a/packages/shared/types/rules.ts
+++ b/packages/shared/types/rules.ts
@@ -54,6 +54,21 @@ const zUrlContainsCondition = z.object({
str: z.string(),
});
+const zUrlDoesNotContainCondition = z.object({
+ type: z.literal("urlDoesNotContain"),
+ str: z.string(),
+});
+
+const zTitleContainsCondition = z.object({
+ type: z.literal("titleContains"),
+ str: z.string(),
+});
+
+const zTitleDoesNotContainCondition = z.object({
+ type: z.literal("titleDoesNotContain"),
+ str: z.string(),
+});
+
const zImportedFromFeedCondition = z.object({
type: z.literal("importedFromFeed"),
feedId: z.string(),
@@ -80,6 +95,9 @@ const zIsArchivedCondition = z.object({
const nonRecursiveCondition = z.discriminatedUnion("type", [
zAlwaysTrueCondition,
zUrlContainsCondition,
+ zUrlDoesNotContainCondition,
+ zTitleContainsCondition,
+ zTitleDoesNotContainCondition,
zImportedFromFeedCondition,
zBookmarkTypeIsCondition,
zHasTagCondition,
@@ -98,6 +116,9 @@ export const zRuleEngineConditionSchema: z.ZodType<RuleEngineCondition> =
z.discriminatedUnion("type", [
zAlwaysTrueCondition,
zUrlContainsCondition,
+ zUrlDoesNotContainCondition,
+ zTitleContainsCondition,
+ zTitleDoesNotContainCondition,
zImportedFromFeedCondition,
zBookmarkTypeIsCondition,
zHasTagCondition,
@@ -227,6 +248,7 @@ const ruleValidaitorFn = (
case "isArchived":
return true;
case "urlContains":
+ case "urlDoesNotContain":
if (condition.str.length == 0) {
ctx.addIssue({
code: "custom",
@@ -236,6 +258,17 @@ const ruleValidaitorFn = (
return false;
}
return true;
+ case "titleContains":
+ case "titleDoesNotContain":
+ if (condition.str.length == 0) {
+ ctx.addIssue({
+ code: "custom",
+ message: "You must specify a title for this condition type",
+ path: ["condition", "str"],
+ });
+ return false;
+ }
+ return true;
case "hasTag":
if (condition.tagId.length == 0) {
ctx.addIssue({
diff --git a/packages/shared/types/search.ts b/packages/shared/types/search.ts
index c29270b8..b653d883 100644
--- a/packages/shared/types/search.ts
+++ b/packages/shared/types/search.ts
@@ -1,6 +1,6 @@
import { z } from "zod";
-import { BookmarkTypes } from "./bookmarks";
+import { BookmarkTypes, zBookmarkSourceSchema } from "./bookmarks";
const zTagNameMatcher = z.object({
type: z.literal("tagName"),
@@ -88,6 +88,12 @@ const zBrokenLinksMatcher = z.object({
brokenLinks: z.boolean(),
});
+const zSourceMatcher = z.object({
+ type: z.literal("source"),
+ source: zBookmarkSourceSchema,
+ inverse: z.boolean(),
+});
+
const zNonRecursiveMatcher = z.union([
zTagNameMatcher,
zListNameMatcher,
@@ -103,6 +109,7 @@ const zNonRecursiveMatcher = z.union([
zTypeMatcher,
zRssFeedNameMatcher,
zBrokenLinksMatcher,
+ zSourceMatcher,
]);
type NonRecursiveMatcher = z.infer<typeof zNonRecursiveMatcher>;
@@ -127,6 +134,7 @@ export const zMatcherSchema: z.ZodType<Matcher> = z.lazy(() => {
zTypeMatcher,
zRssFeedNameMatcher,
zBrokenLinksMatcher,
+ zSourceMatcher,
z.object({
type: z.literal("and"),
matchers: z.array(zMatcherSchema),
diff --git a/packages/shared/types/tags.ts b/packages/shared/types/tags.ts
index 91ad1d96..7ce70477 100644
--- a/packages/shared/types/tags.ts
+++ b/packages/shared/types/tags.ts
@@ -47,6 +47,7 @@ export const zTagCursorSchema = z.object({
export const zTagListRequestSchema = z.object({
nameContains: z.string().optional(),
+ ids: z.array(z.string()).optional(),
attachedBy: z.enum([...zAttachedByEnumSchema.options, "none"]).optional(),
sortBy: z.enum(["name", "usage", "relevance"]).optional().default("usage"),
cursor: zTagCursorSchema.nullish().default({ page: 0 }),
diff --git a/packages/shared/types/users.ts b/packages/shared/types/users.ts
index 9f020d52..df4697f0 100644
--- a/packages/shared/types/users.ts
+++ b/packages/shared/types/users.ts
@@ -5,6 +5,17 @@ import { zBookmarkSourceSchema } from "./bookmarks";
export const PASSWORD_MIN_LENGTH = 8;
export const PASSWORD_MAX_LENGTH = 100;
+export const zTagStyleSchema = z.enum([
+ "lowercase-hyphens",
+ "lowercase-spaces",
+ "lowercase-underscores",
+ "titlecase-spaces",
+ "titlecase-hyphens",
+ "camelCase",
+ "as-generated",
+]);
+export type ZTagStyle = z.infer<typeof zTagStyleSchema>;
+
export const zSignUpSchema = z
.object({
name: z.string().min(1, { message: "Name can't be empty" }),
@@ -38,6 +49,7 @@ export const zWhoAmIResponseSchema = z.object({
id: z.string(),
name: z.string().nullish(),
email: z.string().nullish(),
+ image: z.string().nullish(),
localUser: z.boolean(),
});
@@ -102,6 +114,76 @@ export const zUserStatsResponseSchema = z.object({
),
});
+export const zWrappedStatsResponseSchema = z.object({
+ year: z.number(),
+ totalBookmarks: z.number(),
+ totalFavorites: z.number(),
+ totalArchived: z.number(),
+ totalHighlights: z.number(),
+ totalTags: z.number(),
+ totalLists: z.number(),
+
+ firstBookmark: z
+ .object({
+ id: z.string(),
+ title: z.string().nullable(),
+ createdAt: z.date(),
+ type: z.enum(["link", "text", "asset"]),
+ })
+ .nullable(),
+
+ mostActiveDay: z
+ .object({
+ date: z.string(),
+ count: z.number(),
+ })
+ .nullable(),
+
+ topDomains: z
+ .array(
+ z.object({
+ domain: z.string(),
+ count: z.number(),
+ }),
+ )
+ .max(5),
+
+ topTags: z
+ .array(
+ z.object({
+ name: z.string(),
+ count: z.number(),
+ }),
+ )
+ .max(5),
+
+ bookmarksByType: z.object({
+ link: z.number(),
+ text: z.number(),
+ asset: z.number(),
+ }),
+
+ bookmarksBySource: z.array(
+ z.object({
+ source: zBookmarkSourceSchema.nullable(),
+ count: z.number(),
+ }),
+ ),
+
+ monthlyActivity: z.array(
+ z.object({
+ month: z.number(),
+ count: z.number(),
+ }),
+ ),
+
+ peakHour: z.number(),
+ peakDayOfWeek: z.number(),
+});
+
+export const zReaderFontFamilySchema = z.enum(["serif", "sans", "mono"]);
+export type ZReaderFontFamily = z.infer<typeof zReaderFontFamilySchema>;
+
export const zUserSettingsSchema = z.object({
bookmarkClickAction: z.enum([
"open_original_link",
@@ -112,6 +194,16 @@ export const zUserSettingsSchema = z.object({
backupsEnabled: z.boolean(),
backupsFrequency: z.enum(["daily", "weekly"]),
backupsRetentionDays: z.number().int().min(1).max(365),
+ // Reader settings (nullable = opt-in, null means use client default)
+ readerFontSize: z.number().int().min(12).max(24).nullable(),
+ readerLineHeight: z.number().min(1.2).max(2.5).nullable(),
+ readerFontFamily: zReaderFontFamilySchema.nullable(),
+ // AI settings (nullable = opt-in, null means use server default)
+ autoTaggingEnabled: z.boolean().nullable(),
+ autoSummarizationEnabled: z.boolean().nullable(),
+ tagStyle: zTagStyleSchema,
+ curatedTagIds: z.array(z.string()).nullable(),
+ inferredTagLang: z.string().nullable(),
});
export type ZUserSettings = z.infer<typeof zUserSettingsSchema>;
@@ -123,6 +215,14 @@ export const zUpdateUserSettingsSchema = zUserSettingsSchema.partial().pick({
backupsEnabled: true,
backupsFrequency: true,
backupsRetentionDays: true,
+ readerFontSize: true,
+ readerLineHeight: true,
+ readerFontFamily: true,
+ autoTaggingEnabled: true,
+ autoSummarizationEnabled: true,
+ tagStyle: true,
+ curatedTagIds: true,
+ inferredTagLang: true,
});
export const zUpdateBackupSettingsSchema = zUpdateUserSettingsSchema.pick({
diff --git a/packages/shared/utils/bookmarkUtils.ts b/packages/shared/utils/bookmarkUtils.ts
index 9d4659b1..c9587c6c 100644
--- a/packages/shared/utils/bookmarkUtils.ts
+++ b/packages/shared/utils/bookmarkUtils.ts
@@ -28,9 +28,13 @@ export function getBookmarkLinkImageUrl(bookmark: ZBookmarkedLink) {
}
export function isBookmarkStillCrawling(bookmark: ZBookmark) {
- return (
- bookmark.content.type == BookmarkTypes.LINK && !bookmark.content.crawledAt
- );
+ if (bookmark.content.type != BookmarkTypes.LINK) {
+ return false;
+ }
+ if (bookmark.content.crawlStatus) {
+ return bookmark.content.crawlStatus === "pending";
+ }
+ return !bookmark.content.crawledAt;
}
export function isBookmarkStillTagging(bookmark: ZBookmark) {
diff --git a/packages/shared/utils/redirectUrl.test.ts b/packages/shared/utils/redirectUrl.test.ts
new file mode 100644
index 00000000..97d52cf2
--- /dev/null
+++ b/packages/shared/utils/redirectUrl.test.ts
@@ -0,0 +1,89 @@
+import { describe, expect, it } from "vitest";
+
+import { isMobileAppRedirect, validateRedirectUrl } from "./redirectUrl";
+
+describe("validateRedirectUrl", () => {
+ it("should return undefined for null input", () => {
+ expect(validateRedirectUrl(null)).toBe(undefined);
+ });
+
+ it("should return undefined for undefined input", () => {
+ expect(validateRedirectUrl(undefined)).toBe(undefined);
+ });
+
+ it("should return undefined for empty string", () => {
+ expect(validateRedirectUrl("")).toBe(undefined);
+ });
+
+ it("should allow relative paths starting with '/'", () => {
+ expect(validateRedirectUrl("/")).toBe("/");
+ expect(validateRedirectUrl("/dashboard")).toBe("/dashboard");
+ expect(validateRedirectUrl("/settings/profile")).toBe("/settings/profile");
+ expect(validateRedirectUrl("/path?query=value")).toBe("/path?query=value");
+ expect(validateRedirectUrl("/path#hash")).toBe("/path#hash");
+ });
+
+ it("should reject protocol-relative URLs (//)", () => {
+ expect(validateRedirectUrl("//evil.com")).toBe(undefined);
+ expect(validateRedirectUrl("//evil.com/path")).toBe(undefined);
+ });
+
+ it("should allow karakeep:// scheme for mobile app", () => {
+ expect(validateRedirectUrl("karakeep://")).toBe("karakeep://");
+ expect(validateRedirectUrl("karakeep://callback")).toBe(
+ "karakeep://callback",
+ );
+ expect(validateRedirectUrl("karakeep://callback/path")).toBe(
+ "karakeep://callback/path",
+ );
+ expect(validateRedirectUrl("karakeep://callback?param=value")).toBe(
+ "karakeep://callback?param=value",
+ );
+ });
+
+ it("should reject http:// scheme", () => {
+ expect(validateRedirectUrl("http://example.com")).toBe(undefined);
+ expect(validateRedirectUrl("http://localhost:3000")).toBe(undefined);
+ });
+
+ it("should reject https:// scheme", () => {
+ expect(validateRedirectUrl("https://example.com")).toBe(undefined);
+ expect(validateRedirectUrl("https://evil.com/phishing")).toBe(undefined);
+ });
+
+ it("should reject javascript: scheme", () => {
+ expect(validateRedirectUrl("javascript:alert(1)")).toBe(undefined);
+ });
+
+ it("should reject data: scheme", () => {
+ expect(
+ validateRedirectUrl("data:text/html,<script>alert(1)</script>"),
+ ).toBe(undefined);
+ });
+
+ it("should reject other custom schemes", () => {
+ expect(validateRedirectUrl("file:///etc/passwd")).toBe(undefined);
+ expect(validateRedirectUrl("ftp://example.com")).toBe(undefined);
+ expect(validateRedirectUrl("mailto:test@example.com")).toBe(undefined);
+ });
+
+ it("should reject paths not starting with /", () => {
+ expect(validateRedirectUrl("dashboard")).toBe(undefined);
+ expect(validateRedirectUrl("path/to/page")).toBe(undefined);
+ });
+});
+
+describe("isMobileAppRedirect", () => {
+ it("should return true for karakeep:// URLs", () => {
+ expect(isMobileAppRedirect("karakeep://")).toBe(true);
+ expect(isMobileAppRedirect("karakeep://callback")).toBe(true);
+ expect(isMobileAppRedirect("karakeep://callback/path")).toBe(true);
+ });
+
+ it("should return false for other URLs", () => {
+ expect(isMobileAppRedirect("/")).toBe(false);
+ expect(isMobileAppRedirect("/dashboard")).toBe(false);
+ expect(isMobileAppRedirect("https://example.com")).toBe(false);
+ expect(isMobileAppRedirect("http://localhost")).toBe(false);
+ });
+});
diff --git a/packages/shared/utils/redirectUrl.ts b/packages/shared/utils/redirectUrl.ts
new file mode 100644
index 00000000..c2adffc0
--- /dev/null
+++ b/packages/shared/utils/redirectUrl.ts
@@ -0,0 +1,35 @@
+/**
+ * Validates a redirect URL to prevent open redirect attacks.
+ * Only allows:
+ * - Relative paths starting with "/" (but not "//" to prevent protocol-relative URLs)
+ * - The karakeep:// scheme for the mobile app
+ *
+ * @returns The validated URL if valid, otherwise undefined.
+ */
+export function validateRedirectUrl(
+ url: string | null | undefined,
+): string | undefined {
+ if (!url) {
+ return undefined;
+ }
+
+ // Allow relative paths starting with "/" but not "//" (protocol-relative URLs)
+ if (url.startsWith("/") && !url.startsWith("//")) {
+ return url;
+ }
+
+ // Allow karakeep:// scheme for mobile app deep links
+ if (url.startsWith("karakeep://")) {
+ return url;
+ }
+
+ // Reject all other schemes (http, https, javascript, data, etc.)
+ return undefined;
+}
+
+/**
+ * Checks if the redirect URL is a mobile app deep link.
+ */
+export function isMobileAppRedirect(url: string): boolean {
+ return url.startsWith("karakeep://");
+}
diff --git a/packages/shared/utils/tag.ts b/packages/shared/utils/tag.ts
index 8e1bd105..b69b817e 100644
--- a/packages/shared/utils/tag.ts
+++ b/packages/shared/utils/tag.ts
@@ -1,6 +1,37 @@
+import type { ZTagStyle } from "../types/users";
+
/**
* Ensures exactly ONE leading #
*/
export function normalizeTagName(raw: string): string {
return raw.trim().replace(/^#+/, ""); // strip every leading #
}
+
+export type TagStyle = ZTagStyle;
+
+export function getTagStylePrompt(style: TagStyle): string {
+ switch (style) {
+ case "lowercase-hyphens":
+ return "- Use lowercase letters with hyphens between words (e.g., 'machine-learning', 'web-development')";
+ case "lowercase-spaces":
+ return "- Use lowercase letters with spaces between words (e.g., 'machine learning', 'web development')";
+ case "lowercase-underscores":
+ return "- Use lowercase letters with underscores between words (e.g., 'machine_learning', 'web_development')";
+ case "titlecase-spaces":
+ return "- Use title case with spaces between words (e.g., 'Machine Learning', 'Web Development')";
+ case "titlecase-hyphens":
+ return "- Use title case with hyphens between words (e.g., 'Machine-Learning', 'Web-Development')";
+ case "camelCase":
+ return "- Use camelCase format (e.g., 'machineLearning', 'webDevelopment')";
+ case "as-generated":
+ default:
+ return "";
+ }
+}
+
+export function getCuratedTagsPrompt(curatedTags?: string[]): string {
+ if (curatedTags && curatedTags.length > 0) {
+ return `- ONLY use tags from this predefined list: [${curatedTags.join(", ")}]. Do not create any new tags outside this list. If no tags fit, don't emit any.`;
+ }
+ return "";
+}