about summary refs log tree commit diff stats
path: root/packages/shared/import-export
diff options
context:
space:
mode:
Diffstat (limited to 'packages/shared/import-export')
-rw-r--r-- packages/shared/import-export/importer.test.ts 454
-rw-r--r-- packages/shared/import-export/importer.ts 123
-rw-r--r-- packages/shared/import-export/parsers.test.ts 301
-rw-r--r-- packages/shared/import-export/parsers.ts 190
4 files changed, 795 insertions, 273 deletions
diff --git a/packages/shared/import-export/importer.test.ts b/packages/shared/import-export/importer.test.ts
index 48cd1204..f097f8d5 100644
--- a/packages/shared/import-export/importer.test.ts
+++ b/packages/shared/import-export/importer.test.ts
@@ -1,13 +1,14 @@
import { describe, expect, it, vi } from "vitest";
-import { importBookmarksFromFile, ParsedBookmark } from ".";
+import type { StagedBookmark } from ".";
+import { importBookmarksFromFile } from ".";
const fakeFile = {
text: vi.fn().mockResolvedValue("fake file content"),
} as unknown as File;
describe("importBookmarksFromFile", () => {
- it("creates root list, folders and imports bookmarks with progress", async () => {
+ it("creates root list, folders and stages bookmarks with progress", async () => {
const parsers = {
pocket: vi.fn().mockReturnValue([
{
@@ -61,32 +62,23 @@ describe("importBookmarksFromFile", () => {
},
);
- const createdBookmarks: ParsedBookmark[] = [];
- const addedToLists: { bookmarkId: string; listIds: string[] }[] = [];
- const updatedTags: { bookmarkId: string; tags: string[] }[] = [];
-
- const createBookmark = vi.fn(async (bookmark: ParsedBookmark) => {
- createdBookmarks.push(bookmark);
- return {
- id: `bookmark-${createdBookmarks.length}`,
- alreadyExists: false,
- };
- });
-
- const addBookmarkToLists = vi.fn(
- async (input: { bookmarkId: string; listIds: string[] }) => {
- addedToLists.push(input);
+ const stagedBookmarks: StagedBookmark[] = [];
+ const stageImportedBookmarks = vi.fn(
+ async (input: {
+ importSessionId: string;
+ bookmarks: StagedBookmark[];
+ }) => {
+ stagedBookmarks.push(...input.bookmarks);
},
);
- const updateBookmarkTags = vi.fn(
- async (input: { bookmarkId: string; tags: string[] }) => {
- updatedTags.push(input);
- },
+ const finalizeImportStaging = vi.fn();
+ const createImportSession = vi.fn(
+ async (_input: { name: string; rootListId: string }) => ({
+ id: "session-1",
+ }),
);
- const createImportSession = vi.fn(async () => ({ id: "session-1" }));
-
const progress: number[] = [];
const res = await importBookmarksFromFile(
{
@@ -95,9 +87,8 @@ describe("importBookmarksFromFile", () => {
rootListName: "Imported",
deps: {
createList,
- createBookmark,
- addBookmarkToLists,
- updateBookmarkTags,
+ stageImportedBookmarks,
+ finalizeImportStaging,
createImportSession,
},
onProgress: (d, t) => progress.push(d / t),
@@ -106,12 +97,14 @@ describe("importBookmarksFromFile", () => {
);
expect(res.rootListId).toBe("Imported");
+ expect(res.importSessionId).toBe("session-1");
expect(res.counts).toEqual({
- successes: 5,
+ successes: 0,
failures: 0,
alreadyExisted: 0,
total: 5, // Using custom parser, no deduplication
});
+
// Root + all unique folders from paths
expect(createdLists).toEqual([
{ name: "Imported", icon: "⬆️" },
@@ -122,38 +115,43 @@ describe("importBookmarksFromFile", () => {
{ name: "Tech", parentId: "Imported/Reading", icon: "📁" },
{ name: "Duplicates", parentId: "Imported/Development", icon: "📁" },
]);
- // Verify we have 5 created bookmarks (no deduplication with custom parser)
- expect(createdBookmarks).toHaveLength(5);
- // Verify GitHub bookmark exists (will be two separate bookmarks since no deduplication)
- const githubBookmarks = createdBookmarks.filter(
- (bookmark) =>
- bookmark.content?.type === "link" &&
- bookmark.content.url === "https://github.com/example/repo",
- );
- expect(githubBookmarks).toHaveLength(2);
- // Verify text bookmark exists
- const textBookmark = createdBookmarks.find(
- (bookmark) => bookmark.content?.type === "text",
+
+ // Verify 5 bookmarks were staged (in 1 batch since < 50)
+ expect(stagedBookmarks).toHaveLength(5);
+ expect(stageImportedBookmarks).toHaveBeenCalledTimes(1);
+
+ // Verify GitHub link bookmark was staged correctly
+ const githubBookmark = stagedBookmarks.find(
+ (b) => b.url === "https://github.com/example/repo" && b.type === "link",
);
+ expect(githubBookmark).toBeDefined();
+ if (!githubBookmark) {
+ throw new Error("Expected GitHub bookmark to be staged");
+ }
+ expect(githubBookmark.title).toBe("GitHub Repository");
+ expect(githubBookmark.tags).toEqual(["dev", "github"]);
+ expect(githubBookmark.listIds).toEqual(["Imported/Development/Projects"]);
+
+ // Verify text bookmark was staged correctly
+ const textBookmark = stagedBookmarks.find((b) => b.type === "text");
expect(textBookmark).toBeDefined();
- expect(textBookmark!.archived).toBe(true);
- expect(textBookmark!.notes).toBe("Additional context");
- // Verify bookmark with no path goes to root
- const noCategoryBookmark = createdBookmarks.find(
- (bookmark) =>
- bookmark.content?.type === "link" &&
- bookmark.content.url === "https://example.com/misc",
+ if (!textBookmark) {
+ throw new Error("Expected text bookmark to be staged");
+ }
+ expect(textBookmark.content).toBe("Important notes about the project");
+ expect(textBookmark.note).toBe("Additional context");
+ expect(textBookmark.listIds).toEqual(["Imported/Personal"]);
+
+ // Verify bookmark with empty paths gets root list ID
+ const noCategoryBookmark = stagedBookmarks.find(
+ (b) => b.url === "https://example.com/misc",
);
expect(noCategoryBookmark).toBeDefined();
- // Find the corresponding list assignment for this bookmark
- const noCategoryBookmarkId = `bookmark-${createdBookmarks.indexOf(noCategoryBookmark!) + 1}`;
- const listAssignment = addedToLists.find(
- (a) => a.bookmarkId === noCategoryBookmarkId,
- );
- expect(listAssignment!.listIds).toEqual(["Imported"]);
+ expect(noCategoryBookmark!.listIds).toEqual(["Imported"]);
+
+ // Verify finalizeImportStaging was called
+ expect(finalizeImportStaging).toHaveBeenCalledWith("session-1");
- // Verify that tags were updated for bookmarks that have tags
- expect(updatedTags.length).toBeGreaterThan(0);
expect(progress).toContain(0);
expect(progress.at(-1)).toBe(1);
});
@@ -167,9 +165,8 @@ describe("importBookmarksFromFile", () => {
rootListName: "Imported",
deps: {
createList: vi.fn(),
- createBookmark: vi.fn(),
- addBookmarkToLists: vi.fn(),
- updateBookmarkTags: vi.fn(),
+ stageImportedBookmarks: vi.fn(),
+ finalizeImportStaging: vi.fn(),
createImportSession: vi.fn(async () => ({ id: "session-1" })),
},
},
@@ -182,29 +179,29 @@ describe("importBookmarksFromFile", () => {
});
});
- it("continues import when individual bookmarks fail", async () => {
+ it("stages all bookmarks successfully", async () => {
const parsers = {
pocket: vi.fn().mockReturnValue([
{
- title: "Success Bookmark 1",
- content: { type: "link", url: "https://example.com/success1" },
- tags: ["success"],
+ title: "Bookmark 1",
+ content: { type: "link", url: "https://example.com/1" },
+ tags: ["tag1"],
addDate: 100,
- paths: [["Success"]],
+ paths: [["Category1"]],
},
{
- title: "Failure Bookmark",
- content: { type: "link", url: "https://example.com/failure" },
- tags: ["failure"],
+ title: "Bookmark 2",
+ content: { type: "link", url: "https://example.com/2" },
+ tags: ["tag2"],
addDate: 200,
- paths: [["Failure"]],
+ paths: [["Category2"]],
},
{
- title: "Success Bookmark 2",
- content: { type: "link", url: "https://example.com/success2" },
- tags: ["success"],
+ title: "Bookmark 3",
+ content: { type: "link", url: "https://example.com/3" },
+ tags: ["tag3"],
addDate: 300,
- paths: [["Success"]],
+ paths: [["Category1"]],
},
]),
};
@@ -220,37 +217,23 @@ describe("importBookmarksFromFile", () => {
},
);
- const createdBookmarks: ParsedBookmark[] = [];
- const addedToLists: { bookmarkId: string; listIds: string[] }[] = [];
- const updatedTags: { bookmarkId: string; tags: string[] }[] = [];
-
- const createBookmark = vi.fn(async (bookmark: ParsedBookmark) => {
- // Simulate failure for the "Failure Bookmark"
- if (bookmark.title === "Failure Bookmark") {
- throw new Error("Simulated bookmark creation failure");
- }
-
- createdBookmarks.push(bookmark);
- return {
- id: `bookmark-${createdBookmarks.length}`,
- alreadyExists: false,
- };
- });
-
- const addBookmarkToLists = vi.fn(
- async (input: { bookmarkId: string; listIds: string[] }) => {
- addedToLists.push(input);
+ const stagedBookmarks: StagedBookmark[] = [];
+ const stageImportedBookmarks = vi.fn(
+ async (input: {
+ importSessionId: string;
+ bookmarks: StagedBookmark[];
+ }) => {
+ stagedBookmarks.push(...input.bookmarks);
},
);
- const updateBookmarkTags = vi.fn(
- async (input: { bookmarkId: string; tags: string[] }) => {
- updatedTags.push(input);
- },
+ const finalizeImportStaging = vi.fn();
+ const createImportSession = vi.fn(
+ async (_input: { name: string; rootListId: string }) => ({
+ id: "session-1",
+ }),
);
- const createImportSession = vi.fn(async () => ({ id: "session-1" }));
-
const progress: number[] = [];
const res = await importBookmarksFromFile(
{
@@ -259,9 +242,8 @@ describe("importBookmarksFromFile", () => {
rootListName: "Imported",
deps: {
createList,
- createBookmark,
- addBookmarkToLists,
- updateBookmarkTags,
+ stageImportedBookmarks,
+ finalizeImportStaging,
createImportSession,
},
onProgress: (d, t) => progress.push(d / t),
@@ -269,63 +251,57 @@ describe("importBookmarksFromFile", () => {
{ parsers },
);
- // Should still create the root list
expect(res.rootListId).toBe("Imported");
-
- // Should track both successes and failures
+ expect(res.importSessionId).toBe("session-1");
expect(res.counts).toEqual({
- successes: 2, // Two successful bookmarks
- failures: 1, // One failed bookmark
+ successes: 0,
+ failures: 0,
alreadyExisted: 0,
total: 3,
});
- // Should create folders for all bookmarks (including failed ones)
+ // Should create folders for all bookmarks
expect(createdLists).toEqual([
{ name: "Imported", icon: "⬆️" },
- { name: "Success", parentId: "Imported", icon: "📁" },
- { name: "Failure", parentId: "Imported", icon: "📁" },
+ { name: "Category1", parentId: "Imported", icon: "📁" },
+ { name: "Category2", parentId: "Imported", icon: "📁" },
]);
- // Only successful bookmarks should be created
- expect(createdBookmarks).toHaveLength(2);
- expect(createdBookmarks.map((b) => b.title)).toEqual([
- "Success Bookmark 1",
- "Success Bookmark 2",
- ]);
+ // All bookmarks should be staged (in 1 batch since < 50)
+ expect(stagedBookmarks).toHaveLength(3);
+ expect(stageImportedBookmarks).toHaveBeenCalledTimes(1);
- // Only successful bookmarks should be added to lists and have tags updated
- expect(addedToLists).toHaveLength(2);
- expect(updatedTags).toHaveLength(2);
+ // Verify finalizeImportStaging was called
+ expect(finalizeImportStaging).toHaveBeenCalledWith("session-1");
- // Progress should complete even with failures
+ // Progress should complete
expect(progress).toContain(0);
expect(progress.at(-1)).toBe(1);
});
- it("handles failures in different stages of bookmark import", async () => {
+ it("stages bookmarks with different paths", async () => {
const parsers = {
pocket: vi.fn().mockReturnValue([
{
- title: "Success Bookmark",
- content: { type: "link", url: "https://example.com/success" },
- tags: ["success"],
+ title: "Bookmark 1",
+ content: { type: "link", url: "https://example.com/1" },
+ tags: ["tag1"],
addDate: 100,
- paths: [["Success"]],
+ paths: [["Path1"]],
},
{
- title: "Fail at List Assignment",
- content: { type: "link", url: "https://example.com/fail-list" },
- tags: ["fail"],
+ title: "Bookmark 2",
+ content: { type: "link", url: "https://example.com/2" },
+ tags: ["tag2"],
addDate: 200,
- paths: [["Failure"]],
+ paths: [["Path2"]],
},
{
- title: "Fail at Tag Update",
- content: { type: "link", url: "https://example.com/fail-tag" },
- tags: ["fail-tag"],
+ title: "Bookmark 3",
+ content: { type: "link", url: "https://example.com/3" },
+ tags: ["tag3"],
addDate: 300,
- paths: [["Failure"]],
+ paths: [["Path2"]],
},
]),
};
@@ -338,31 +314,23 @@ describe("importBookmarksFromFile", () => {
},
);
- let bookmarkIdCounter = 1;
- const createBookmark = vi.fn(async () => {
- return { id: `bookmark-${bookmarkIdCounter++}`, alreadyExists: false };
- });
-
- const addBookmarkToLists = vi.fn(
- async (input: { bookmarkId: string; listIds: string[] }) => {
- // Simulate failure for specific bookmark
- if (input.bookmarkId === "bookmark-2") {
- throw new Error("Failed to add bookmark to lists");
- }
+ const stagedBookmarks: StagedBookmark[] = [];
+ const stageImportedBookmarks = vi.fn(
+ async (input: {
+ importSessionId: string;
+ bookmarks: StagedBookmark[];
+ }) => {
+ stagedBookmarks.push(...input.bookmarks);
},
);
- const updateBookmarkTags = vi.fn(
- async (input: { bookmarkId: string; tags: string[] }) => {
- // Simulate failure for specific bookmark
- if (input.bookmarkId === "bookmark-3") {
- throw new Error("Failed to update bookmark tags");
- }
- },
+ const finalizeImportStaging = vi.fn();
+ const createImportSession = vi.fn(
+ async (_input: { name: string; rootListId: string }) => ({
+ id: "session-1",
+ }),
);
- const createImportSession = vi.fn(async () => ({ id: "session-1" }));
-
const progress: number[] = [];
const res = await importBookmarksFromFile(
{
@@ -371,9 +339,8 @@ describe("importBookmarksFromFile", () => {
rootListName: "Imported",
deps: {
createList,
- createBookmark,
- addBookmarkToLists,
- updateBookmarkTags,
+ stageImportedBookmarks,
+ finalizeImportStaging,
createImportSession,
},
onProgress: (d, t) => progress.push(d / t),
@@ -383,23 +350,110 @@ describe("importBookmarksFromFile", () => {
expect(res.rootListId).toBe("Imported");
expect(res.importSessionId).toBe("session-1");
-
- // All bookmarks are created successfully, but 2 fail in post-processing
expect(res.counts).toEqual({
- successes: 1, // Only one fully successful bookmark
- failures: 2, // Two failed in post-processing steps
+ successes: 0,
+ failures: 0,
alreadyExisted: 0,
total: 3,
});
- // All bookmarks should be created (failures happen after bookmark creation)
- expect(createBookmark).toHaveBeenCalledTimes(3);
+ // All bookmarks should be staged (in 1 batch since < 50)
+ expect(stagedBookmarks).toHaveLength(3);
+ expect(stageImportedBookmarks).toHaveBeenCalledTimes(1);
+
+ // Verify finalizeImportStaging was called
+ expect(finalizeImportStaging).toHaveBeenCalledWith("session-1");
+ });
+
+ it("handles HTML bookmarks with empty folder names", async () => {
+ const htmlContent = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><H3 ADD_DATE="1765995928" LAST_MODIFIED="1765995928">Bluetooth Fernbedienung</H3>
+ <DL><p>
+ <DT><H3 ADD_DATE="1765995928" LAST_MODIFIED="0"></H3>
+ <DL><p>
+ <DT><A HREF="https://www.example.com/product.html" ADD_DATE="1593444456">Example Product</A>
+ </DL><p>
+ </DL><p>
+</DL><p>`;
+
+ const mockFile = {
+ text: vi.fn().mockResolvedValue(htmlContent),
+ } as unknown as File;
+
+ const createdLists: { name: string; icon: string; parentId?: string }[] =
+ [];
+ const createList = vi.fn(
+ async (input: { name: string; icon: string; parentId?: string }) => {
+ createdLists.push(input);
+ return {
+ id: `${input.parentId ? input.parentId + "/" : ""}${input.name}`,
+ };
+ },
+ );
+
+ const stagedBookmarks: StagedBookmark[] = [];
+ const stageImportedBookmarks = vi.fn(
+ async (input: {
+ importSessionId: string;
+ bookmarks: StagedBookmark[];
+ }) => {
+ stagedBookmarks.push(...input.bookmarks);
+ },
+ );
+
+ const finalizeImportStaging = vi.fn();
+ const createImportSession = vi.fn(
+ async (_input: { name: string; rootListId: string }) => ({
+ id: "session-1",
+ }),
+ );
+
+ const res = await importBookmarksFromFile({
+ file: mockFile,
+ source: "html",
+ rootListName: "HTML Import",
+ deps: {
+ createList,
+ stageImportedBookmarks,
+ finalizeImportStaging,
+ createImportSession,
+ },
+ });
+
+ expect(res.counts).toEqual({
+ successes: 0,
+ failures: 0,
+ alreadyExisted: 0,
+ total: 1,
+ });
+
+ // Verify that the empty folder name was replaced with "Unnamed"
+ expect(createdLists).toEqual([
+ { name: "HTML Import", icon: "⬆️" },
+ { name: "Bluetooth Fernbedienung", parentId: "HTML Import", icon: "📁" },
+ {
+ name: "Unnamed",
+ parentId: "HTML Import/Bluetooth Fernbedienung",
+ icon: "📁",
+ },
+ ]);
- // addBookmarkToLists should be called 3 times (but one fails)
- expect(addBookmarkToLists).toHaveBeenCalledTimes(3);
+ // Verify the bookmark was staged with correct listIds
+ expect(stagedBookmarks).toHaveLength(1);
+ expect(stagedBookmarks[0]).toMatchObject({
+ title: "Example Product",
+ url: "https://www.example.com/product.html",
+ type: "link",
+ tags: [],
+ listIds: ["HTML Import/Bluetooth Fernbedienung/Unnamed"],
+ });
- // updateBookmarkTags should be called 2 times (once fails at list assignment, one fails at tag update)
- expect(updateBookmarkTags).toHaveBeenCalledTimes(2);
+ // Verify finalizeImportStaging was called
+ expect(finalizeImportStaging).toHaveBeenCalledWith("session-1");
});
it("parses mymind CSV export correctly", async () => {
@@ -413,14 +467,22 @@ describe("importBookmarksFromFile", () => {
text: vi.fn().mockResolvedValue(mymindCsv),
} as unknown as File;
- const createdBookmarks: ParsedBookmark[] = [];
- const createBookmark = vi.fn(async (bookmark: ParsedBookmark) => {
- createdBookmarks.push(bookmark);
- return {
- id: `bookmark-${createdBookmarks.length}`,
- alreadyExists: false,
- };
- });
+ const stagedBookmarks: StagedBookmark[] = [];
+ const stageImportedBookmarks = vi.fn(
+ async (input: {
+ importSessionId: string;
+ bookmarks: StagedBookmark[];
+ }) => {
+ stagedBookmarks.push(...input.bookmarks);
+ },
+ );
+
+ const finalizeImportStaging = vi.fn();
+ const createImportSession = vi.fn(
+ async (_input: { name: string; rootListId: string }) => ({
+ id: "session-1",
+ }),
+ );
const res = await importBookmarksFromFile({
file: mockFile,
@@ -432,52 +494,54 @@ describe("importBookmarksFromFile", () => {
id: `${input.parentId ? input.parentId + "/" : ""}${input.name}`,
}),
),
- createBookmark,
- addBookmarkToLists: vi.fn(),
- updateBookmarkTags: vi.fn(),
- createImportSession: vi.fn(async () => ({ id: "session-1" })),
+ stageImportedBookmarks,
+ finalizeImportStaging,
+ createImportSession,
},
});
expect(res.counts).toEqual({
- successes: 3,
+ successes: 0,
failures: 0,
alreadyExisted: 0,
total: 3,
});
- // Verify first bookmark (WebPage with URL)
- expect(createdBookmarks[0]).toMatchObject({
+ // Verify 3 bookmarks were staged
+ expect(stagedBookmarks).toHaveLength(3);
+
+ // Verify first bookmark (WebPage with URL) - mymind has no paths, so root list
+ expect(stagedBookmarks[0]).toMatchObject({
title: "mymind",
- content: {
- type: "link",
- url: "https://access.mymind.com/everything",
- },
+ url: "https://access.mymind.com/everything",
+ type: "link",
tags: ["Wellness", "Self-Improvement", "Psychology"],
+ listIds: ["mymind Import"],
});
- expect(createdBookmarks[0].addDate).toBeCloseTo(
- new Date("2024-12-04T23:02:10Z").getTime() / 1000,
+ expect(stagedBookmarks[0].sourceAddedAt).toEqual(
+ new Date("2024-12-04T23:02:10Z"),
);
// Verify second bookmark (WebPage with note)
- expect(createdBookmarks[1]).toMatchObject({
+ expect(stagedBookmarks[1]).toMatchObject({
title: "Movies / TV / Anime",
- content: {
- type: "link",
- url: "https://fmhy.pages.dev/videopiracyguide",
- },
+ url: "https://fmhy.pages.dev/videopiracyguide",
+ type: "link",
tags: ["Tools", "media", "Entertainment"],
- notes: "Free Media!",
+ note: "Free Media!",
+ listIds: ["mymind Import"],
});
// Verify third bookmark (Note with text content)
- expect(createdBookmarks[2]).toMatchObject({
+ expect(stagedBookmarks[2]).toMatchObject({
title: "",
- content: {
- type: "text",
- text: "• Critical Thinking\n• Empathy",
- },
+ content: "• Critical Thinking\n• Empathy",
+ type: "text",
tags: [],
+ listIds: ["mymind Import"],
});
+
+ // Verify finalizeImportStaging was called
+ expect(finalizeImportStaging).toHaveBeenCalledWith("session-1");
});
});
diff --git a/packages/shared/import-export/importer.ts b/packages/shared/import-export/importer.ts
index b32c49c1..be24ca73 100644
--- a/packages/shared/import-export/importer.ts
+++ b/packages/shared/import-export/importer.ts
@@ -1,4 +1,3 @@
-import { limitConcurrency } from "../concurrency";
import { MAX_LIST_NAME_LENGTH } from "../types/lists";
import { ImportSource, ParsedBookmark, parseImportFile } from "./parsers";
@@ -9,28 +8,32 @@ export interface ImportCounts {
total: number;
}
+export interface StagedBookmark {
+ type: "link" | "text" | "asset";
+ url?: string;
+ title?: string;
+ content?: string;
+ note?: string;
+ tags: string[];
+ listIds: string[];
+ sourceAddedAt?: Date;
+}
+
export interface ImportDeps {
createList: (input: {
name: string;
icon: string;
parentId?: string;
}) => Promise<{ id: string }>;
- createBookmark: (
- bookmark: ParsedBookmark,
- sessionId: string,
- ) => Promise<{ id: string; alreadyExists?: boolean }>;
- addBookmarkToLists: (input: {
- bookmarkId: string;
- listIds: string[];
- }) => Promise<void>;
- updateBookmarkTags: (input: {
- bookmarkId: string;
- tags: string[];
+ stageImportedBookmarks: (input: {
+ importSessionId: string;
+ bookmarks: StagedBookmark[];
}) => Promise<void>;
createImportSession: (input: {
name: string;
rootListId: string;
}) => Promise<{ id: string }>;
+ finalizeImportStaging: (sessionId: string) => Promise<void>;
}
export interface ImportOptions {
@@ -62,7 +65,7 @@ export async function importBookmarksFromFile(
},
options: ImportOptions = {},
): Promise<ImportResult> {
- const { concurrencyLimit = 20, parsers } = options;
+ const { parsers } = options;
const textContent = await file.text();
const parsedBookmarks = parsers?.[source]
@@ -120,50 +123,74 @@ export async function importBookmarksFromFile(
pathMap[pathKey] = folderList.id;
}
- let done = 0;
- const importPromises = parsedBookmarks.map((bookmark) => async () => {
- try {
- const listIds = bookmark.paths.map(
- (path) => pathMap[path.join(PATH_DELIMITER)] || rootList.id,
- );
- if (listIds.length === 0) listIds.push(rootList.id);
+ // Prepare all bookmarks for staging
+ const bookmarksToStage: StagedBookmark[] = parsedBookmarks.map((bookmark) => {
+ // Convert paths to list IDs using pathMap
+ // If no paths, assign to root list
+ const listIds =
+ bookmark.paths.length === 0
+ ? [rootList.id]
+ : bookmark.paths
+ .map((path) => {
+ if (path.length === 0) {
+ return rootList.id;
+ }
+ const pathKey = path.join(PATH_DELIMITER);
+ return pathMap[pathKey] || rootList.id;
+ })
+ .filter((id, index, arr) => arr.indexOf(id) === index); // dedupe
- const created = await deps.createBookmark(bookmark, session.id);
- await deps.addBookmarkToLists({ bookmarkId: created.id, listIds });
- if (bookmark.tags && bookmark.tags.length > 0) {
- await deps.updateBookmarkTags({
- bookmarkId: created.id,
- tags: bookmark.tags,
- });
- }
+ // Determine type and extract content appropriately
+ let type: "link" | "text" | "asset" = "link";
+ let url: string | undefined;
+ let textContent: string | undefined;
- return created;
- } finally {
- done += 1;
- onProgress?.(done, parsedBookmarks.length);
+ if (bookmark.content) {
+ if (bookmark.content.type === "link") {
+ type = "link";
+ url = bookmark.content.url;
+ } else if (bookmark.content.type === "text") {
+ type = "text";
+ textContent = bookmark.content.text;
+ }
}
- });
- const resultsPromises = limitConcurrency(importPromises, concurrencyLimit);
- const results = await Promise.allSettled(resultsPromises);
+ return {
+ type,
+ url,
+ title: bookmark.title,
+ content: textContent,
+ note: bookmark.notes,
+ tags: bookmark.tags ?? [],
+ listIds,
+ sourceAddedAt: bookmark.addDate
+ ? new Date(bookmark.addDate * 1000)
+ : undefined,
+ };
+ });
- let successes = 0;
- let failures = 0;
- let alreadyExisted = 0;
+ // Stage bookmarks in batches of 50
+ const BATCH_SIZE = 50;
+ let staged = 0;
- for (const r of results) {
- if (r.status === "fulfilled") {
- if (r.value.alreadyExists) alreadyExisted++;
- else successes++;
- } else {
- failures++;
- }
+ for (let i = 0; i < bookmarksToStage.length; i += BATCH_SIZE) {
+ const batch = bookmarksToStage.slice(i, i + BATCH_SIZE);
+ await deps.stageImportedBookmarks({
+ importSessionId: session.id,
+ bookmarks: batch,
+ });
+ staged += batch.length;
+ onProgress?.(staged, parsedBookmarks.length);
}
+
+ // Finalize staging - marks session as "pending" for worker pickup
+ await deps.finalizeImportStaging(session.id);
+
return {
counts: {
- successes,
- failures,
- alreadyExisted,
+ successes: 0,
+ failures: 0,
+ alreadyExisted: 0,
total: parsedBookmarks.length,
},
rootListId: rootList.id,
diff --git a/packages/shared/import-export/parsers.test.ts b/packages/shared/import-export/parsers.test.ts
new file mode 100644
index 00000000..18502305
--- /dev/null
+++ b/packages/shared/import-export/parsers.test.ts
@@ -0,0 +1,301 @@
+import { describe, expect, it } from "vitest";
+
+import { parseImportFile } from "./parsers";
+
+describe("parseNetscapeBookmarkFile", () => {
+ it("parses a simple bookmark file with single bookmark", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><A HREF="https://example.com" ADD_DATE="1234567890">Example Site</A>
+</DL><p>`;
+
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(1);
+ expect(result[0]).toMatchObject({
+ title: "Example Site",
+ content: {
+ type: "link",
+ url: "https://example.com",
+ },
+ tags: [],
+ addDate: 1234567890,
+ paths: [[]],
+ });
+ });
+
+ it("parses bookmarks with tags", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><A HREF="https://example.com" ADD_DATE="1234567890" TAGS="tag1,tag2,tag3">Example Site</A>
+</DL><p>`;
+
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(1);
+ expect(result[0].tags).toEqual(["tag1", "tag2", "tag3"]);
+ });
+
+ it("parses bookmarks in nested folders", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><H3 ADD_DATE="1234567890" LAST_MODIFIED="1234567891">Folder1</H3>
+ <DL><p>
+ <DT><H3 ADD_DATE="1234567892" LAST_MODIFIED="1234567893">Folder2</H3>
+ <DL><p>
+ <DT><A HREF="https://example.com" ADD_DATE="1234567894">Nested Bookmark</A>
+ </DL><p>
+ </DL><p>
+</DL><p>`;
+
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(1);
+ expect(result[0]).toMatchObject({
+ title: "Nested Bookmark",
+ content: {
+ type: "link",
+ url: "https://example.com",
+ },
+ paths: [["Folder1", "Folder2"]],
+ });
+ });
+
+ it("handles empty folder names by replacing with 'Unnamed'", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><H3 ADD_DATE="1234567890" LAST_MODIFIED="1234567891">Named Folder</H3>
+ <DL><p>
+ <DT><H3 ADD_DATE="1234567892" LAST_MODIFIED="0"></H3>
+ <DL><p>
+ <DT><A HREF="https://example.com" ADD_DATE="1234567894">Bookmark</A>
+ </DL><p>
+ </DL><p>
+</DL><p>`;
+
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(1);
+ expect(result[0].paths).toEqual([["Named Folder", "Unnamed"]]);
+ });
+
+ it("parses multiple bookmarks in different folders", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><H3 ADD_DATE="1234567890">Tech</H3>
+ <DL><p>
+ <DT><A HREF="https://github.com" ADD_DATE="1234567891">GitHub</A>
+ <DT><A HREF="https://stackoverflow.com" ADD_DATE="1234567892">Stack Overflow</A>
+ </DL><p>
+ <DT><H3 ADD_DATE="1234567893">News</H3>
+ <DL><p>
+ <DT><A HREF="https://news.ycombinator.com" ADD_DATE="1234567894">Hacker News</A>
+ </DL><p>
+</DL><p>`;
+
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(3);
+
+ expect(result[0]).toMatchObject({
+ title: "GitHub",
+ content: { type: "link", url: "https://github.com" },
+ paths: [["Tech"]],
+ });
+
+ expect(result[1]).toMatchObject({
+ title: "Stack Overflow",
+ content: { type: "link", url: "https://stackoverflow.com" },
+ paths: [["Tech"]],
+ });
+
+ expect(result[2]).toMatchObject({
+ title: "Hacker News",
+ content: { type: "link", url: "https://news.ycombinator.com" },
+ paths: [["News"]],
+ });
+ });
+
+ it("parses bookmarks at root level (no folders)", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><A HREF="https://example1.com" ADD_DATE="1234567890">Bookmark 1</A>
+ <DT><A HREF="https://example2.com" ADD_DATE="1234567891">Bookmark 2</A>
+</DL><p>`;
+
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(2);
+ expect(result[0].paths).toEqual([[]]);
+ expect(result[1].paths).toEqual([[]]);
+ });
+
+ it("handles deeply nested folder structures", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><H3>Level1</H3>
+ <DL><p>
+ <DT><H3>Level2</H3>
+ <DL><p>
+ <DT><H3>Level3</H3>
+ <DL><p>
+ <DT><H3>Level4</H3>
+ <DL><p>
+ <DT><A HREF="https://example.com" ADD_DATE="1234567890">Deep Bookmark</A>
+ </DL><p>
+ </DL><p>
+ </DL><p>
+ </DL><p>
+</DL><p>`;
+
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(1);
+ expect(result[0].paths).toEqual([["Level1", "Level2", "Level3", "Level4"]]);
+ });
+
+ it("deduplicates bookmarks with the same URL", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><H3>Folder1</H3>
+ <DL><p>
+ <DT><A HREF="https://example.com" ADD_DATE="1234567890" TAGS="tag1">First Instance</A>
+ </DL><p>
+ <DT><H3>Folder2</H3>
+ <DL><p>
+ <DT><A HREF="https://example.com" ADD_DATE="1234567891" TAGS="tag2">Second Instance</A>
+ </DL><p>
+</DL><p>`;
+
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(1);
+ expect(result[0]).toMatchObject({
+ content: { type: "link", url: "https://example.com" },
+ tags: ["tag1", "tag2"],
+ addDate: 1234567890, // Should keep the earlier date
+ });
+ expect(result[0].paths).toHaveLength(2);
+ expect(result[0].paths).toContainEqual(["Folder1"]);
+ expect(result[0].paths).toContainEqual(["Folder2"]);
+ });
+
+ it("merges notes from duplicate bookmarks", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><A HREF="https://example.com" ADD_DATE="1234567890">Bookmark</A>
+ <DD>First note
+ <DT><A HREF="https://example.com" ADD_DATE="1234567891">Bookmark</A>
+ <DD>Second note
+</DL><p>`;
+
+ // Note: The current parser doesn't extract DD notes, but this test
+ // documents the expected behavior if/when DD parsing is added
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(1);
+ expect(result[0].content).toMatchObject({
+ type: "link",
+ url: "https://example.com",
+ });
+ });
+
+ it("handles bookmarks without ADD_DATE attribute", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><A HREF="https://example.com">No Date Bookmark</A>
+</DL><p>`;
+
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(1);
+ expect(result[0].addDate).toBeUndefined();
+ });
+
+ it("handles bookmarks without HREF attribute", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><A ADD_DATE="1234567890">No URL Bookmark</A>
+</DL><p>`;
+
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(1);
+ expect(result[0].content).toBeUndefined();
+ });
+
+ it("handles mixed structure with folders and root-level bookmarks", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><A HREF="https://root1.com" ADD_DATE="1234567890">Root Bookmark 1</A>
+ <DT><H3>Folder</H3>
+ <DL><p>
+ <DT><A HREF="https://folder1.com" ADD_DATE="1234567891">Folder Bookmark</A>
+ </DL><p>
+ <DT><A HREF="https://root2.com" ADD_DATE="1234567892">Root Bookmark 2</A>
+</DL><p>`;
+
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(3);
+ expect(result[0]).toMatchObject({
+ title: "Root Bookmark 1",
+ paths: [[]],
+ });
+ expect(result[1]).toMatchObject({
+ title: "Folder Bookmark",
+ paths: [["Folder"]],
+ });
+ expect(result[2]).toMatchObject({
+ title: "Root Bookmark 2",
+ paths: [[]],
+ });
+ });
+
+ it("throws error for non-Netscape bookmark files", () => {
+ const html = `<html>
+<head><title>Not a bookmark file</title></head>
+<body>Just a regular HTML file</body>
+</html>`;
+
+ expect(() => parseImportFile("html", html)).toThrow(
+ "The uploaded html file does not seem to be a bookmark file",
+ );
+ });
+});
diff --git a/packages/shared/import-export/parsers.ts b/packages/shared/import-export/parsers.ts
index f4d3f862..24d85c80 100644
--- a/packages/shared/import-export/parsers.ts
+++ b/packages/shared/import-export/parsers.ts
@@ -1,5 +1,6 @@
// Copied from https://gist.github.com/devster31/4e8c6548fd16ffb75c02e6f24e27f9b9
+import type { AnyNode } from "domhandler";
import * as cheerio from "cheerio";
import { parse } from "csv-parse/sync";
import { z } from "zod";
@@ -10,11 +11,13 @@ import { zExportSchema } from "./exporters";
export type ImportSource =
| "html"
| "pocket"
+ | "matter"
| "omnivore"
| "karakeep"
| "linkwarden"
| "tab-session-manager"
- | "mymind";
+ | "mymind"
+ | "instapaper";
export interface ParsedBookmark {
title: string;
@@ -34,41 +37,58 @@ function parseNetscapeBookmarkFile(textContent: string): ParsedBookmark[] {
}
const $ = cheerio.load(textContent);
+ const bookmarks: ParsedBookmark[] = [];
- return $("a")
- .map(function (_index, a) {
- const $a = $(a);
- const addDate = $a.attr("add_date");
- let tags: string[] = [];
+ // Recursively traverse the bookmark hierarchy top-down
+ function traverseFolder(
+ element: cheerio.Cheerio<AnyNode>,
+ currentPath: string[],
+ ) {
+ element.children().each((_index, child) => {
+ const $child = $(child);
- const tagsStr = $a.attr("tags");
- try {
- tags = tagsStr && tagsStr.length > 0 ? tagsStr.split(",") : [];
- } catch {
- /* empty */
- }
- const url = $a.attr("href");
+ // Check if this is a folder (DT with H3)
+ const h3 = $child.children("h3").first();
+ if (h3.length > 0) {
+ const folderName = h3.text().trim() || "Unnamed";
+ const newPath = [...currentPath, folderName];
+
+ // Find the DL that follows this folder and recurse into it
+ const dl = $child.children("dl").first();
+ if (dl.length > 0) {
+ traverseFolder(dl, newPath);
+ }
+ } else {
+ // Check if this is a bookmark (DT with A)
+ const anchor = $child.children("a").first();
+ if (anchor.length > 0) {
+ const addDate = anchor.attr("add_date");
+ const tagsStr = anchor.attr("tags");
+ const tags = tagsStr && tagsStr.length > 0 ? tagsStr.split(",") : [];
+ const url = anchor.attr("href");
- // Build folder path by traversing up the hierarchy
- const path: string[] = [];
- let current = $a.parent();
- while (current && current.length > 0) {
- const h3 = current.find("> h3").first();
- if (h3.length > 0) {
- path.unshift(h3.text());
+ bookmarks.push({
+ title: anchor.text(),
+ content: url
+ ? { type: BookmarkTypes.LINK as const, url }
+ : undefined,
+ tags,
+ addDate:
+ typeof addDate === "undefined" ? undefined : parseInt(addDate),
+ paths: [currentPath],
+ });
}
- current = current.parent();
}
+ });
+ }
- return {
- title: $a.text(),
- content: url ? { type: BookmarkTypes.LINK as const, url } : undefined,
- tags,
- addDate: typeof addDate === "undefined" ? undefined : parseInt(addDate),
- paths: [path],
- };
- })
- .get();
+ // Start traversal from the root DL element
+ const rootDl = $("dl").first();
+ if (rootDl.length > 0) {
+ traverseFolder(rootDl, []);
+ }
+
+ return bookmarks;
}
function parsePocketBookmarkFile(textContent: string): ParsedBookmark[] {
@@ -95,6 +115,52 @@ function parsePocketBookmarkFile(textContent: string): ParsedBookmark[] {
});
}
+// Parses a Matter CSV export into bookmarks.
+//
+// The CSV is read with its first row as the column header and validated
+// against the column set below. Every row becomes a LINK bookmark.
+//
+// @param textContent - Raw CSV text of the export.
+// @returns One bookmark per CSV row.
+// @throws Error when the rows do not match the expected Matter columns.
+function parseMatterBookmarkFile(textContent: string): ParsedBookmark[] {
+  const zMatterRecordSchema = z.object({
+    Title: z.string(),
+    Author: z.string(),
+    Publisher: z.string(),
+    URL: z.string(),
+    // Tags are ";"-separated. Trim each one and drop empty segments so that
+    // "a; b;" yields ["a", "b"] rather than ["a", " b", ""].
+    Tags: z.string().transform((tags) =>
+      tags
+        .split(";")
+        .map((tag) => tag.trim())
+        .filter((tag) => tag.length > 0),
+    ),
+    "Word Count": z.string(),
+    // The transformed value is used as `archived` below: it is true only when
+    // the column holds the exact string "False".
+    "In Queue": z.string().transform((inQueue) => inQueue === "False"),
+    Favorited: z.string(),
+    Read: z.string(),
+    Highlight_Count: z.string(),
+    // Converted from milliseconds to seconds. An empty or unparseable date
+    // becomes undefined instead of leaking NaN into `addDate`.
+    "Last Interaction Date": z.string().transform((date) => {
+      const millis = Date.parse(date);
+      return Number.isNaN(millis) ? undefined : millis / 1000;
+    }),
+    "File Id": z.string(),
+  });
+
+  const zMatterExportSchema = z.array(zMatterRecordSchema);
+
+  const records = parse(textContent, {
+    columns: true,
+    skip_empty_lines: true,
+  });
+
+  const parsed = zMatterExportSchema.safeParse(records);
+  if (!parsed.success) {
+    throw new Error(
+      `The uploaded CSV file contains an invalid Matter bookmark file: ${parsed.error.toString()}`,
+    );
+  }
+
+  return parsed.data.map((record) => {
+    return {
+      title: record.Title,
+      content: { type: BookmarkTypes.LINK as const, url: record.URL },
+      tags: record.Tags,
+      addDate: record["Last Interaction Date"],
+      archived: record["In Queue"],
+      paths: [], // TODO
+    };
+  });
+}
+
function parseKarakeepBookmarkFile(textContent: string): ParsedBookmark[] {
const parsed = zExportSchema.safeParse(JSON.parse(textContent));
if (!parsed.success) {
@@ -292,6 +358,64 @@ function parseMymindBookmarkFile(textContent: string): ParsedBookmark[] {
});
}
+// Parses an Instapaper CSV export into bookmarks.
+//
+// The CSV is read with its first row as the column header and validated
+// against the column set below. A row with a non-blank URL becomes a LINK
+// bookmark; otherwise a non-blank Selection becomes a TEXT bookmark; a row
+// with neither is returned with `content` undefined.
+//
+// @param textContent - Raw CSV text of the export.
+// @returns One bookmark per CSV row.
+// @throws Error when the rows do not match the expected Instapaper columns.
+function parseInstapaperBookmarkFile(textContent: string): ParsedBookmark[] {
+  const zInstapaperRecordSchema = z.object({
+    URL: z.string(),
+    Title: z.string(),
+    Selection: z.string(),
+    Folder: z.string(),
+    Timestamp: z.string(),
+    Tags: z.string(),
+  });
+
+  const zInstapaperExportSchema = z.array(zInstapaperRecordSchema);
+
+  // Named `records` so the per-row callback parameter below does not shadow it.
+  const records = parse(textContent, {
+    columns: true,
+    skip_empty_lines: true,
+  });
+
+  const parsed = zInstapaperExportSchema.safeParse(records);
+
+  if (!parsed.success) {
+    throw new Error(
+      `CSV file contains an invalid instapaper bookmark file: ${parsed.error.toString()}`,
+    );
+  }
+
+  return parsed.data.map((record) => {
+    const url = record.URL.trim();
+    const selection = record.Selection.trim();
+
+    let content: ParsedBookmark["content"];
+    if (url.length > 0) {
+      content = { type: BookmarkTypes.LINK as const, url };
+    } else if (selection.length > 0) {
+      content = { type: BookmarkTypes.TEXT as const, text: selection };
+    }
+
+    // Explicit radix; an empty or non-numeric Timestamp yields undefined
+    // rather than NaN.
+    const timestamp = Number.parseInt(record.Timestamp, 10);
+    const addDate = Number.isNaN(timestamp) ? undefined : timestamp;
+
+    // Tags is read as a JSON array. Only string and number entries are kept,
+    // so a stray null no longer throws and wipes out the row's other tags.
+    let tags: string[] = [];
+    try {
+      const parsedTags: unknown = JSON.parse(record.Tags);
+      if (Array.isArray(parsedTags)) {
+        tags = parsedTags
+          .filter(
+            (tag): tag is string | number =>
+              typeof tag === "string" || typeof tag === "number",
+          )
+          .map((tag) => String(tag).trim())
+          .filter((tag) => tag.length > 0);
+      }
+    } catch {
+      // Tags is not valid JSON: import the row without tags.
+      tags = [];
+    }
+
+    return {
+      title: record.Title,
+      content,
+      addDate,
+      tags,
+      paths: [], // TODO
+    };
+  });
+}
+
function deduplicateBookmarks(bookmarks: ParsedBookmark[]): ParsedBookmark[] {
const deduplicatedBookmarksMap = new Map<string, ParsedBookmark>();
const textBookmarks: ParsedBookmark[] = [];
@@ -345,6 +469,9 @@ export function parseImportFile(
case "pocket":
result = parsePocketBookmarkFile(textContent);
break;
+ case "matter":
+ result = parseMatterBookmarkFile(textContent);
+ break;
case "karakeep":
result = parseKarakeepBookmarkFile(textContent);
break;
@@ -360,6 +487,9 @@ export function parseImportFile(
case "mymind":
result = parseMymindBookmarkFile(textContent);
break;
+ case "instapaper":
+ result = parseInstapaperBookmarkFile(textContent);
+ break;
}
return deduplicateBookmarks(result);
}