aboutsummaryrefslogtreecommitdiffstats
path: root/packages/shared/import-export/parsers.test.ts
diff options
context:
space:
mode:
authorMohamed Bassem <me@mbassem.com>2026-01-03 10:39:45 +0000
committerGitHub <noreply@github.com>2026-01-03 10:39:45 +0000
commit6fe20639702e3eb81bd262075094fb5d1f7033b9 (patch)
tree9f6ee471afe159158184dfbad9d61bee236307b8 /packages/shared/import-export/parsers.test.ts
parent1af9b9ddf69cc7215d10e2f0713123756b36077b (diff)
downloadkarakeep-6fe20639702e3eb81bd262075094fb5d1f7033b9.tar.zst
fix: Eliminate the O(n2) parsing of the netscape import parsing (#2338)
* fix: Eliminate the O(n2) parsing of the netscape import parsing * remove unneeded tests
Diffstat (limited to 'packages/shared/import-export/parsers.test.ts')
-rw-r--r--packages/shared/import-export/parsers.test.ts301
1 files changed, 301 insertions, 0 deletions
diff --git a/packages/shared/import-export/parsers.test.ts b/packages/shared/import-export/parsers.test.ts
new file mode 100644
index 00000000..18502305
--- /dev/null
+++ b/packages/shared/import-export/parsers.test.ts
@@ -0,0 +1,301 @@
+import { describe, expect, it } from "vitest";
+
+import { parseImportFile } from "./parsers";
+
+describe("parseNetscapeBookmarkFile", () => {
+ it("parses a simple bookmark file with single bookmark", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><A HREF="https://example.com" ADD_DATE="1234567890">Example Site</A>
+</DL><p>`;
+
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(1);
+ expect(result[0]).toMatchObject({
+ title: "Example Site",
+ content: {
+ type: "link",
+ url: "https://example.com",
+ },
+ tags: [],
+ addDate: 1234567890,
+ paths: [[]],
+ });
+ });
+
+ it("parses bookmarks with tags", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><A HREF="https://example.com" ADD_DATE="1234567890" TAGS="tag1,tag2,tag3">Example Site</A>
+</DL><p>`;
+
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(1);
+ expect(result[0].tags).toEqual(["tag1", "tag2", "tag3"]);
+ });
+
+ it("parses bookmarks in nested folders", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><H3 ADD_DATE="1234567890" LAST_MODIFIED="1234567891">Folder1</H3>
+ <DL><p>
+ <DT><H3 ADD_DATE="1234567892" LAST_MODIFIED="1234567893">Folder2</H3>
+ <DL><p>
+ <DT><A HREF="https://example.com" ADD_DATE="1234567894">Nested Bookmark</A>
+ </DL><p>
+ </DL><p>
+</DL><p>`;
+
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(1);
+ expect(result[0]).toMatchObject({
+ title: "Nested Bookmark",
+ content: {
+ type: "link",
+ url: "https://example.com",
+ },
+ paths: [["Folder1", "Folder2"]],
+ });
+ });
+
+ it("handles empty folder names by replacing with 'Unnamed'", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><H3 ADD_DATE="1234567890" LAST_MODIFIED="1234567891">Named Folder</H3>
+ <DL><p>
+ <DT><H3 ADD_DATE="1234567892" LAST_MODIFIED="0"></H3>
+ <DL><p>
+ <DT><A HREF="https://example.com" ADD_DATE="1234567894">Bookmark</A>
+ </DL><p>
+ </DL><p>
+</DL><p>`;
+
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(1);
+ expect(result[0].paths).toEqual([["Named Folder", "Unnamed"]]);
+ });
+
+ it("parses multiple bookmarks in different folders", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><H3 ADD_DATE="1234567890">Tech</H3>
+ <DL><p>
+ <DT><A HREF="https://github.com" ADD_DATE="1234567891">GitHub</A>
+ <DT><A HREF="https://stackoverflow.com" ADD_DATE="1234567892">Stack Overflow</A>
+ </DL><p>
+ <DT><H3 ADD_DATE="1234567893">News</H3>
+ <DL><p>
+ <DT><A HREF="https://news.ycombinator.com" ADD_DATE="1234567894">Hacker News</A>
+ </DL><p>
+</DL><p>`;
+
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(3);
+
+ expect(result[0]).toMatchObject({
+ title: "GitHub",
+ content: { type: "link", url: "https://github.com" },
+ paths: [["Tech"]],
+ });
+
+ expect(result[1]).toMatchObject({
+ title: "Stack Overflow",
+ content: { type: "link", url: "https://stackoverflow.com" },
+ paths: [["Tech"]],
+ });
+
+ expect(result[2]).toMatchObject({
+ title: "Hacker News",
+ content: { type: "link", url: "https://news.ycombinator.com" },
+ paths: [["News"]],
+ });
+ });
+
+ it("parses bookmarks at root level (no folders)", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><A HREF="https://example1.com" ADD_DATE="1234567890">Bookmark 1</A>
+ <DT><A HREF="https://example2.com" ADD_DATE="1234567891">Bookmark 2</A>
+</DL><p>`;
+
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(2);
+ expect(result[0].paths).toEqual([[]]);
+ expect(result[1].paths).toEqual([[]]);
+ });
+
+ it("handles deeply nested folder structures", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><H3>Level1</H3>
+ <DL><p>
+ <DT><H3>Level2</H3>
+ <DL><p>
+ <DT><H3>Level3</H3>
+ <DL><p>
+ <DT><H3>Level4</H3>
+ <DL><p>
+ <DT><A HREF="https://example.com" ADD_DATE="1234567890">Deep Bookmark</A>
+ </DL><p>
+ </DL><p>
+ </DL><p>
+ </DL><p>
+</DL><p>`;
+
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(1);
+ expect(result[0].paths).toEqual([["Level1", "Level2", "Level3", "Level4"]]);
+ });
+
+ it("deduplicates bookmarks with the same URL", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><H3>Folder1</H3>
+ <DL><p>
+ <DT><A HREF="https://example.com" ADD_DATE="1234567890" TAGS="tag1">First Instance</A>
+ </DL><p>
+ <DT><H3>Folder2</H3>
+ <DL><p>
+ <DT><A HREF="https://example.com" ADD_DATE="1234567891" TAGS="tag2">Second Instance</A>
+ </DL><p>
+</DL><p>`;
+
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(1);
+ expect(result[0]).toMatchObject({
+ content: { type: "link", url: "https://example.com" },
+ tags: ["tag1", "tag2"],
+ addDate: 1234567890, // Should keep the earlier date
+ });
+ expect(result[0].paths).toHaveLength(2);
+ expect(result[0].paths).toContainEqual(["Folder1"]);
+ expect(result[0].paths).toContainEqual(["Folder2"]);
+ });
+
+ it("merges notes from duplicate bookmarks", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><A HREF="https://example.com" ADD_DATE="1234567890">Bookmark</A>
+ <DD>First note
+ <DT><A HREF="https://example.com" ADD_DATE="1234567891">Bookmark</A>
+ <DD>Second note
+</DL><p>`;
+
+ // Note: The current parser doesn't extract DD notes, but this test
+ // documents the expected behavior if/when DD parsing is added
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(1);
+ expect(result[0].content).toMatchObject({
+ type: "link",
+ url: "https://example.com",
+ });
+ });
+
+ it("handles bookmarks without ADD_DATE attribute", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><A HREF="https://example.com">No Date Bookmark</A>
+</DL><p>`;
+
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(1);
+ expect(result[0].addDate).toBeUndefined();
+ });
+
+ it("handles bookmarks without HREF attribute", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><A ADD_DATE="1234567890">No URL Bookmark</A>
+</DL><p>`;
+
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(1);
+ expect(result[0].content).toBeUndefined();
+ });
+
+ it("handles mixed structure with folders and root-level bookmarks", () => {
+ const html = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+<TITLE>Bookmarks</TITLE>
+<H1>Bookmarks</H1>
+<DL><p>
+ <DT><A HREF="https://root1.com" ADD_DATE="1234567890">Root Bookmark 1</A>
+ <DT><H3>Folder</H3>
+ <DL><p>
+ <DT><A HREF="https://folder1.com" ADD_DATE="1234567891">Folder Bookmark</A>
+ </DL><p>
+ <DT><A HREF="https://root2.com" ADD_DATE="1234567892">Root Bookmark 2</A>
+</DL><p>`;
+
+ const result = parseImportFile("html", html);
+
+ expect(result).toHaveLength(3);
+ expect(result[0]).toMatchObject({
+ title: "Root Bookmark 1",
+ paths: [[]],
+ });
+ expect(result[1]).toMatchObject({
+ title: "Folder Bookmark",
+ paths: [["Folder"]],
+ });
+ expect(result[2]).toMatchObject({
+ title: "Root Bookmark 2",
+ paths: [[]],
+ });
+ });
+
+ it("throws error for non-Netscape bookmark files", () => {
+ const html = `<html>
+<head><title>Not a bookmark file</title></head>
+<body>Just a regular HTML file</body>
+</html>`;
+
+ expect(() => parseImportFile("html", html)).toThrow(
+ "The uploaded html file does not seem to be a bookmark file",
+ );
+ });
+});