aboutsummaryrefslogtreecommitdiffstats
path: root/packages/shared/import-export/parsers.ts
diff options
context:
space:
mode:
author Mohamed Bassem <me@mbassem.com> 2026-01-03 10:39:45 +0000
committer GitHub <noreply@github.com> 2026-01-03 10:39:45 +0000
commit 6fe20639702e3eb81bd262075094fb5d1f7033b9 (patch)
tree 9f6ee471afe159158184dfbad9d61bee236307b8 /packages/shared/import-export/parsers.ts
parent 1af9b9ddf69cc7215d10e2f0713123756b36077b (diff)
download karakeep-6fe20639702e3eb81bd262075094fb5d1f7033b9.tar.zst
fix: Eliminate the O(n^2) parsing of the netscape import parsing (#2338)
* fix: Eliminate the O(n^2) parsing of the netscape import parsing
* remove unneeded tests
Diffstat (limited to 'packages/shared/import-export/parsers.ts')
-rw-r--r-- packages/shared/import-export/parsers.ts 78
1 files changed, 47 insertions, 31 deletions
diff --git a/packages/shared/import-export/parsers.ts b/packages/shared/import-export/parsers.ts
index a56cbb98..df3d2c45 100644
--- a/packages/shared/import-export/parsers.ts
+++ b/packages/shared/import-export/parsers.ts
@@ -1,5 +1,6 @@
// Copied from https://gist.github.com/devster31/4e8c6548fd16ffb75c02e6f24e27f9b9
+import type { AnyNode } from "domhandler";
import * as cheerio from "cheerio";
import { parse } from "csv-parse/sync";
import { z } from "zod";
@@ -35,43 +36,58 @@ function parseNetscapeBookmarkFile(textContent: string): ParsedBookmark[] {
}
const $ = cheerio.load(textContent);
+ const bookmarks: ParsedBookmark[] = [];
- return $("a")
- .map(function (_index, a) {
- const $a = $(a);
- const addDate = $a.attr("add_date");
- let tags: string[] = [];
+ // Recursively traverse the bookmark hierarchy top-down
+ function traverseFolder(
+ element: cheerio.Cheerio<AnyNode>,
+ currentPath: string[],
+ ) {
+ element.children().each((_index, child) => {
+ const $child = $(child);
- const tagsStr = $a.attr("tags");
- try {
- tags = tagsStr && tagsStr.length > 0 ? tagsStr.split(",") : [];
- } catch {
- /* empty */
- }
- const url = $a.attr("href");
+ // Check if this is a folder (DT with H3)
+ const h3 = $child.children("h3").first();
+ if (h3.length > 0) {
+ const folderName = h3.text().trim() || "Unnamed";
+ const newPath = [...currentPath, folderName];
+
+ // Find the DL that follows this folder and recurse into it
+ const dl = $child.children("dl").first();
+ if (dl.length > 0) {
+ traverseFolder(dl, newPath);
+ }
+ } else {
+ // Check if this is a bookmark (DT with A)
+ const anchor = $child.children("a").first();
+ if (anchor.length > 0) {
+ const addDate = anchor.attr("add_date");
+ const tagsStr = anchor.attr("tags");
+ const tags = tagsStr && tagsStr.length > 0 ? tagsStr.split(",") : [];
+ const url = anchor.attr("href");
- // Build folder path by traversing up the hierarchy
- const path: string[] = [];
- let current = $a.parent();
- while (current && current.length > 0) {
- const h3 = current.find("> h3").first();
- if (h3.length > 0) {
- const folderName = h3.text().trim();
- // Use "Unnamed" for empty folder names
- path.unshift(folderName || "Unnamed");
+ bookmarks.push({
+ title: anchor.text(),
+ content: url
+ ? { type: BookmarkTypes.LINK as const, url }
+ : undefined,
+ tags,
+ addDate:
+ typeof addDate === "undefined" ? undefined : parseInt(addDate),
+ paths: [currentPath],
+ });
}
- current = current.parent();
}
+ });
+ }
- return {
- title: $a.text(),
- content: url ? { type: BookmarkTypes.LINK as const, url } : undefined,
- tags,
- addDate: typeof addDate === "undefined" ? undefined : parseInt(addDate),
- paths: [path],
- };
- })
- .get();
+ // Start traversal from the root DL element
+ const rootDl = $("dl").first();
+ if (rootDl.length > 0) {
+ traverseFolder(rootDl, []);
+ }
+
+ return bookmarks;
}
function parsePocketBookmarkFile(textContent: string): ParsedBookmark[] {