diff options
| author | Mohamed Bassem <me@mbassem.com> | 2026-01-03 10:39:45 +0000 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2026-01-03 10:39:45 +0000 |
| commit | 6fe20639702e3eb81bd262075094fb5d1f7033b9 (patch) | |
| tree | 9f6ee471afe159158184dfbad9d61bee236307b8 /packages/shared/import-export/parsers.ts | |
| parent | 1af9b9ddf69cc7215d10e2f0713123756b36077b (diff) | |
| download | karakeep-6fe20639702e3eb81bd262075094fb5d1f7033b9.tar.zst | |
fix: Eliminate the O(n²) parsing in the Netscape bookmark import (#2338)
* fix: Eliminate the O(n²) parsing in the Netscape bookmark import
* remove unneeded tests
Diffstat (limited to 'packages/shared/import-export/parsers.ts')
| -rw-r--r-- | packages/shared/import-export/parsers.ts | 78 |
1 file changed, 47 insertions, 31 deletions
diff --git a/packages/shared/import-export/parsers.ts b/packages/shared/import-export/parsers.ts index a56cbb98..df3d2c45 100644 --- a/packages/shared/import-export/parsers.ts +++ b/packages/shared/import-export/parsers.ts @@ -1,5 +1,6 @@ // Copied from https://gist.github.com/devster31/4e8c6548fd16ffb75c02e6f24e27f9b9 +import type { AnyNode } from "domhandler"; import * as cheerio from "cheerio"; import { parse } from "csv-parse/sync"; import { z } from "zod"; @@ -35,43 +36,58 @@ function parseNetscapeBookmarkFile(textContent: string): ParsedBookmark[] { } const $ = cheerio.load(textContent); + const bookmarks: ParsedBookmark[] = []; - return $("a") - .map(function (_index, a) { - const $a = $(a); - const addDate = $a.attr("add_date"); - let tags: string[] = []; + // Recursively traverse the bookmark hierarchy top-down + function traverseFolder( + element: cheerio.Cheerio<AnyNode>, + currentPath: string[], + ) { + element.children().each((_index, child) => { + const $child = $(child); - const tagsStr = $a.attr("tags"); - try { - tags = tagsStr && tagsStr.length > 0 ? tagsStr.split(",") : []; - } catch { - /* empty */ - } - const url = $a.attr("href"); + // Check if this is a folder (DT with H3) + const h3 = $child.children("h3").first(); + if (h3.length > 0) { + const folderName = h3.text().trim() || "Unnamed"; + const newPath = [...currentPath, folderName]; + + // Find the DL that follows this folder and recurse into it + const dl = $child.children("dl").first(); + if (dl.length > 0) { + traverseFolder(dl, newPath); + } + } else { + // Check if this is a bookmark (DT with A) + const anchor = $child.children("a").first(); + if (anchor.length > 0) { + const addDate = anchor.attr("add_date"); + const tagsStr = anchor.attr("tags"); + const tags = tagsStr && tagsStr.length > 0 ? 
tagsStr.split(",") : []; + const url = anchor.attr("href"); - // Build folder path by traversing up the hierarchy - const path: string[] = []; - let current = $a.parent(); - while (current && current.length > 0) { - const h3 = current.find("> h3").first(); - if (h3.length > 0) { - const folderName = h3.text().trim(); - // Use "Unnamed" for empty folder names - path.unshift(folderName || "Unnamed"); + bookmarks.push({ + title: anchor.text(), + content: url + ? { type: BookmarkTypes.LINK as const, url } + : undefined, + tags, + addDate: + typeof addDate === "undefined" ? undefined : parseInt(addDate), + paths: [currentPath], + }); } - current = current.parent(); } + }); + } - return { - title: $a.text(), - content: url ? { type: BookmarkTypes.LINK as const, url } : undefined, - tags, - addDate: typeof addDate === "undefined" ? undefined : parseInt(addDate), - paths: [path], - }; - }) - .get(); + // Start traversal from the root DL element + const rootDl = $("dl").first(); + if (rootDl.length > 0) { + traverseFolder(rootDl, []); + } + + return bookmarks; } function parsePocketBookmarkFile(textContent: string): ParsedBookmark[] { |
