From 6fe20639702e3eb81bd262075094fb5d1f7033b9 Mon Sep 17 00:00:00 2001 From: Mohamed Bassem Date: Sat, 3 Jan 2026 10:39:45 +0000 Subject: fix: Eliminate the O(n2) parsing of the netscape import parsing (#2338) * fix: Eliminate the O(n2) parsing of the netscape import parsing * remove unneeded tests --- packages/shared/import-export/parsers.ts | 84 +++++++++++++++++++------------- 1 file changed, 50 insertions(+), 34 deletions(-) (limited to 'packages/shared/import-export/parsers.ts') diff --git a/packages/shared/import-export/parsers.ts b/packages/shared/import-export/parsers.ts index a56cbb98..df3d2c45 100644 --- a/packages/shared/import-export/parsers.ts +++ b/packages/shared/import-export/parsers.ts @@ -1,5 +1,6 @@ // Copied from https://gist.github.com/devster31/4e8c6548fd16ffb75c02e6f24e27f9b9 +import type { AnyNode } from "domhandler"; import * as cheerio from "cheerio"; import { parse } from "csv-parse/sync"; import { z } from "zod"; @@ -35,43 +36,58 @@ function parseNetscapeBookmarkFile(textContent: string): ParsedBookmark[] { } const $ = cheerio.load(textContent); - - return $("a") - .map(function (_index, a) { - const $a = $(a); - const addDate = $a.attr("add_date"); - let tags: string[] = []; - - const tagsStr = $a.attr("tags"); - try { - tags = tagsStr && tagsStr.length > 0 ? tagsStr.split(",") : []; - } catch { - /* empty */ - } - const url = $a.attr("href"); - - // Build folder path by traversing up the hierarchy - const path: string[] = []; - let current = $a.parent(); - while (current && current.length > 0) { - const h3 = current.find("> h3").first(); - if (h3.length > 0) { - const folderName = h3.text().trim(); - // Use "Unnamed" for empty folder names - path.unshift(folderName || "Unnamed"); + const bookmarks: ParsedBookmark[] = []; + + // Recursively traverse the bookmark hierarchy top-down + function traverseFolder( + element: cheerio.Cheerio, + currentPath: string[], + ) { + element.children().each((_index, child) => { + const $child = $(child); + + // Check if this is a folder (DT with H3) + const h3 = $child.children("h3").first(); + if (h3.length > 0) { + const folderName = h3.text().trim() || "Unnamed"; + const newPath = [...currentPath, folderName]; + + // Find the DL that follows this folder and recurse into it + const dl = $child.children("dl").first(); + if (dl.length > 0) { + traverseFolder(dl, newPath); + } + } else { + // Check if this is a bookmark (DT with A) + const anchor = $child.children("a").first(); + if (anchor.length > 0) { + const addDate = anchor.attr("add_date"); + const tagsStr = anchor.attr("tags"); + const tags = tagsStr && tagsStr.length > 0 ? tagsStr.split(",") : []; + const url = anchor.attr("href"); + + bookmarks.push({ + title: anchor.text(), + content: url + ? { type: BookmarkTypes.LINK as const, url } + : undefined, + tags, + addDate: + typeof addDate === "undefined" ? undefined : parseInt(addDate), + paths: [currentPath], + }); } - current = current.parent(); } + }); + } - return { - title: $a.text(), - content: url ? { type: BookmarkTypes.LINK as const, url } : undefined, - tags, - addDate: typeof addDate === "undefined" ? undefined : parseInt(addDate), - paths: [path], - }; - }) - .get(); + // Start traversal from the root DL element + const rootDl = $("dl").first(); + if (rootDl.length > 0) { + traverseFolder(rootDl, []); + } + + return bookmarks; } function parsePocketBookmarkFile(textContent: string): ParsedBookmark[] { -- cgit v1.3-1-g0d28