From d62c9724b7f4cb728cd5b5496fdcc0eba8330772 Mon Sep 17 00:00:00 2001
From: MohamedBassem
Date: Sat, 21 Sep 2024 16:56:42 +0000
Subject: feature(web): Preserve title, tags and createdAt when importing a netscape html. Fixes #401

---
 apps/web/lib/netscapeBookmarkParser.ts | 35 ++++++++++++++++++++++------------
 1 file changed, 23 insertions(+), 12 deletions(-)

(limited to 'apps/web/lib/netscapeBookmarkParser.ts')

diff --git a/apps/web/lib/netscapeBookmarkParser.ts b/apps/web/lib/netscapeBookmarkParser.ts
index ac5f3ec2..196c0525 100644
--- a/apps/web/lib/netscapeBookmarkParser.ts
+++ b/apps/web/lib/netscapeBookmarkParser.ts
@@ -1,20 +1,31 @@
-function extractUrls(html: string): string[] {
-  const regex = /<a\s+(?:[^>]*?\s+)?href="(http[^"]*)"/gi;
-  let match;
-  const urls = [];
-
-  while ((match = regex.exec(html)) !== null) {
-    urls.push(match[1]);
-  }
-
-  return urls;
-}
+// Copied from https://gist.github.com/devster31/4e8c6548fd16ffb75c02e6f24e27f9b9
+import * as cheerio from "cheerio";
 
 export async function parseNetscapeBookmarkFile(file: File) {
   const textContent = await file.text();
+
   if (!textContent.startsWith("<!DOCTYPE NETSCAPE-Bookmark-file-1>")) {
     throw Error("The uploaded html file does not seem to be a bookmark file");
   }
 
-  return extractUrls(textContent).map((url) => new URL(url));
+  const $ = cheerio.load(textContent);
+
+  return $("a")
+    .map(function (_index, a) {
+      const $a = $(a);
+      const addDate = $a.attr("add_date");
+      let tags: string[] = [];
+      try {
+        tags = $a.attr("tags")?.split(",") ?? [];
+      } catch (e) {
+        /* empty */
+      }
+      return {
+        title: $a.text(),
+        url: $a.attr("href"),
+        tags: tags,
+        addDate: typeof addDate === "undefined" ? undefined : parseInt(addDate),
+      };
+    })
+    .get();
 }
-- 
cgit v1.2.3-70-g09d2