diff options
Diffstat (limited to 'apps/web/lib/netscapeBookmarkParser.ts')
| -rw-r--r-- | apps/web/lib/netscapeBookmarkParser.ts | 35 |
1 file changed, 23 insertions, 12 deletions
diff --git a/apps/web/lib/netscapeBookmarkParser.ts b/apps/web/lib/netscapeBookmarkParser.ts
index ac5f3ec2..196c0525 100644
--- a/apps/web/lib/netscapeBookmarkParser.ts
+++ b/apps/web/lib/netscapeBookmarkParser.ts
@@ -1,20 +1,31 @@
-function extractUrls(html: string): string[] {
-  const regex = /<a\s+(?:[^>]*?\s+)?href="(http[^"]*)"/gi;
-  let match;
-  const urls = [];
-
-  while ((match = regex.exec(html)) !== null) {
-    urls.push(match[1]);
-  }
-
-  return urls;
-}
+// Copied from https://gist.github.com/devster31/4e8c6548fd16ffb75c02e6f24e27f9b9
+import * as cheerio from "cheerio";
 
 export async function parseNetscapeBookmarkFile(file: File) {
   const textContent = await file.text();
+
   if (!textContent.startsWith("<!DOCTYPE NETSCAPE-Bookmark-file-1>")) {
     throw Error("The uploaded html file does not seem to be a bookmark file");
   }
 
-  return extractUrls(textContent).map((url) => new URL(url));
+  const $ = cheerio.load(textContent);
+
+  return $("a")
+    .map(function (_index, a) {
+      const $a = $(a);
+      const addDate = $a.attr("add_date");
+      let tags: string[] = [];
+      try {
+        tags = $a.attr("tags")?.split(",") ?? [];
+      } catch (e) {
+        /* empty */
+      }
+      return {
+        title: $a.text(),
+        url: $a.attr("href"),
+        tags: tags,
+        addDate: typeof addDate === "undefined" ? undefined : parseInt(addDate),
+      };
+    })
+    .get();
 }
