about summary refs log tree commit diff stats
path: root/apps/web/lib/netscapeBookmarkParser.ts
diff options
context:
space:
mode:
Diffstat (limited to 'apps/web/lib/netscapeBookmarkParser.ts')
-rw-r--r-- apps/web/lib/netscapeBookmarkParser.ts 35
1 files changed, 23 insertions, 12 deletions
diff --git a/apps/web/lib/netscapeBookmarkParser.ts b/apps/web/lib/netscapeBookmarkParser.ts
index ac5f3ec2..196c0525 100644
--- a/apps/web/lib/netscapeBookmarkParser.ts
+++ b/apps/web/lib/netscapeBookmarkParser.ts
@@ -1,20 +1,31 @@
-function extractUrls(html: string): string[] {
- const regex = /<a\s+(?:[^>]*?\s+)?href="(http[^"]*)"/gi;
- let match;
- const urls = [];
-
- while ((match = regex.exec(html)) !== null) {
- urls.push(match[1]);
- }
-
- return urls;
-}
+// Copied from https://gist.github.com/devster31/4e8c6548fd16ffb75c02e6f24e27f9b9
+import * as cheerio from "cheerio";
export async function parseNetscapeBookmarkFile(file: File) {
const textContent = await file.text();
+
if (!textContent.startsWith("<!DOCTYPE NETSCAPE-Bookmark-file-1>")) {
throw Error("The uploaded html file does not seem to be a bookmark file");
}
- return extractUrls(textContent).map((url) => new URL(url));
+ const $ = cheerio.load(textContent);
+
+ return $("a")
+ .map(function (_index, a) {
+ const $a = $(a);
+ const addDate = $a.attr("add_date");
+ let tags: string[] = [];
+ try {
+ tags = $a.attr("tags")?.split(",") ?? [];
+ } catch (e) {
+ /* empty */
+ }
+ return {
+ title: $a.text(),
+ url: $a.attr("href"),
+ tags: tags,
+ addDate: typeof addDate === "undefined" ? undefined : parseInt(addDate),
+ };
+ })
+ .get();
}