// Provenance: recovered from the patch view of commit
// aecbe6ae8b3dbc7bcdcf33f1c8c086dafb77eb24 (Mohamed Bassem, Sat, 30 Aug 2025
// 15:26:02 +0000, "fix: handle list with slashes in their names and truncate
// long list names. fixes #1597"), which deleted
// apps/web/lib/importBookmarkParser.ts (286 lines).
//
// Copied from https://gist.github.com/devster31/4e8c6548fd16ffb75c02e6f24e27f9b9
import * as cheerio from "cheerio";
import { parse } from "csv-parse/sync";
import { z } from "zod";

import { BookmarkTypes } from "@karakeep/shared/types/bookmarks";

import { zExportSchema } from "./exportBookmarks";

/**
 * Format-independent representation of one imported bookmark, produced by
 * every parser in this module.
 */
export interface ParsedBookmark {
  title: string;
  /** Link or text payload; undefined when the source entry had neither. */
  content?:
    | { type: BookmarkTypes.LINK; url: string }
    | { type: BookmarkTypes.TEXT; text: string };
  tags: string[];
  /**
   * Creation timestamp as a number. The parsers that start from a `Date`
   * divide its millisecond value by 1000, i.e. they emit seconds.
   */
  addDate?: number;
  notes?: string;
  archived?: boolean;
  /** Folder paths (outermost folder first) the bookmark was found under. */
  paths: string[][];
}

// First line every Netscape bookmark export is expected to start with.
// NOTE(review): the literal was lost in the recovered source (it read
// `startsWith("")`, which is always true); restored from the file format's
// well-known DOCTYPE — confirm against the upstream repository.
const NETSCAPE_BOOKMARK_DOCTYPE = "<!DOCTYPE NETSCAPE-Bookmark-file-1>";

/** Parses a base-10 integer timestamp, returning undefined instead of NaN. */
function parseTimestamp(raw: string | undefined): number | undefined {
  if (raw === undefined) {
    return undefined;
  }
  const value = parseInt(raw, 10);
  return Number.isNaN(value) ? undefined : value;
}

/**
 * Parses a Netscape-format HTML bookmark file.
 *
 * @param file - The uploaded HTML file.
 * @returns One entry per `<a>` element, with tags taken from the `tags`
 *   attribute, the date from `add_date`, and a single folder path built from
 *   the `<h3>` headings of the element's ancestors.
 * @throws Error if the file does not start with the Netscape bookmark DOCTYPE.
 */
export async function parseNetscapeBookmarkFile(
  file: File,
): Promise<ParsedBookmark[]> {
  const textContent = await file.text();

  if (!textContent.startsWith(NETSCAPE_BOOKMARK_DOCTYPE)) {
    throw Error("The uploaded html file does not seem to be a bookmark file");
  }

  const $ = cheerio.load(textContent);

  return $("a")
    .map(function (_index, a) {
      const $a = $(a);
      const tagsStr = $a.attr("tags");
      const tags = tagsStr ? tagsStr.split(",") : [];
      const url = $a.attr("href");

      // Build folder path by traversing up the hierarchy: every ancestor that
      // has a direct <h3> child contributes that heading's text, outermost
      // folder first.
      const path: string[] = [];
      let current = $a.parent();
      while (current.length > 0) {
        const h3 = current.find("> h3").first();
        if (h3.length > 0) {
          path.unshift(h3.text());
        }
        current = current.parent();
      }

      return {
        title: $a.text(),
        content: url ? { type: BookmarkTypes.LINK as const, url } : undefined,
        tags,
        addDate: parseTimestamp($a.attr("add_date")),
        paths: [path],
      };
    })
    .get();
}

/**
 * Parses a Pocket CSV export (columns: title, url, time_added, tags, status).
 *
 * @param file - The uploaded CSV file.
 * @returns One link bookmark per record; tags are split on `|` and a record
 *   is archived when its status is `"archive"`.
 */
export async function parsePocketBookmarkFile(
  file: File,
): Promise<ParsedBookmark[]> {
  const textContent = await file.text();

  const records = parse(textContent, {
    columns: true,
    skip_empty_lines: true,
  }) as {
    title: string;
    url: string;
    time_added: string;
    // Optional: the cast is unchecked, so a CSV without this column must not
    // crash the import.
    tags?: string;
    status?: string;
  }[];

  return records.map((record) => ({
    title: record.title,
    content: { type: BookmarkTypes.LINK as const, url: record.url },
    tags: record.tags ? record.tags.split("|") : [],
    addDate: parseTimestamp(record.time_added),
    archived: record.status === "archive",
    paths: [], // TODO
  }));
}

/**
 * Parses a Karakeep JSON export, validated against `zExportSchema`.
 *
 * @param file - The uploaded JSON file.
 * @returns One bookmark per exported entry; entries whose content is neither
 *   a link nor text are returned without `content`.
 * @throws SyntaxError if the file is not valid JSON.
 * @throws Error if the JSON does not match the export schema.
 */
export async function parseKarakeepBookmarkFile(
  file: File,
): Promise<ParsedBookmark[]> {
  const textContent = await file.text();

  const parsed = zExportSchema.safeParse(JSON.parse(textContent));
  if (!parsed.success) {
    throw new Error(
      `The uploaded JSON file contains an invalid bookmark file: ${parsed.error.toString()}`,
    );
  }

  return parsed.data.bookmarks.map((bookmark) => {
    let content: ParsedBookmark["content"] = undefined;
    if (bookmark.content?.type === BookmarkTypes.LINK) {
      content = {
        type: BookmarkTypes.LINK as const,
        url: bookmark.content.url,
      };
    } else if (bookmark.content?.type === BookmarkTypes.TEXT) {
      content = {
        type: BookmarkTypes.TEXT as const,
        text: bookmark.content.text,
      };
    }
    return {
      title: bookmark.title ?? "",
      content,
      tags: bookmark.tags,
      addDate: bookmark.createdAt,
      notes: bookmark.note ?? undefined,
      archived: bookmark.archived,
      paths: [], // TODO
    };
  });
}

/**
 * Parses an Omnivore JSON export (an array of saved items).
 *
 * @param file - The uploaded JSON file.
 * @returns One link bookmark per item; labels become tags and an item is
 *   archived when its state is `"Archived"`.
 * @throws SyntaxError if the file is not valid JSON.
 * @throws Error if the JSON does not match the expected shape.
 */
export async function parseOmnivoreBookmarkFile(
  file: File,
): Promise<ParsedBookmark[]> {
  const textContent = await file.text();
  const zOmnivoreExportSchema = z.array(
    z.object({
      title: z.string(),
      url: z.string(),
      labels: z.array(z.string()),
      savedAt: z.coerce.date(),
      state: z.string().optional(),
    }),
  );

  const parsed = zOmnivoreExportSchema.safeParse(JSON.parse(textContent));
  if (!parsed.success) {
    throw new Error(
      `The uploaded JSON file contains an invalid omnivore bookmark file: ${parsed.error.toString()}`,
    );
  }

  return parsed.data.map((bookmark) => ({
    title: bookmark.title,
    content: { type: BookmarkTypes.LINK as const, url: bookmark.url },
    tags: bookmark.labels,
    // Date#getTime() is in milliseconds; convert to seconds.
    addDate: bookmark.savedAt.getTime() / 1000,
    archived: bookmark.state === "Archived",
    paths: [],
  }));
}

/**
 * Parses a Linkwarden JSON export, flattening the links of all collections.
 *
 * @param file - The uploaded JSON file.
 * @returns One link bookmark per link in any collection.
 * @throws SyntaxError if the file is not valid JSON.
 * @throws Error if the JSON does not match the expected shape.
 */
export async function parseLinkwardenBookmarkFile(
  file: File,
): Promise<ParsedBookmark[]> {
  const textContent = await file.text();
  const zLinkwardenExportSchema = z.object({
    collections: z.array(
      z.object({
        links: z.array(
          z.object({
            name: z.string(),
            url: z.string(),
            tags: z.array(z.object({ name: z.string() })),
            createdAt: z.coerce.date(),
          }),
        ),
      }),
    ),
  });

  const parsed = zLinkwardenExportSchema.safeParse(JSON.parse(textContent));
  if (!parsed.success) {
    throw new Error(
      `The uploaded JSON file contains an invalid Linkwarden bookmark file: ${parsed.error.toString()}`,
    );
  }

  return parsed.data.collections.flatMap((collection) =>
    collection.links.map((bookmark) => ({
      title: bookmark.name,
      content: { type: BookmarkTypes.LINK as const, url: bookmark.url },
      tags: bookmark.tags.map((tag) => tag.name),
      // Date#getTime() is in milliseconds; convert to seconds.
      addDate: bookmark.createdAt.getTime() / 1000,
      paths: [], // TODO
    })),
  );
}

/**
 * Parses a Tab Session Manager JSON export and returns the tabs of the
 * session with the most recent `date`.
 *
 * @param file - The uploaded JSON file.
 * @returns One link bookmark per tab of the latest session, or an empty array
 *   when the export contains no sessions.
 * @throws SyntaxError if the file is not valid JSON.
 * @throws Error if the JSON does not match the expected shape.
 */
export async function parseTabSessionManagerStateFile(
  file: File,
): Promise<ParsedBookmark[]> {
  const textContent = await file.text();

  const zTab = z.object({
    url: z.string(),
    title: z.string(),
    lastAccessed: z.number(),
  });

  const zSession = z.object({
    windows: z.record(z.string(), z.record(z.string(), zTab)),
    date: z.number(),
  });

  const zTabSessionManagerSchema = z.array(zSession);

  const parsed = zTabSessionManagerSchema.safeParse(JSON.parse(textContent));
  if (!parsed.success) {
    throw new Error(
      `The uploaded JSON file contains an invalid Tab Session Manager bookmark file: ${parsed.error.toString()}`,
    );
  }

  // reduce() without an initial value throws on an empty array, so an export
  // with no sessions is handled up front.
  if (parsed.data.length === 0) {
    return [];
  }

  // Get the object in data that has the most recent `date`
  const { windows } = parsed.data.reduce((prev, curr) =>
    prev.date > curr.date ? prev : curr,
  );

  return Object.values(windows).flatMap((window) =>
    Object.values(window).map((tab) => ({
      title: tab.title,
      content: { type: BookmarkTypes.LINK as const, url: tab.url },
      tags: [],
      // NOTE(review): browser tab `lastAccessed` values are presumably in
      // milliseconds, while other parsers here emit seconds — confirm.
      addDate: tab.lastAccessed,
      paths: [], // Tab Session Manager doesn't have folders
    })),
  );
}

/**
 * Merges link bookmarks that share the same URL.
 *
 * For duplicates, tags are unioned, paths are concatenated, the earliest
 * `addDate` wins, notes are joined with a `---` separator line, and the
 * result is archived if any duplicate was archived. The title of the first
 * occurrence is kept. Bookmarks without a link are passed through untouched
 * and placed after the deduplicated links.
 *
 * @param bookmarks - Bookmarks to deduplicate; the array and its elements are
 *   not modified.
 * @returns Deduplicated link bookmarks (in first-seen order) followed by all
 *   non-link bookmarks.
 */
export function deduplicateBookmarks(
  bookmarks: ParsedBookmark[],
): ParsedBookmark[] {
  const deduplicatedBookmarksMap = new Map<string, ParsedBookmark>();
  const textBookmarks: ParsedBookmark[] = [];

  for (const bookmark of bookmarks) {
    if (bookmark.content?.type !== BookmarkTypes.LINK) {
      // Keep text bookmarks as they are (no URL to dedupe on)
      textBookmarks.push(bookmark);
      continue;
    }

    const url = bookmark.content.url;
    const existing = deduplicatedBookmarksMap.get(url);
    if (existing === undefined) {
      // Store a shallow copy: the merge below only reassigns properties, so
      // this is enough to keep the caller's objects unmodified.
      deduplicatedBookmarksMap.set(url, { ...bookmark });
      continue;
    }

    // Merge tags
    existing.tags = [...new Set([...existing.tags, ...bookmark.tags])];
    // Merge paths
    existing.paths = [...existing.paths, ...bookmark.paths];

    // Keep the earliest known date; a missing date never wins.
    const existingDate = existing.addDate ?? Infinity;
    const newDate = bookmark.addDate ?? Infinity;
    if (newDate < existingDate) {
      existing.addDate = bookmark.addDate;
    }

    // Append notes if both exist
    if (existing.notes && bookmark.notes) {
      existing.notes = `${existing.notes}\n---\n${bookmark.notes}`;
    } else if (bookmark.notes) {
      existing.notes = bookmark.notes;
    }

    // For archived status, prefer archived if either is archived
    if (bookmark.archived === true) {
      existing.archived = true;
    }
    // Title: keep existing one for simplicity
  }

  return [...deduplicatedBookmarksMap.values(), ...textBookmarks];
}