about | summary | refs | log | tree | commit | diff | stats
path: root/apps
diff options
context:
space:
mode:
Diffstat (limited to 'apps')
-rw-r--r-- apps/web/app/api/bookmarks/export/route.tsx | 8
-rw-r--r-- apps/web/lib/exportBookmarks.ts | 111
-rw-r--r-- apps/web/lib/hooks/useBookmarkImport.ts | 272
-rw-r--r-- apps/web/lib/importBookmarkParser.ts | 286
4 files changed, 86 insertions, 591 deletions
diff --git a/apps/web/app/api/bookmarks/export/route.tsx b/apps/web/app/api/bookmarks/export/route.tsx
index 47fdeebc..ad309877 100644
--- a/apps/web/app/api/bookmarks/export/route.tsx
+++ b/apps/web/app/api/bookmarks/export/route.tsx
@@ -1,12 +1,12 @@
import { NextRequest } from "next/server";
+import { api, createContextFromRequest } from "@/server/api/client";
+import { z } from "zod";
+
import {
toExportFormat,
toNetscapeFormat,
zExportSchema,
-} from "@/lib/exportBookmarks";
-import { api, createContextFromRequest } from "@/server/api/client";
-import { z } from "zod";
-
+} from "@karakeep/shared/import-export";
import { MAX_NUM_BOOKMARKS_PER_PAGE } from "@karakeep/shared/types/bookmarks";
export const dynamic = "force-dynamic";
diff --git a/apps/web/lib/exportBookmarks.ts b/apps/web/lib/exportBookmarks.ts
deleted file mode 100644
index 5dc26e78..00000000
--- a/apps/web/lib/exportBookmarks.ts
+++ /dev/null
@@ -1,111 +0,0 @@
-import { z } from "zod";
-
-import { BookmarkTypes, ZBookmark } from "@karakeep/shared/types/bookmarks";
-
-export const zExportBookmarkSchema = z.object({
- createdAt: z.number(),
- title: z.string().nullable(),
- tags: z.array(z.string()),
- content: z
- .discriminatedUnion("type", [
- z.object({
- type: z.literal(BookmarkTypes.LINK),
- url: z.string(),
- }),
- z.object({
- type: z.literal(BookmarkTypes.TEXT),
- text: z.string(),
- }),
- ])
- .nullable(),
- note: z.string().nullable(),
- archived: z.boolean().optional().default(false),
-});
-
-export const zExportSchema = z.object({
- bookmarks: z.array(zExportBookmarkSchema),
-});
-
-export function toExportFormat(
- bookmark: ZBookmark,
-): z.infer<typeof zExportBookmarkSchema> {
- let content = null;
- switch (bookmark.content.type) {
- case BookmarkTypes.LINK: {
- content = {
- type: bookmark.content.type,
- url: bookmark.content.url,
- };
- break;
- }
- case BookmarkTypes.TEXT: {
- content = {
- type: bookmark.content.type,
- text: bookmark.content.text,
- };
- break;
- }
- // Exclude asset types for now
- }
- return {
- createdAt: Math.floor(bookmark.createdAt.getTime() / 1000),
- title:
- bookmark.title ??
- (bookmark.content.type === BookmarkTypes.LINK
- ? (bookmark.content.title ?? null)
- : null),
- tags: bookmark.tags.map((t) => t.name),
- content,
- note: bookmark.note ?? null,
- archived: bookmark.archived,
- };
-}
-
-export function toNetscapeFormat(bookmarks: ZBookmark[]): string {
- const header = `<!DOCTYPE NETSCAPE-Bookmark-file-1>
-<!-- This is an automatically generated file.
- It will be read and overwritten.
- DO NOT EDIT! -->
-<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
-<TITLE>Bookmarks</TITLE>
-<H1>Bookmarks</H1>
-<DL><p>`;
-
- const footer = `</DL><p>`;
-
- const bookmarkEntries = bookmarks
- .map((bookmark) => {
- if (bookmark.content?.type !== BookmarkTypes.LINK) {
- return "";
- }
- const addDate = bookmark.createdAt
- ? `ADD_DATE="${Math.floor(bookmark.createdAt.getTime() / 1000)}"`
- : "";
-
- const tagNames = bookmark.tags.map((t) => t.name).join(",");
- const tags = tagNames.length > 0 ? `TAGS="${tagNames}"` : "";
-
- const encodedUrl = encodeURI(bookmark.content.url);
- const displayTitle = bookmark.title ?? bookmark.content.url;
- const encodedTitle = escapeHtml(displayTitle);
-
- return ` <DT><A HREF="${encodedUrl}" ${addDate} ${tags}>${encodedTitle}</A>`;
- })
- .filter(Boolean)
- .join("\n");
-
- return `${header}\n${bookmarkEntries}\n${footer}`;
-}
-
-function escapeHtml(input: string): string {
- const escapeMap: Record<string, string> = {
- "&": "&amp;",
- "'": "&#x27;",
- "`": "&#x60;",
- '"': "&quot;",
- "<": "&lt;",
- ">": "&gt;",
- };
-
- return input.replace(/[&'`"<>]/g, (match) => escapeMap[match] || "");
-}
diff --git a/apps/web/lib/hooks/useBookmarkImport.ts b/apps/web/lib/hooks/useBookmarkImport.ts
index 7e5f6111..de515677 100644
--- a/apps/web/lib/hooks/useBookmarkImport.ts
+++ b/apps/web/lib/hooks/useBookmarkImport.ts
@@ -4,16 +4,6 @@ import { useState } from "react";
import { useRouter } from "next/navigation";
import { toast } from "@/components/ui/use-toast";
import { useTranslation } from "@/lib/i18n/client";
-import {
- deduplicateBookmarks,
- ParsedBookmark,
- parseKarakeepBookmarkFile,
- parseLinkwardenBookmarkFile,
- parseNetscapeBookmarkFile,
- parseOmnivoreBookmarkFile,
- parsePocketBookmarkFile,
- parseTabSessionManagerStateFile,
-} from "@/lib/importBookmarkParser";
import { useMutation } from "@tanstack/react-query";
import {
@@ -24,16 +14,15 @@ import {
useAddBookmarkToList,
useCreateBookmarkList,
} from "@karakeep/shared-react/hooks/lists";
-import { limitConcurrency } from "@karakeep/shared/concurrency";
-import { BookmarkTypes } from "@karakeep/shared/types/bookmarks";
-
-export type ImportSource =
- | "html"
- | "pocket"
- | "omnivore"
- | "karakeep"
- | "linkwarden"
- | "tab-session-manager";
+import {
+ importBookmarksFromFile,
+ ImportSource,
+ ParsedBookmark,
+} from "@karakeep/shared/import-export";
+import {
+ BookmarkTypes,
+ MAX_BOOKMARK_TITLE_LENGTH,
+} from "@karakeep/shared/types/bookmarks";
export interface ImportProgress {
done: number;
@@ -53,53 +42,6 @@ export function useBookmarkImport() {
const { mutateAsync: addToList } = useAddBookmarkToList();
const { mutateAsync: updateTags } = useUpdateBookmarkTags();
- const { mutateAsync: parseAndCreateBookmark } = useMutation({
- mutationFn: async (toImport: {
- bookmark: ParsedBookmark;
- listIds: string[];
- }) => {
- const bookmark = toImport.bookmark;
- if (bookmark.content === undefined) {
- throw new Error("Content is undefined");
- }
- const created = await createBookmark({
- crawlPriority: "low",
- title: bookmark.title,
- createdAt: bookmark.addDate
- ? new Date(bookmark.addDate * 1000)
- : undefined,
- note: bookmark.notes,
- archived: bookmark.archived,
- ...(bookmark.content.type === BookmarkTypes.LINK
- ? {
- type: BookmarkTypes.LINK,
- url: bookmark.content.url,
- }
- : {
- type: BookmarkTypes.TEXT,
- text: bookmark.content.text,
- }),
- });
-
- await Promise.all([
- ...toImport.listIds.map((listId) =>
- addToList({
- bookmarkId: created.id,
- listId,
- }),
- ),
- bookmark.tags.length > 0
- ? updateTags({
- bookmarkId: created.id,
- attach: bookmark.tags.map((t) => ({ tagName: t })),
- detach: [],
- })
- : undefined,
- ]);
- return created;
- },
- });
-
const uploadBookmarkFileMutation = useMutation({
mutationFn: async ({
file,
@@ -108,138 +50,87 @@ export function useBookmarkImport() {
file: File;
source: ImportSource;
}) => {
- if (source === "html") {
- return await parseNetscapeBookmarkFile(file);
- } else if (source === "pocket") {
- return await parsePocketBookmarkFile(file);
- } else if (source === "karakeep") {
- return await parseKarakeepBookmarkFile(file);
- } else if (source === "omnivore") {
- return await parseOmnivoreBookmarkFile(file);
- } else if (source === "linkwarden") {
- return await parseLinkwardenBookmarkFile(file);
- } else if (source === "tab-session-manager") {
- return await parseTabSessionManagerStateFile(file);
- } else {
- throw new Error("Unknown source");
- }
- },
- onSuccess: async (parsedBookmarks) => {
- if (parsedBookmarks.length === 0) {
- toast({ description: "No bookmarks found in the file." });
- return;
- }
-
- const rootList = await createList({
- name: t("settings.import.imported_bookmarks"),
- icon: "⬆️",
- });
-
- const finalBookmarksToImport = deduplicateBookmarks(parsedBookmarks);
-
- setImportProgress({ done: 0, total: finalBookmarksToImport.length });
-
- const allRequiredPaths = new Set<string>();
- for (const bookmark of finalBookmarksToImport) {
- for (const path of bookmark.paths) {
- if (path && path.length > 0) {
- for (let i = 1; i <= path.length; i++) {
- const subPath = path.slice(0, i);
- const pathKey = subPath.join("/");
- allRequiredPaths.add(pathKey);
- }
- }
- }
- }
-
- const allRequiredPathsArray = Array.from(allRequiredPaths).sort(
- (a, b) => a.split("/").length - b.split("/").length,
- );
-
- const pathMap: Record<string, string> = {};
- pathMap[""] = rootList.id;
-
- for (const pathKey of allRequiredPathsArray) {
- const parts = pathKey.split("/");
- const parentKey = parts.slice(0, -1).join("/");
- const parentId = pathMap[parentKey] || rootList.id;
-
- const folderName = parts[parts.length - 1];
- const folderList = await createList({
- name: folderName,
- parentId: parentId,
- icon: "📁",
- });
- pathMap[pathKey] = folderList.id;
- }
-
- const importPromises = finalBookmarksToImport.map(
- (bookmark) => async () => {
- const listIds = bookmark.paths.map(
- (path) => pathMap[path.join("/")] || rootList.id,
- );
- if (listIds.length === 0) {
- listIds.push(rootList.id);
- }
-
- try {
- const created = await parseAndCreateBookmark({
- bookmark: bookmark,
+ const result = await importBookmarksFromFile(
+ {
+ file,
+ source,
+ rootListName: t("settings.import.imported_bookmarks"),
+ deps: {
+ createList: createList,
+ createBookmark: async (bookmark: ParsedBookmark) => {
+ if (bookmark.content === undefined) {
+ throw new Error("Content is undefined");
+ }
+ const created = await createBookmark({
+ crawlPriority: "low",
+ title: bookmark.title.substring(0, MAX_BOOKMARK_TITLE_LENGTH),
+ createdAt: bookmark.addDate
+ ? new Date(bookmark.addDate * 1000)
+ : undefined,
+ note: bookmark.notes,
+ archived: bookmark.archived,
+ ...(bookmark.content.type === BookmarkTypes.LINK
+ ? {
+ type: BookmarkTypes.LINK,
+ url: bookmark.content.url,
+ }
+ : {
+ type: BookmarkTypes.TEXT,
+ text: bookmark.content.text,
+ }),
+ });
+ return created as { id: string; alreadyExists?: boolean };
+ },
+ addBookmarkToLists: async ({
+ bookmarkId,
listIds,
- });
-
- setImportProgress((prev) => {
- const newDone = (prev?.done ?? 0) + 1;
- return {
- done: newDone,
- total: finalBookmarksToImport.length,
- };
- });
- return { status: "fulfilled" as const, value: created };
- } catch {
- setImportProgress((prev) => {
- const newDone = (prev?.done ?? 0) + 1;
- return {
- done: newDone,
- total: finalBookmarksToImport.length,
- };
- });
- return { status: "rejected" as const };
- }
+ }: {
+ bookmarkId: string;
+ listIds: string[];
+ }) => {
+ await Promise.all(
+ listIds.map((listId) =>
+ addToList({
+ bookmarkId,
+ listId,
+ }),
+ ),
+ );
+ },
+ updateBookmarkTags: async ({
+ bookmarkId,
+ tags,
+ }: {
+ bookmarkId: string;
+ tags: string[];
+ }) => {
+ if (tags.length > 0) {
+ await updateTags({
+ bookmarkId,
+ attach: tags.map((t) => ({ tagName: t })),
+ detach: [],
+ });
+ }
+ },
+ },
+ onProgress: (done, total) => setImportProgress({ done, total }),
},
+ {},
);
-
- const CONCURRENCY_LIMIT = 20;
- const resultsPromises = limitConcurrency(
- importPromises,
- CONCURRENCY_LIMIT,
- );
-
- const results = await Promise.all(resultsPromises);
-
- let successes = 0;
- let failures = 0;
- let alreadyExisted = 0;
-
- for (const result of results) {
- if (result.status === "fulfilled") {
- if (result.value.alreadyExists) {
- alreadyExisted++;
- } else {
- successes++;
- }
- } else {
- failures++;
- }
+ return result;
+ },
+ onSuccess: async (result) => {
+ if (result.counts.total === 0) {
+ toast({ description: "No bookmarks found in the file." });
+ return;
}
-
+ const { successes, failures, alreadyExisted } = result.counts;
if (successes > 0 || alreadyExisted > 0) {
toast({
description: `Imported ${successes} bookmarks and skipped ${alreadyExisted} bookmarks that already existed`,
variant: "default",
});
}
-
if (failures > 0) {
toast({
description: `Failed to import ${failures} bookmarks. Check console for details.`,
@@ -247,7 +138,8 @@ export function useBookmarkImport() {
});
}
- router.push(`/dashboard/lists/${rootList.id}`);
+ if (result.rootListId)
+ router.push(`/dashboard/lists/${result.rootListId}`);
},
onError: (error) => {
setImportProgress(null);
diff --git a/apps/web/lib/importBookmarkParser.ts b/apps/web/lib/importBookmarkParser.ts
deleted file mode 100644
index 44fe872c..00000000
--- a/apps/web/lib/importBookmarkParser.ts
+++ /dev/null
@@ -1,286 +0,0 @@
-// Copied from https://gist.github.com/devster31/4e8c6548fd16ffb75c02e6f24e27f9b9
-import * as cheerio from "cheerio";
-import { parse } from "csv-parse/sync";
-import { z } from "zod";
-
-import { BookmarkTypes } from "@karakeep/shared/types/bookmarks";
-
-import { zExportSchema } from "./exportBookmarks";
-
-export interface ParsedBookmark {
- title: string;
- content?:
- | { type: BookmarkTypes.LINK; url: string }
- | { type: BookmarkTypes.TEXT; text: string };
- tags: string[];
- addDate?: number;
- notes?: string;
- archived?: boolean;
- paths: string[][];
-}
-
-export async function parseNetscapeBookmarkFile(
- file: File,
-): Promise<ParsedBookmark[]> {
- const textContent = await file.text();
-
- if (!textContent.startsWith("<!DOCTYPE NETSCAPE-Bookmark-file-1>")) {
- throw Error("The uploaded html file does not seem to be a bookmark file");
- }
-
- const $ = cheerio.load(textContent);
-
- return $("a")
- .map(function (_index, a) {
- const $a = $(a);
- const addDate = $a.attr("add_date");
- let tags: string[] = [];
-
- const tagsStr = $a.attr("tags");
- try {
- tags = tagsStr && tagsStr.length > 0 ? tagsStr.split(",") : [];
- } catch {
- /* empty */
- }
- const url = $a.attr("href");
-
- // Build folder path by traversing up the hierarchy
- const path: string[] = [];
- let current = $a.parent();
- while (current && current.length > 0) {
- const h3 = current.find("> h3").first();
- if (h3.length > 0) {
- path.unshift(h3.text());
- }
- current = current.parent();
- }
-
- return {
- title: $a.text(),
- content: url ? { type: BookmarkTypes.LINK as const, url } : undefined,
- tags,
- addDate: typeof addDate === "undefined" ? undefined : parseInt(addDate),
- paths: [path],
- };
- })
- .get();
-}
-
-export async function parsePocketBookmarkFile(
- file: File,
-): Promise<ParsedBookmark[]> {
- const textContent = await file.text();
-
- const records = parse(textContent, {
- columns: true,
- skip_empty_lines: true,
- }) as {
- title: string;
- url: string;
- time_added: string;
- tags: string;
- status?: string;
- }[];
-
- return records.map((record) => {
- return {
- title: record.title,
- content: { type: BookmarkTypes.LINK as const, url: record.url },
- tags: record.tags.length > 0 ? record.tags.split("|") : [],
- addDate: parseInt(record.time_added),
- archived: record.status === "archive",
- paths: [], // TODO
- };
- });
-}
-
-export async function parseKarakeepBookmarkFile(
- file: File,
-): Promise<ParsedBookmark[]> {
- const textContent = await file.text();
-
- const parsed = zExportSchema.safeParse(JSON.parse(textContent));
- if (!parsed.success) {
- throw new Error(
- `The uploaded JSON file contains an invalid bookmark file: ${parsed.error.toString()}`,
- );
- }
-
- return parsed.data.bookmarks.map((bookmark) => {
- let content = undefined;
- if (bookmark.content?.type == BookmarkTypes.LINK) {
- content = {
- type: BookmarkTypes.LINK as const,
- url: bookmark.content.url,
- };
- } else if (bookmark.content?.type == BookmarkTypes.TEXT) {
- content = {
- type: BookmarkTypes.TEXT as const,
- text: bookmark.content.text,
- };
- }
- return {
- title: bookmark.title ?? "",
- content,
- tags: bookmark.tags,
- addDate: bookmark.createdAt,
- notes: bookmark.note ?? undefined,
- archived: bookmark.archived,
- paths: [], // TODO
- };
- });
-}
-
-export async function parseOmnivoreBookmarkFile(
- file: File,
-): Promise<ParsedBookmark[]> {
- const textContent = await file.text();
- const zOmnivoreExportSchema = z.array(
- z.object({
- title: z.string(),
- url: z.string(),
- labels: z.array(z.string()),
- savedAt: z.coerce.date(),
- state: z.string().optional(),
- }),
- );
-
- const parsed = zOmnivoreExportSchema.safeParse(JSON.parse(textContent));
- if (!parsed.success) {
- throw new Error(
- `The uploaded JSON file contains an invalid omnivore bookmark file: ${parsed.error.toString()}`,
- );
- }
-
- return parsed.data.map((bookmark) => {
- return {
- title: bookmark.title ?? "",
- content: { type: BookmarkTypes.LINK as const, url: bookmark.url },
- tags: bookmark.labels,
- addDate: bookmark.savedAt.getTime() / 1000,
- archived: bookmark.state === "Archived",
- paths: [],
- };
- });
-}
-
-export async function parseLinkwardenBookmarkFile(
- file: File,
-): Promise<ParsedBookmark[]> {
- const textContent = await file.text();
- const zLinkwardenExportSchema = z.object({
- collections: z.array(
- z.object({
- links: z.array(
- z.object({
- name: z.string(),
- url: z.string(),
- tags: z.array(z.object({ name: z.string() })),
- createdAt: z.coerce.date(),
- }),
- ),
- }),
- ),
- });
-
- const parsed = zLinkwardenExportSchema.safeParse(JSON.parse(textContent));
- if (!parsed.success) {
- throw new Error(
- `The uploaded JSON file contains an invalid Linkwarden bookmark file: ${parsed.error.toString()}`,
- );
- }
-
- return parsed.data.collections.flatMap((collection) => {
- return collection.links.map((bookmark) => ({
- title: bookmark.name ?? "",
- content: { type: BookmarkTypes.LINK as const, url: bookmark.url },
- tags: bookmark.tags.map((tag) => tag.name),
- addDate: bookmark.createdAt.getTime() / 1000,
- paths: [], // TODO
- }));
- });
-}
-
-export async function parseTabSessionManagerStateFile(
- file: File,
-): Promise<ParsedBookmark[]> {
- const textContent = await file.text();
-
- const zTab = z.object({
- url: z.string(),
- title: z.string(),
- lastAccessed: z.number(),
- });
-
- const zSession = z.object({
- windows: z.record(z.string(), z.record(z.string(), zTab)),
- date: z.number(),
- });
-
- const zTabSessionManagerSchema = z.array(zSession);
-
- const parsed = zTabSessionManagerSchema.safeParse(JSON.parse(textContent));
- if (!parsed.success) {
- throw new Error(
- `The uploaded JSON file contains an invalid Tab Session Manager bookmark file: ${parsed.error.toString()}`,
- );
- }
-
- // Get the object in data that has the most recent `date`
- const { windows } = parsed.data.reduce((prev, curr) =>
- prev.date > curr.date ? prev : curr,
- );
-
- return Object.values(windows).flatMap((window) =>
- Object.values(window).map((tab) => ({
- title: tab.title,
- content: { type: BookmarkTypes.LINK as const, url: tab.url },
- tags: [],
- addDate: tab.lastAccessed,
- paths: [], // Tab Session Manager doesn't have folders
- })),
- );
-}
-
-export function deduplicateBookmarks(
- bookmarks: ParsedBookmark[],
-): ParsedBookmark[] {
- const deduplicatedBookmarksMap = new Map<string, ParsedBookmark>();
- const textBookmarks: ParsedBookmark[] = [];
-
- for (const bookmark of bookmarks) {
- if (bookmark.content?.type === BookmarkTypes.LINK) {
- const url = bookmark.content.url;
- if (deduplicatedBookmarksMap.has(url)) {
- const existing = deduplicatedBookmarksMap.get(url)!;
- // Merge tags
- existing.tags = [...new Set([...existing.tags, ...bookmark.tags])];
- // Merge paths
- existing.paths = [...existing.paths, ...bookmark.paths];
- const existingDate = existing.addDate ?? Infinity;
- const newDate = bookmark.addDate ?? Infinity;
- if (newDate < existingDate) {
- existing.addDate = bookmark.addDate;
- }
- // Append notes if both exist
- if (existing.notes && bookmark.notes) {
- existing.notes = `${existing.notes}\n---\n${bookmark.notes}`;
- } else if (bookmark.notes) {
- existing.notes = bookmark.notes;
- }
- // For archived status, prefer archived if either is archived
- if (bookmark.archived === true) {
- existing.archived = true;
- }
- // Title: keep existing one for simplicity
- } else {
- deduplicatedBookmarksMap.set(url, bookmark);
- }
- } else {
- // Keep text bookmarks as they are (no URL to dedupe on)
- textBookmarks.push(bookmark);
- }
- }
-
- return [...deduplicatedBookmarksMap.values(), ...textBookmarks];
-}