diff options
| author | Mohamed Bassem <me@mbassem.com> | 2026-02-04 09:44:18 +0000 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2026-02-04 09:44:18 +0000 |
| commit | 3c838ddb26c1e86d3f201ce71f13c834be705f69 (patch) | |
| tree | 892fe4f8cd2ca01d6e4cd34f677fc16aa2fd63f6 /packages/shared/import-export/importer.ts | |
| parent | 3fcccb858ee3ef22fe9ce479af4ce458ac9a0fe1 (diff) | |
| download | karakeep-3c838ddb26c1e86d3f201ce71f13c834be705f69.tar.zst | |
feat: Import workflow v3 (#2378)
* feat: import workflow v3
* batch stage
* revert migration
* cleanups
* pr comments
* move to models
* add allowed workers
* e2e tests
* import list ids
* add missing indices
* merge test
* more fixes
* add resume/pause to UI
* fix ui states
* fix tests
* simplify progress tracking
* remove backpressure
* fix list imports
* fix race on claiming bookmarks
* remove the codex file
Diffstat (limited to 'packages/shared/import-export/importer.ts')
| -rw-r--r-- | packages/shared/import-export/importer.ts | 125 |
1 files changed, 76 insertions, 49 deletions
diff --git a/packages/shared/import-export/importer.ts b/packages/shared/import-export/importer.ts index b32c49c1..be24ca73 100644 --- a/packages/shared/import-export/importer.ts +++ b/packages/shared/import-export/importer.ts @@ -1,4 +1,3 @@ -import { limitConcurrency } from "../concurrency"; import { MAX_LIST_NAME_LENGTH } from "../types/lists"; import { ImportSource, ParsedBookmark, parseImportFile } from "./parsers"; @@ -9,28 +8,32 @@ export interface ImportCounts { total: number; } +export interface StagedBookmark { + type: "link" | "text" | "asset"; + url?: string; + title?: string; + content?: string; + note?: string; + tags: string[]; + listIds: string[]; + sourceAddedAt?: Date; +} + export interface ImportDeps { createList: (input: { name: string; icon: string; parentId?: string; }) => Promise<{ id: string }>; - createBookmark: ( - bookmark: ParsedBookmark, - sessionId: string, - ) => Promise<{ id: string; alreadyExists?: boolean }>; - addBookmarkToLists: (input: { - bookmarkId: string; - listIds: string[]; - }) => Promise<void>; - updateBookmarkTags: (input: { - bookmarkId: string; - tags: string[]; + stageImportedBookmarks: (input: { + importSessionId: string; + bookmarks: StagedBookmark[]; }) => Promise<void>; createImportSession: (input: { name: string; rootListId: string; }) => Promise<{ id: string }>; + finalizeImportStaging: (sessionId: string) => Promise<void>; } export interface ImportOptions { @@ -62,7 +65,7 @@ export async function importBookmarksFromFile( }, options: ImportOptions = {}, ): Promise<ImportResult> { - const { concurrencyLimit = 20, parsers } = options; + const { parsers } = options; const textContent = await file.text(); const parsedBookmarks = parsers?.[source] @@ -120,50 +123,74 @@ export async function importBookmarksFromFile( pathMap[pathKey] = folderList.id; } - let done = 0; - const importPromises = parsedBookmarks.map((bookmark) => async () => { - try { - const listIds = bookmark.paths.map( - (path) => 
pathMap[path.join(PATH_DELIMITER)] || rootList.id, - ); - if (listIds.length === 0) listIds.push(rootList.id); - - const created = await deps.createBookmark(bookmark, session.id); - await deps.addBookmarkToLists({ bookmarkId: created.id, listIds }); - if (bookmark.tags && bookmark.tags.length > 0) { - await deps.updateBookmarkTags({ - bookmarkId: created.id, - tags: bookmark.tags, - }); + // Prepare all bookmarks for staging + const bookmarksToStage: StagedBookmark[] = parsedBookmarks.map((bookmark) => { + // Convert paths to list IDs using pathMap + // If no paths, assign to root list + const listIds = + bookmark.paths.length === 0 + ? [rootList.id] + : bookmark.paths + .map((path) => { + if (path.length === 0) { + return rootList.id; + } + const pathKey = path.join(PATH_DELIMITER); + return pathMap[pathKey] || rootList.id; + }) + .filter((id, index, arr) => arr.indexOf(id) === index); // dedupe + + // Determine type and extract content appropriately + let type: "link" | "text" | "asset" = "link"; + let url: string | undefined; + let textContent: string | undefined; + + if (bookmark.content) { + if (bookmark.content.type === "link") { + type = "link"; + url = bookmark.content.url; + } else if (bookmark.content.type === "text") { + type = "text"; + textContent = bookmark.content.text; } - - return created; - } finally { - done += 1; - onProgress?.(done, parsedBookmarks.length); } - }); - const resultsPromises = limitConcurrency(importPromises, concurrencyLimit); - const results = await Promise.allSettled(resultsPromises); + return { + type, + url, + title: bookmark.title, + content: textContent, + note: bookmark.notes, + tags: bookmark.tags ?? [], + listIds, + sourceAddedAt: bookmark.addDate + ? 
new Date(bookmark.addDate * 1000) + : undefined, + }; + }); - let successes = 0; - let failures = 0; - let alreadyExisted = 0; + // Stage bookmarks in batches of 50 + const BATCH_SIZE = 50; + let staged = 0; - for (const r of results) { - if (r.status === "fulfilled") { - if (r.value.alreadyExists) alreadyExisted++; - else successes++; - } else { - failures++; - } + for (let i = 0; i < bookmarksToStage.length; i += BATCH_SIZE) { + const batch = bookmarksToStage.slice(i, i + BATCH_SIZE); + await deps.stageImportedBookmarks({ + importSessionId: session.id, + bookmarks: batch, + }); + staged += batch.length; + onProgress?.(staged, parsedBookmarks.length); } + + // Finalize staging - marks session as "pending" for worker pickup + await deps.finalizeImportStaging(session.id); + return { counts: { - successes, - failures, - alreadyExisted, + successes: 0, + failures: 0, + alreadyExisted: 0, total: parsedBookmarks.length, }, rootListId: rootList.id, |
