From 86a4b3966504507afd6c3adbb6a1246cafd39d83 Mon Sep 17 00:00:00 2001 From: Mohamed Bassem Date: Sat, 29 Nov 2025 14:53:31 +0000 Subject: feat: Add automated bookmark backup feature (#2182) * feat: Add automated bookmark backup system Implements a comprehensive automated backup feature for user bookmarks with the following capabilities: Database Schema: - Add backupSettings table to store user backup preferences (enabled, frequency, retention) - Add backups table to track backup records with status and metadata - Add BACKUP asset type for storing compressed backup files - Add migration 0066_add_backup_tables.sql Background Workers: - Implement BackupSchedulingWorker cron job (runs daily at midnight UTC) - Create BackupWorker to process individual backup jobs - Deterministic scheduling spreads backup jobs across 24 hours based on user ID hash - Support for daily and weekly backup frequencies - Automated retention cleanup to delete old backups based on user settings Export & Compression: - Reuse existing export functionality for bookmark data - Compress exports using Node.js built-in zlib (gzip level 9) - Store compressed backups as assets with proper metadata - Track backup size and bookmark count for statistics tRPC API: - backups.getSettings - Retrieve user backup configuration - backups.updateSettings - Update backup preferences - backups.list - List all user backups with metadata - backups.get - Get specific backup details - backups.delete - Delete a backup - backups.download - Download backup file (base64 encoded) - backups.triggerBackup - Manually trigger backup creation UI Components: - BackupSettings component with configuration form - Enable/disable automatic backups toggle - Frequency selection (daily/weekly) - Retention period configuration (1-365 days) - Backup list table with download and delete actions - Manual backup trigger button - Display backup stats (size, bookmark count, status) - Added backups page to settings navigation Technical Details: - 
Uses Restate queue system for distributed job processing - Implements idempotency keys to prevent duplicate backups - Background worker concurrency: 2 jobs at a time - 10-minute timeout for large backup exports - Proper error handling and logging throughout - Type-safe implementation with Zod schemas * refactor: simplify backup settings and asset handling - Move backup settings from separate table to user table columns - Update BackupSettings model to use static methods with users table - Remove download mutation in favor of direct asset links - Implement proper quota checks using QuotaService.checkStorageQuota - Update UI to use new property names and direct asset downloads - Update shared types to match new schema Key changes: - backupSettingsTable removed, settings now in users table - Backup downloads use direct /api/assets/{id} links - Quota properly validated before creating backup assets - Cleaner separation of concerns in tRPC models * migration * use zip instead of gzip * fix drizzle * fix settings * streaming json * remove more dead code * add e2e tests * return backup * poll for backups * more fixes * more fixes * fix test * fix UI * fix delete asset * fix ui * redirect for backup download * cleanups * fix idempotency * fix tests * add ratelimit * add error handling for background backups * i18n * model changes --------- Co-authored-by: Claude --- packages/trpc/lib/attachments.ts | 1 + packages/trpc/models/backups.ts | 172 ++++++++++++++++++++++++++++++++++++ packages/trpc/models/users.ts | 9 ++ packages/trpc/routers/_app.ts | 2 + packages/trpc/routers/backups.ts | 54 +++++++++++ packages/trpc/routers/users.test.ts | 9 ++ 6 files changed, 247 insertions(+) create mode 100644 packages/trpc/models/backups.ts create mode 100644 packages/trpc/routers/backups.ts (limited to 'packages/trpc') diff --git a/packages/trpc/lib/attachments.ts b/packages/trpc/lib/attachments.ts index 7a4e2668..25d9be94 100644 --- a/packages/trpc/lib/attachments.ts +++ 
import { TRPCError } from "@trpc/server";
import { and, desc, eq, lt } from "drizzle-orm";
import { z } from "zod";

import { assets, backupsTable } from "@karakeep/db/schema";
import { BackupQueue } from "@karakeep/shared-server";
import { deleteAsset } from "@karakeep/shared/assetdb";
import { zBackupSchema } from "@karakeep/shared/types/backups";

import { AuthedContext } from "..";

/** Shape of a backup record as described by `zBackupSchema`. */
type BackupRecord = z.infer<typeof zBackupSchema>;

/** Fields of a backup record that may be changed after creation. */
type BackupPatch = Partial<{
  size: number;
  bookmarkCount: number;
  status: "pending" | "success" | "failure";
  assetId: string | null;
  errorMessage: string | null;
}>;

/**
 * Model wrapping a single row of `backupsTable` together with the
 * authenticated context it was loaded under.
 *
 * Every query issued by this class is scoped to `ctx.user.id`, so an instance
 * can only ever read or modify backups owned by the calling user.
 */
export class Backup {
  private constructor(
    private readonly ctx: AuthedContext,
    private backup: BackupRecord,
  ) {}

  /**
   * Loads the backup with the given id, provided it belongs to the current user.
   *
   * @throws TRPCError with code `NOT_FOUND` when no such backup exists for
   *   this user.
   */
  static async fromId(ctx: AuthedContext, backupId: string): Promise<Backup> {
    const backup = await ctx.db.query.backupsTable.findFirst({
      where: and(
        eq(backupsTable.id, backupId),
        eq(backupsTable.userId, ctx.user.id),
      ),
    });

    if (!backup) {
      throw new TRPCError({
        code: "NOT_FOUND",
        message: "Backup not found",
      });
    }

    return new Backup(ctx, backup);
  }

  /** Wraps an already-loaded record without touching the database. */
  private static fromData(ctx: AuthedContext, backup: BackupRecord): Backup {
    return new Backup(ctx, backup);
  }

  /** Returns all backups of the current user, newest first. */
  static async getAll(ctx: AuthedContext): Promise<Backup[]> {
    const backups = await ctx.db.query.backupsTable.findMany({
      where: eq(backupsTable.userId, ctx.user.id),
      orderBy: [desc(backupsTable.createdAt)],
    });

    return backups.map((b) => Backup.fromData(ctx, b));
  }

  /**
   * Inserts a new backup record in the "pending" state with zeroed statistics.
   * The record is only a placeholder; call {@link triggerBackgroundJob} to
   * enqueue a job for it.
   *
   * @throws TRPCError with code `INTERNAL_SERVER_ERROR` when the insert does
   *   not return the created row.
   */
  static async create(ctx: AuthedContext): Promise<Backup> {
    const [backup] = await ctx.db
      .insert(backupsTable)
      .values({
        userId: ctx.user.id,
        size: 0,
        bookmarkCount: 0,
        status: "pending",
      })
      .returning();

    // Fail loudly here instead of handing out an instance wrapping `undefined`.
    if (!backup) {
      throw new TRPCError({
        code: "INTERNAL_SERVER_ERROR",
        message: "Failed to create backup record",
      });
    }

    return new Backup(ctx, backup);
  }

  /**
   * Enqueues a job on `BackupQueue` for this backup record.
   *
   * @param options.delayMs - Passed through to the queue unchanged.
   * @param options.idempotencyKey - Passed through to the queue unchanged.
   */
  async triggerBackgroundJob({
    delayMs,
    idempotencyKey,
  }: { delayMs?: number; idempotencyKey?: string } = {}): Promise<void> {
    await BackupQueue.enqueue(
      {
        userId: this.ctx.user.id,
        backupId: this.backup.id,
      },
      {
        delayMs,
        idempotencyKey,
      },
    );
  }

  /**
   * Generic update method for backup records.
   *
   * Persists the given fields and mirrors them into the in-memory record.
   * Keys whose value is `undefined` are ignored, and a patch with nothing
   * left to write is a no-op.
   */
  async update(data: BackupPatch): Promise<void> {
    // Drop keys explicitly set to `undefined` so they neither reach the query
    // nor clobber the locally cached record below.
    const patch = Object.fromEntries(
      Object.entries(data).filter(([, value]) => value !== undefined),
    ) as BackupPatch;

    // Nothing to persist: skip the query rather than issuing an UPDATE with
    // an empty SET clause.
    if (Object.keys(patch).length === 0) {
      return;
    }

    await this.ctx.db
      .update(backupsTable)
      .set(patch)
      .where(
        and(
          eq(backupsTable.id, this.backup.id),
          eq(backupsTable.userId, this.ctx.user.id),
        ),
      );

    // Update local state
    this.backup = { ...this.backup, ...patch };
  }

  /**
   * Deletes this backup: the stored asset (if any), the asset row, and the
   * backup row. The two row deletions run in a single transaction.
   */
  async delete(): Promise<void> {
    const { assetId } = this.backup;

    // NOTE(review): the stored asset is removed before the database rows. If
    // the transaction below fails, the rows will still reference an asset that
    // has already been deleted. Confirm this ordering is intended.
    if (assetId) {
      await deleteAsset({
        userId: this.ctx.user.id,
        assetId,
      });
    }

    await this.ctx.db.transaction(async (db) => {
      // Remove the asset row before the backup row that points at it.
      if (assetId) {
        await db
          .delete(assets)
          .where(and(eq(assets.id, assetId), eq(assets.userId, this.ctx.user.id)));
      }

      // Delete backup record
      await db
        .delete(backupsTable)
        .where(
          and(
            eq(backupsTable.id, this.backup.id),
            eq(backupsTable.userId, this.ctx.user.id),
          ),
        );
    });
  }

  /**
   * Finds the current user's backups created more than `retentionDays` days
   * ago.
   *
   * The cutoff is computed with `Date#setDate`, i.e. calendar days in the
   * process's local time zone.
   */
  static async findOldBackups(
    ctx: AuthedContext,
    retentionDays: number,
  ): Promise<Backup[]> {
    const cutoffDate = new Date();
    cutoffDate.setDate(cutoffDate.getDate() - retentionDays);

    const oldBackups = await ctx.db.query.backupsTable.findMany({
      where: and(
        eq(backupsTable.userId, ctx.user.id),
        lt(backupsTable.createdAt, cutoffDate),
      ),
    });

    return oldBackups.map((backup) => Backup.fromData(ctx, backup));
  }

  /** Returns the record in the shape exposed through the API. */
  asPublic(): BackupRecord {
    return this.backup;
  }

  /** Id of the backup record. */
  get id() {
    return this.backup.id;
  }

  /** Id of the asset linked to this backup record, if one is set. */
  get assetId() {
    return this.backup.assetId;
  }
}
import { z } from "zod";

import { zBackupSchema } from "@karakeep/shared/types/backups";

import { authedProcedure, createRateLimitMiddleware, router } from "../index";
import { Backup } from "../models/backups";

/** Input shared by the procedures that address a single backup. */
const zBackupIdInput = z.object({
  backupId: z.string(),
});

/**
 * tRPC router for user backups. All procedures require authentication and
 * operate only on backups owned by the calling user (enforced by the
 * `Backup` model).
 */
export const backupsAppRouter = router({
  /** Lists the caller's backups in the order returned by `Backup.getAll`. */
  list: authedProcedure
    .output(z.object({ backups: z.array(zBackupSchema) }))
    .query(async ({ ctx }) => {
      const backups = await Backup.getAll(ctx);
      return { backups: backups.map((b) => b.asPublic()) };
    }),

  /** Returns a single backup; `Backup.fromId` throws `NOT_FOUND` if it is missing. */
  get: authedProcedure
    .input(zBackupIdInput)
    .output(zBackupSchema)
    .query(async ({ ctx, input }) => {
      const backup = await Backup.fromId(ctx, input.backupId);
      return backup.asPublic();
    }),

  /** Deletes a backup together with its stored asset. */
  delete: authedProcedure
    .input(zBackupIdInput)
    .mutation(async ({ input, ctx }) => {
      const backup = await Backup.fromId(ctx, input.backupId);
      await backup.delete();
    }),

  /**
   * Creates a pending backup record and enqueues the job that fills it in.
   * Rate limited to 5 requests per hour.
   */
  triggerBackup: authedProcedure
    .use(
      createRateLimitMiddleware({
        name: "backups.triggerBackup",
        windowMs: 60 * 60 * 1000, // 1 hour window
        maxRequests: 5, // Max 5 backup triggers per hour
      }),
    )
    .output(zBackupSchema)
    .mutation(async ({ ctx }) => {
      const backup = await Backup.create(ctx);

      try {
        await backup.triggerBackgroundJob();
      } catch (error: unknown) {
        // The job never reached the queue, so nothing would ever move this
        // record out of "pending". Mark it as failed before surfacing the error.
        try {
          await backup.update({
            status: "failure",
            errorMessage: "Failed to enqueue backup job",
          });
        } catch {
          // Best effort only: the enqueue error below is the one that matters.
        }
        throw error;
      }

      return backup.asPublic();
    }),
});
backupsRetentionDays: 30, }); // Update settings await caller.users.updateSettings({ bookmarkClickAction: "expand_bookmark_preview", + backupsEnabled: true, + backupsFrequency: "daily", + backupsRetentionDays: 7, }); // Verify updated settings @@ -168,6 +174,9 @@ describe("User Routes", () => { bookmarkClickAction: "expand_bookmark_preview", archiveDisplayBehaviour: "show", timezone: "UTC", + backupsEnabled: true, + backupsFrequency: "daily", + backupsRetentionDays: 7, }); // Test invalid update (e.g., empty input, if schema enforces it) -- cgit v1.2.3-70-g09d2