diff options
| author | MohamedBassem <me@mbassem.com> | 2024-10-12 16:47:22 +0000 |
|---|---|---|
| committer | MohamedBassem <me@mbassem.com> | 2024-10-12 17:37:42 +0000 |
| commit | c16173ea0fdbf6cc47b13756c0a77e8399669055 (patch) | |
| tree | 6b3ecd073259176059386eb16c6635e4699d26a3 /packages | |
| parent | 9f87207d668fbe0a2039c63803128fbe5916f993 (diff) | |
| download | karakeep-c16173ea0fdbf6cc47b13756c0a77e8399669055.tar.zst | |
feature: Introduce a mechanism to cleanup dangling assets
Diffstat (limited to 'packages')
| -rw-r--r-- | packages/shared/assetdb.ts | 42 | ||||
| -rw-r--r-- | packages/shared/package.json | 1 | ||||
| -rw-r--r-- | packages/shared/queues.ts | 16 | ||||
| -rw-r--r-- | packages/trpc/routers/admin.ts | 65 |
4 files changed, 122 insertions, 2 deletions
diff --git a/packages/shared/assetdb.ts b/packages/shared/assetdb.ts index 4edfa1ec..64413e9f 100644 --- a/packages/shared/assetdb.ts +++ b/packages/shared/assetdb.ts @@ -1,5 +1,6 @@ import * as fs from "fs"; import * as path from "path"; +import { Glob } from "glob"; import { z } from "zod"; import serverConfig from "./config"; @@ -120,6 +121,25 @@ export async function readAsset({ return { asset, metadata }; } +export async function readAssetMetadata({ + userId, + assetId, +}: { + userId: string; + assetId: string; +}) { + const assetDir = getAssetDir(userId, assetId); + + const metadataStr = await fs.promises.readFile( + path.join(assetDir, "metadata.json"), + { + encoding: "utf8", + }, + ); + + return zAssetMetadataSchema.parse(JSON.parse(metadataStr)); +} + export async function getAssetSize({ userId, assetId, @@ -154,3 +174,25 @@ export async function deleteUserAssets({ userId }: { userId: string }) { } await fs.promises.rm(userDir, { recursive: true }); } + +export async function* getAllAssets() { + const g = new Glob(`/**/**/asset.bin`, { + maxDepth: 3, + root: ROOT_PATH, + cwd: ROOT_PATH, + absolute: false, + }); + for await (const file of g) { + const [userId, assetId] = file.split("/").slice(0, 2); + const [size, metadata] = await Promise.all([ + getAssetSize({ userId, assetId }), + readAssetMetadata({ userId, assetId }), + ]); + yield { + userId, + assetId, + ...metadata, + size, + }; + } +} diff --git a/packages/shared/package.json b/packages/shared/package.json index 2b1ae973..69d93075 100644 --- a/packages/shared/package.json +++ b/packages/shared/package.json @@ -6,6 +6,7 @@ "type": "module", "dependencies": { "@hoarder/queue": "workspace:^0.1.0", + "glob": "^11.0.0", "meilisearch": "^0.37.0", "winston": "^3.11.0", "zod": "^3.22.4" diff --git a/packages/shared/queues.ts b/packages/shared/queues.ts index cadeefd0..6b04b988 100644 --- a/packages/shared/queues.ts +++ b/packages/shared/queues.ts @@ -65,6 +65,22 @@ export const SearchIndexingQueue = new SqliteQueue<ZSearchIndexingRequest>( }, ); +// Tidy Assets Worker +export const zTidyAssetsRequestSchema = z.object({ + cleanDanglingAssets: z.boolean().optional().default(false), + syncAssetMetadata: z.boolean().optional().default(false), +}); +export type ZTidyAssetsRequest = z.infer<typeof zTidyAssetsRequestSchema>; +export const TidyAssetsQueue = new SqliteQueue<ZTidyAssetsRequest>( + "tidy_assets_queue", + queueDB, + { + defaultJobArgs: { + numRetries: 1, + }, + }, +); + export async function triggerSearchReindex(bookmarkId: string) { await SearchIndexingQueue.enqueue({ bookmarkId, diff --git a/packages/trpc/routers/admin.ts b/packages/trpc/routers/admin.ts index b3fb2383..ff1249d0 100644 --- a/packages/trpc/routers/admin.ts +++ b/packages/trpc/routers/admin.ts @@ -1,11 +1,12 @@ -import { count, eq } from "drizzle-orm"; +import { count, eq, sum } from "drizzle-orm"; import { z } from "zod"; -import { bookmarkLinks, bookmarks, users } from "@hoarder/db/schema"; +import { assets, bookmarkLinks, bookmarks, users } from "@hoarder/db/schema"; import { LinkCrawlerQueue, OpenAIQueue, SearchIndexingQueue, + TidyAssetsQueue, triggerSearchReindex, } from "@hoarder/shared/queues"; @@ -30,6 +31,9 @@ export const adminAppRouter = router({ indexingStats: z.object({ queued: z.number(), }), + tidyAssetsStats: z.object({ + queued: z.number(), + }), }), ) .query(async ({ ctx }) => { @@ -49,6 +53,9 @@ export const adminAppRouter = router({ queuedInferences, [{ value: pendingInference }], [{ value: failedInference }], + + // Tidy Assets + queuedTidyAssets, ] = await Promise.all([ ctx.db.select({ value: count() }).from(users), ctx.db.select({ value: count() }).from(bookmarks), @@ -77,6 +84,9 @@ export const adminAppRouter = router({ .select({ value: count() }) .from(bookmarks) .where(eq(bookmarks.taggingStatus, "failure")), + + // Tidy Assets + TidyAssetsQueue.stats(), ]); return { @@ -95,6 +105,9 @@ export const adminAppRouter = router({ indexingStats: { queued: queuedIndexing.pending + queuedIndexing.pending_retry, }, + tidyAssetsStats: { + queued: queuedTidyAssets.pending + queuedTidyAssets.pending_retry, + }, }; }), recrawlLinks: adminProcedure @@ -143,4 +156,52 @@ export const adminAppRouter = router({ bookmarkIds.map((b) => OpenAIQueue.enqueue({ bookmarkId: b.id })), ); }), + tidyAssets: adminProcedure.mutation(async () => { + await TidyAssetsQueue.enqueue({ + cleanDanglingAssets: true, + syncAssetMetadata: true, + }); + }), + userStats: adminProcedure + .output( + z.record( + z.string(), + z.object({ + numBookmarks: z.number(), + assetSizes: z.number(), + }), + ), + ) + .query(async ({ ctx }) => { + const [userIds, bookmarkStats, assetStats] = await Promise.all([ + ctx.db.select({ id: users.id }).from(users), + ctx.db + .select({ id: bookmarks.userId, value: count() }) + .from(bookmarks) + .groupBy(bookmarks.userId), + ctx.db + .select({ id: assets.userId, value: sum(assets.size) }) + .from(assets) + .groupBy(assets.userId), + ]); + + const results: Record< + string, + { numBookmarks: number; assetSizes: number } + > = {}; + for (const user of userIds) { + results[user.id] = { + numBookmarks: 0, + assetSizes: 0, + }; + } + for (const stat of bookmarkStats) { + results[stat.id].numBookmarks = stat.value; + } + for (const stat of assetStats) { + results[stat.id].assetSizes = parseInt(stat.value ?? "0"); + } + + return results; + }), }); |
