aboutsummaryrefslogtreecommitdiffstats
path: root/packages
diff options
context:
space:
mode:
authorMohamedBassem <me@mbassem.com>2024-10-12 16:47:22 +0000
committerMohamedBassem <me@mbassem.com>2024-10-12 17:37:42 +0000
commitc16173ea0fdbf6cc47b13756c0a77e8399669055 (patch)
tree6b3ecd073259176059386eb16c6635e4699d26a3 /packages
parent9f87207d668fbe0a2039c63803128fbe5916f993 (diff)
downloadkarakeep-c16173ea0fdbf6cc47b13756c0a77e8399669055.tar.zst
feature: Introduce a mechanism to cleanup dangling assets
Diffstat (limited to 'packages')
-rw-r--r--packages/shared/assetdb.ts42
-rw-r--r--packages/shared/package.json1
-rw-r--r--packages/shared/queues.ts16
-rw-r--r--packages/trpc/routers/admin.ts65
4 files changed, 122 insertions, 2 deletions
diff --git a/packages/shared/assetdb.ts b/packages/shared/assetdb.ts
index 4edfa1ec..64413e9f 100644
--- a/packages/shared/assetdb.ts
+++ b/packages/shared/assetdb.ts
@@ -1,5 +1,6 @@
import * as fs from "fs";
import * as path from "path";
+import { Glob } from "glob";
import { z } from "zod";
import serverConfig from "./config";
@@ -120,6 +121,25 @@ export async function readAsset({
return { asset, metadata };
}
+export async function readAssetMetadata({
+ userId,
+ assetId,
+}: {
+ userId: string;
+ assetId: string;
+}) {
+ const assetDir = getAssetDir(userId, assetId);
+
+ const metadataStr = await fs.promises.readFile(
+ path.join(assetDir, "metadata.json"),
+ {
+ encoding: "utf8",
+ },
+ );
+
+ return zAssetMetadataSchema.parse(JSON.parse(metadataStr));
+}
+
export async function getAssetSize({
userId,
assetId,
@@ -154,3 +174,25 @@ export async function deleteUserAssets({ userId }: { userId: string }) {
}
await fs.promises.rm(userDir, { recursive: true });
}
+
+export async function* getAllAssets() {
+ const g = new Glob(`/**/**/asset.bin`, {
+ maxDepth: 3,
+ root: ROOT_PATH,
+ cwd: ROOT_PATH,
+ absolute: false,
+ });
+ for await (const file of g) {
+ const [userId, assetId] = file.split("/").slice(0, 2);
+ const [size, metadata] = await Promise.all([
+ getAssetSize({ userId, assetId }),
+ readAssetMetadata({ userId, assetId }),
+ ]);
+ yield {
+ userId,
+ assetId,
+ ...metadata,
+ size,
+ };
+ }
+}
diff --git a/packages/shared/package.json b/packages/shared/package.json
index 2b1ae973..69d93075 100644
--- a/packages/shared/package.json
+++ b/packages/shared/package.json
@@ -6,6 +6,7 @@
"type": "module",
"dependencies": {
"@hoarder/queue": "workspace:^0.1.0",
+ "glob": "^11.0.0",
"meilisearch": "^0.37.0",
"winston": "^3.11.0",
"zod": "^3.22.4"
diff --git a/packages/shared/queues.ts b/packages/shared/queues.ts
index cadeefd0..6b04b988 100644
--- a/packages/shared/queues.ts
+++ b/packages/shared/queues.ts
@@ -65,6 +65,22 @@ export const SearchIndexingQueue = new SqliteQueue<ZSearchIndexingRequest>(
},
);
+// Tidy Assets Worker
+export const zTidyAssetsRequestSchema = z.object({
+ cleanDanglingAssets: z.boolean().optional().default(false),
+ syncAssetMetadata: z.boolean().optional().default(false),
+});
+export type ZTidyAssetsRequest = z.infer<typeof zTidyAssetsRequestSchema>;
+export const TidyAssetsQueue = new SqliteQueue<ZTidyAssetsRequest>(
+ "tidy_assets_queue",
+ queueDB,
+ {
+ defaultJobArgs: {
+ numRetries: 1,
+ },
+ },
+);
+
export async function triggerSearchReindex(bookmarkId: string) {
await SearchIndexingQueue.enqueue({
bookmarkId,
diff --git a/packages/trpc/routers/admin.ts b/packages/trpc/routers/admin.ts
index b3fb2383..ff1249d0 100644
--- a/packages/trpc/routers/admin.ts
+++ b/packages/trpc/routers/admin.ts
@@ -1,11 +1,12 @@
-import { count, eq } from "drizzle-orm";
+import { count, eq, sum } from "drizzle-orm";
import { z } from "zod";
-import { bookmarkLinks, bookmarks, users } from "@hoarder/db/schema";
+import { assets, bookmarkLinks, bookmarks, users } from "@hoarder/db/schema";
import {
LinkCrawlerQueue,
OpenAIQueue,
SearchIndexingQueue,
+ TidyAssetsQueue,
triggerSearchReindex,
} from "@hoarder/shared/queues";
@@ -30,6 +31,9 @@ export const adminAppRouter = router({
indexingStats: z.object({
queued: z.number(),
}),
+ tidyAssetsStats: z.object({
+ queued: z.number(),
+ }),
}),
)
.query(async ({ ctx }) => {
@@ -49,6 +53,9 @@ export const adminAppRouter = router({
queuedInferences,
[{ value: pendingInference }],
[{ value: failedInference }],
+
+ // Tidy Assets
+ queuedTidyAssets,
] = await Promise.all([
ctx.db.select({ value: count() }).from(users),
ctx.db.select({ value: count() }).from(bookmarks),
@@ -77,6 +84,9 @@ export const adminAppRouter = router({
.select({ value: count() })
.from(bookmarks)
.where(eq(bookmarks.taggingStatus, "failure")),
+
+ // Tidy Assets
+ TidyAssetsQueue.stats(),
]);
return {
@@ -95,6 +105,9 @@ export const adminAppRouter = router({
indexingStats: {
queued: queuedIndexing.pending + queuedIndexing.pending_retry,
},
+ tidyAssetsStats: {
+ queued: queuedTidyAssets.pending + queuedTidyAssets.pending_retry,
+ },
};
}),
recrawlLinks: adminProcedure
@@ -143,4 +156,52 @@ export const adminAppRouter = router({
bookmarkIds.map((b) => OpenAIQueue.enqueue({ bookmarkId: b.id })),
);
}),
+ tidyAssets: adminProcedure.mutation(async () => {
+ await TidyAssetsQueue.enqueue({
+ cleanDanglingAssets: true,
+ syncAssetMetadata: true,
+ });
+ }),
+ userStats: adminProcedure
+ .output(
+ z.record(
+ z.string(),
+ z.object({
+ numBookmarks: z.number(),
+ assetSizes: z.number(),
+ }),
+ ),
+ )
+ .query(async ({ ctx }) => {
+ const [userIds, bookmarkStats, assetStats] = await Promise.all([
+ ctx.db.select({ id: users.id }).from(users),
+ ctx.db
+ .select({ id: bookmarks.userId, value: count() })
+ .from(bookmarks)
+ .groupBy(bookmarks.userId),
+ ctx.db
+ .select({ id: assets.userId, value: sum(assets.size) })
+ .from(assets)
+ .groupBy(assets.userId),
+ ]);
+
+ const results: Record<
+ string,
+ { numBookmarks: number; assetSizes: number }
+ > = {};
+ for (const user of userIds) {
+ results[user.id] = {
+ numBookmarks: 0,
+ assetSizes: 0,
+ };
+ }
+ for (const stat of bookmarkStats) {
+ results[stat.id].numBookmarks = stat.value;
+ }
+ for (const stat of assetStats) {
+ results[stat.id].assetSizes = parseInt(stat.value ?? "0");
+ }
+
+ return results;
+ }),
});