aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author    Mohamed Bassem <me@mbassem.com> 2026-02-08 01:33:48 +0000
committer GitHub <noreply@github.com> 2026-02-08 01:33:48 +0000
commit    b2640803745636e87e42f5460759093f49df9d04 (patch)
tree      965f020a5c81b1805c19b2a2b8a3a434d06b1e1e
parent    9e5693c6e4b410d1af05cc3d50c89ff73f21e060 (diff)
download  karakeep-b2640803745636e87e42f5460759093f49df9d04.tar.zst
feat(metrics): add prometheus metric for bookmark crawl latency (#2461)
Track the time from bookmark creation to crawl completion as a histogram
(karakeep_bookmark_crawl_latency_seconds). This measures the end-to-end
latency users experience when adding bookmarks via extension, web, etc.
Excludes recrawls (crawledAt already set) and imports (low-priority jobs).

https://claude.ai/code/session_019jTGGXGWzK9C5aTznQhdgz

Co-authored-by: Claude <noreply@anthropic.com>
-rw-r--r--  apps/workers/metrics.ts                12
-rw-r--r--  apps/workers/workerUtils.ts             2
-rw-r--r--  apps/workers/workers/crawlerWorker.ts  16
3 files changed, 28 insertions(+), 2 deletions(-)
diff --git a/apps/workers/metrics.ts b/apps/workers/metrics.ts
index c16c7bd4..42b5aa46 100644
--- a/apps/workers/metrics.ts
+++ b/apps/workers/metrics.ts
@@ -1,5 +1,5 @@
import { prometheus } from "@hono/prometheus";
-import { Counter, Registry } from "prom-client";
+import { Counter, Histogram, Registry } from "prom-client";
export const registry = new Registry();
@@ -21,5 +21,15 @@ export const crawlerStatusCodeCounter = new Counter({
labelNames: ["status_code"],
});
+export const bookmarkCrawlLatencyHistogram = new Histogram({
+ name: "karakeep_bookmark_crawl_latency_seconds",
+ help: "Latency from bookmark creation to crawl completion (excludes recrawls and imports)",
+ buckets: [
+ 0.1, 0.25, 0.5, 1, 2.5, 5, 7.5, 10, 15, 20, 30, 45, 60, 90, 120, 180, 300,
+ 600, 900, 1200,
+ ],
+});
+
registry.registerMetric(workerStatsCounter);
registry.registerMetric(crawlerStatusCodeCounter);
+registry.registerMetric(bookmarkCrawlLatencyHistogram);
diff --git a/apps/workers/workerUtils.ts b/apps/workers/workerUtils.ts
index a99f2103..48e3b277 100644
--- a/apps/workers/workerUtils.ts
+++ b/apps/workers/workerUtils.ts
@@ -31,6 +31,8 @@ export async function getBookmarkDetails(bookmarkId: string) {
return {
url: bookmark.link.url,
userId: bookmark.userId,
+ createdAt: bookmark.createdAt,
+ crawledAt: bookmark.link.crawledAt,
screenshotAssetId: bookmark.assets.find(
(a) => a.assetType == AssetTypes.LINK_SCREENSHOT,
)?.id,
diff --git a/apps/workers/workers/crawlerWorker.ts b/apps/workers/workers/crawlerWorker.ts
index d3c20b7c..48ea5352 100644
--- a/apps/workers/workers/crawlerWorker.ts
+++ b/apps/workers/workers/crawlerWorker.ts
@@ -27,7 +27,11 @@ import metascraperTitle from "metascraper-title";
import metascraperUrl from "metascraper-url";
import metascraperX from "metascraper-x";
import metascraperYoutube from "metascraper-youtube";
-import { crawlerStatusCodeCounter, workerStatsCounter } from "metrics";
+import {
+ bookmarkCrawlLatencyHistogram,
+ crawlerStatusCodeCounter,
+ workerStatsCounter,
+} from "metrics";
import {
fetchWithProxy,
getBookmarkDomain,
@@ -1709,6 +1713,8 @@ async function runCrawler(
const {
url,
userId,
+ createdAt,
+ crawledAt,
screenshotAssetId: oldScreenshotAssetId,
pdfAssetId: oldPdfAssetId,
imageAssetId: oldImageAssetId,
@@ -1812,5 +1818,13 @@ async function runCrawler(
// Do the archival as a separate last step as it has the potential for failure
await archivalLogic();
}
+
+ // Record the latency from bookmark creation to crawl completion.
+ // Only for first-time, high-priority crawls (excludes recrawls and imports).
+ if (crawledAt === null && job.priority === 0) {
+ const latencySeconds = (Date.now() - createdAt.getTime()) / 1000;
+ bookmarkCrawlLatencyHistogram.observe(latencySeconds);
+ }
+
return { status: "completed" };
}