From b2640803745636e87e42f5460759093f49df9d04 Mon Sep 17 00:00:00 2001 From: Mohamed Bassem Date: Sun, 8 Feb 2026 01:33:48 +0000 Subject: feat(metrics): add prometheus metric for bookmark crawl latency (#2461) Track the time from bookmark creation to crawl completion as a histogram (karakeep_bookmark_crawl_latency_seconds). This measures the end-to-end latency users experience when adding bookmarks via extension, web, etc. Excludes recrawls (crawledAt already set) and imports (low priority jobs). https://claude.ai/code/session_019jTGGXGWzK9C5aTznQhdgz Co-authored-by: Claude --- apps/workers/metrics.ts | 12 +++++++++++- apps/workers/workerUtils.ts | 2 ++ apps/workers/workers/crawlerWorker.ts | 16 +++++++++++++++- 3 files changed, 28 insertions(+), 2 deletions(-) (limited to 'apps/workers') diff --git a/apps/workers/metrics.ts b/apps/workers/metrics.ts index c16c7bd4..42b5aa46 100644 --- a/apps/workers/metrics.ts +++ b/apps/workers/metrics.ts @@ -1,5 +1,5 @@ import { prometheus } from "@hono/prometheus"; -import { Counter, Registry } from "prom-client"; +import { Counter, Histogram, Registry } from "prom-client"; export const registry = new Registry(); @@ -21,5 +21,15 @@ export const crawlerStatusCodeCounter = new Counter({ labelNames: ["status_code"], }); +export const bookmarkCrawlLatencyHistogram = new Histogram({ + name: "karakeep_bookmark_crawl_latency_seconds", + help: "Latency from bookmark creation to crawl completion (excludes recrawls and imports)", + buckets: [ + 0.1, 0.25, 0.5, 1, 2.5, 5, 7.5, 10, 15, 20, 30, 45, 60, 90, 120, 180, 300, + 600, 900, 1200, + ], +}); + registry.registerMetric(workerStatsCounter); registry.registerMetric(crawlerStatusCodeCounter); +registry.registerMetric(bookmarkCrawlLatencyHistogram); diff --git a/apps/workers/workerUtils.ts b/apps/workers/workerUtils.ts index a99f2103..48e3b277 100644 --- a/apps/workers/workerUtils.ts +++ b/apps/workers/workerUtils.ts @@ -31,6 +31,8 @@ export async function getBookmarkDetails(bookmarkId: string) { return { url: bookmark.link.url, userId: bookmark.userId, + createdAt: bookmark.createdAt, + crawledAt: bookmark.link.crawledAt, screenshotAssetId: bookmark.assets.find( (a) => a.assetType == AssetTypes.LINK_SCREENSHOT, )?.id, diff --git a/apps/workers/workers/crawlerWorker.ts b/apps/workers/workers/crawlerWorker.ts index d3c20b7c..48ea5352 100644 --- a/apps/workers/workers/crawlerWorker.ts +++ b/apps/workers/workers/crawlerWorker.ts @@ -27,7 +27,11 @@ import metascraperTitle from "metascraper-title"; import metascraperUrl from "metascraper-url"; import metascraperX from "metascraper-x"; import metascraperYoutube from "metascraper-youtube"; -import { crawlerStatusCodeCounter, workerStatsCounter } from "metrics"; +import { + bookmarkCrawlLatencyHistogram, + crawlerStatusCodeCounter, + workerStatsCounter, +} from "metrics"; import { fetchWithProxy, getBookmarkDomain, @@ -1709,6 +1713,8 @@ async function runCrawler( const { url, userId, + createdAt, + crawledAt, screenshotAssetId: oldScreenshotAssetId, pdfAssetId: oldPdfAssetId, imageAssetId: oldImageAssetId, @@ -1812,5 +1818,13 @@ async function runCrawler( // Do the archival as a separate last step as it has the potential for failure await archivalLogic(); } + + // Record the latency from bookmark creation to crawl completion. + // Only for first-time, high-priority crawls (excludes recrawls and imports). + if (crawledAt === null && job.priority === 0) { + const latencySeconds = (Date.now() - createdAt.getTime()) / 1000; + bookmarkCrawlLatencyHistogram.observe(latencySeconds); + } + return { status: "completed" }; } -- cgit v1.2.3-70-g09d2