diff options
| author | Mohamed Bassem <me@mbassem.com> | 2026-02-08 01:33:48 +0000 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2026-02-08 01:33:48 +0000 |
| commit | b2640803745636e87e42f5460759093f49df9d04 (patch) | |
| tree | 965f020a5c81b1805c19b2a2b8a3a434d06b1e1e | |
| parent | 9e5693c6e4b410d1af05cc3d50c89ff73f21e060 (diff) | |
| download | karakeep-b2640803745636e87e42f5460759093f49df9d04.tar.zst | |
feat(metrics): add prometheus metric for bookmark crawl latency (#2461)
Track the time from bookmark creation to crawl completion as a histogram
(karakeep_bookmark_crawl_latency_seconds). This measures the end-to-end
latency users experience when adding bookmarks via extension, web, etc.
Excludes recrawls (crawledAt already set) and imports (low priority jobs).
https://claude.ai/code/session_019jTGGXGWzK9C5aTznQhdgz
Co-authored-by: Claude <noreply@anthropic.com>
| -rw-r--r-- | apps/workers/metrics.ts | 12 | ||||
| -rw-r--r-- | apps/workers/workerUtils.ts | 2 | ||||
| -rw-r--r-- | apps/workers/workers/crawlerWorker.ts | 16 |
3 files changed, 28 insertions, 2 deletions
diff --git a/apps/workers/metrics.ts b/apps/workers/metrics.ts
index c16c7bd4..42b5aa46 100644
--- a/apps/workers/metrics.ts
+++ b/apps/workers/metrics.ts
@@ -1,5 +1,5 @@
 import { prometheus } from "@hono/prometheus";
-import { Counter, Registry } from "prom-client";
+import { Counter, Histogram, Registry } from "prom-client";
 
 export const registry = new Registry();
 
@@ -21,5 +21,15 @@ export const crawlerStatusCodeCounter = new Counter({
   labelNames: ["status_code"],
 });
 
+export const bookmarkCrawlLatencyHistogram = new Histogram({
+  name: "karakeep_bookmark_crawl_latency_seconds",
+  help: "Latency from bookmark creation to crawl completion (excludes recrawls and imports)",
+  buckets: [
+    0.1, 0.25, 0.5, 1, 2.5, 5, 7.5, 10, 15, 20, 30, 45, 60, 90, 120, 180, 300,
+    600, 900, 1200,
+  ],
+});
+
 registry.registerMetric(workerStatsCounter);
 registry.registerMetric(crawlerStatusCodeCounter);
+registry.registerMetric(bookmarkCrawlLatencyHistogram);
diff --git a/apps/workers/workerUtils.ts b/apps/workers/workerUtils.ts
index a99f2103..48e3b277 100644
--- a/apps/workers/workerUtils.ts
+++ b/apps/workers/workerUtils.ts
@@ -31,6 +31,8 @@ export async function getBookmarkDetails(bookmarkId: string) {
   return {
     url: bookmark.link.url,
     userId: bookmark.userId,
+    createdAt: bookmark.createdAt,
+    crawledAt: bookmark.link.crawledAt,
     screenshotAssetId: bookmark.assets.find(
       (a) => a.assetType == AssetTypes.LINK_SCREENSHOT,
     )?.id,
diff --git a/apps/workers/workers/crawlerWorker.ts b/apps/workers/workers/crawlerWorker.ts
index d3c20b7c..48ea5352 100644
--- a/apps/workers/workers/crawlerWorker.ts
+++ b/apps/workers/workers/crawlerWorker.ts
@@ -27,7 +27,11 @@ import metascraperTitle from "metascraper-title";
 import metascraperUrl from "metascraper-url";
 import metascraperX from "metascraper-x";
 import metascraperYoutube from "metascraper-youtube";
-import { crawlerStatusCodeCounter, workerStatsCounter } from "metrics";
+import {
+  bookmarkCrawlLatencyHistogram,
+  crawlerStatusCodeCounter,
+  workerStatsCounter,
+} from "metrics";
 import {
   fetchWithProxy,
   getBookmarkDomain,
@@ -1709,6 +1713,8 @@ async function runCrawler(
   const {
     url,
     userId,
+    createdAt,
+    crawledAt,
     screenshotAssetId: oldScreenshotAssetId,
     pdfAssetId: oldPdfAssetId,
     imageAssetId: oldImageAssetId,
@@ -1812,5 +1818,13 @@ async function runCrawler(
     // Do the archival as a separate last step as it has the potential for failure
     await archivalLogic();
   }
+
+  // Record the latency from bookmark creation to crawl completion.
+  // Only for first-time, high-priority crawls (excludes recrawls and imports).
+  if (crawledAt === null && job.priority === 0) {
+    const latencySeconds = (Date.now() - createdAt.getTime()) / 1000;
+    bookmarkCrawlLatencyHistogram.observe(latencySeconds);
+  }
+
   return { status: "completed" };
 }
