From 4350666961132ffe68afea8686b567f361a58f57 Mon Sep 17 00:00:00 2001
From: Mohamed Bassem
Date: Sat, 15 Nov 2025 13:49:52 +0000
Subject: feat: add Prometheus counter for HTTP status codes (#2117)

* feat: add Prometheus counter for crawler status codes

Add a new Prometheus metric to track HTTP status codes encountered during
crawling operations. This helps monitor crawler health and identify patterns
in response codes (e.g., 200 OK, 404 Not Found, etc.).

Changes:
- Add crawlerStatusCodeCounter in metrics.ts with status_code label
- Instrument crawlerWorker.ts to track status codes after page crawling
- Counter increments for each crawl with the corresponding HTTP status code

The metric is exposed at the /metrics endpoint and follows the naming
convention: karakeep_crawler_status_codes_total

* fix: update counter name to follow Prometheus conventions

Change metric name from "karakeep_crawler_status_codes" to
"karakeep_crawler_status_codes_total" to comply with Prometheus naming
best practices for counter metrics.
--------- Co-authored-by: Claude --- apps/workers/metrics.ts | 7 +++++++ apps/workers/workers/crawlerWorker.ts | 7 ++++++- 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'apps/workers') diff --git a/apps/workers/metrics.ts b/apps/workers/metrics.ts index b9ed2c70..3dc4d2c0 100644 --- a/apps/workers/metrics.ts +++ b/apps/workers/metrics.ts @@ -15,4 +15,11 @@ export const workerStatsCounter = new Counter({ labelNames: ["worker_name", "status"], }); +export const crawlerStatusCodeCounter = new Counter({ + name: "karakeep_crawler_status_codes_total", + help: "HTTP status codes encountered during crawling", + labelNames: ["status_code"], +}); + registry.registerMetric(workerStatsCounter); +registry.registerMetric(crawlerStatusCodeCounter); diff --git a/apps/workers/workers/crawlerWorker.ts b/apps/workers/workers/crawlerWorker.ts index 07a74757..0ab3a3cc 100644 --- a/apps/workers/workers/crawlerWorker.ts +++ b/apps/workers/workers/crawlerWorker.ts @@ -26,7 +26,7 @@ import metascraperPublisher from "metascraper-publisher"; import metascraperTitle from "metascraper-title"; import metascraperTwitter from "metascraper-twitter"; import metascraperUrl from "metascraper-url"; -import { workerStatsCounter } from "metrics"; +import { crawlerStatusCodeCounter, workerStatsCounter } from "metrics"; import { fetchWithProxy, getRandomProxy, @@ -1099,6 +1099,11 @@ async function crawlAndParseUrl( const { htmlContent, screenshot, statusCode, url: browserUrl } = result; + // Track status code in Prometheus + if (statusCode !== null) { + crawlerStatusCodeCounter.labels(statusCode.toString()).inc(); + } + const meta = await Promise.race([ extractMetadata(htmlContent, browserUrl, jobId), abortPromise(abortSignal), -- cgit v1.2.3-70-g09d2