diff options
Diffstat (limited to 'apps/workers')
| -rw-r--r-- | apps/workers/metrics.ts | 12 |
| -rw-r--r-- | apps/workers/workerUtils.ts | 2 |
| -rw-r--r-- | apps/workers/workers/crawlerWorker.ts | 16 |
3 files changed, 28 insertions, 2 deletions
diff --git a/apps/workers/metrics.ts b/apps/workers/metrics.ts
index c16c7bd4..42b5aa46 100644
--- a/apps/workers/metrics.ts
+++ b/apps/workers/metrics.ts
@@ -1,5 +1,5 @@
 import { prometheus } from "@hono/prometheus";
-import { Counter, Registry } from "prom-client";
+import { Counter, Histogram, Registry } from "prom-client";
 
 export const registry = new Registry();
 
@@ -21,5 +21,15 @@ export const crawlerStatusCodeCounter = new Counter({
   labelNames: ["status_code"],
 });
 
+export const bookmarkCrawlLatencyHistogram = new Histogram({
+  name: "karakeep_bookmark_crawl_latency_seconds",
+  help: "Latency from bookmark creation to crawl completion (excludes recrawls and imports)",
+  buckets: [
+    0.1, 0.25, 0.5, 1, 2.5, 5, 7.5, 10, 15, 20, 30, 45, 60, 90, 120, 180, 300,
+    600, 900, 1200,
+  ],
+});
+
 registry.registerMetric(workerStatsCounter);
 registry.registerMetric(crawlerStatusCodeCounter);
+registry.registerMetric(bookmarkCrawlLatencyHistogram);
diff --git a/apps/workers/workerUtils.ts b/apps/workers/workerUtils.ts
index a99f2103..48e3b277 100644
--- a/apps/workers/workerUtils.ts
+++ b/apps/workers/workerUtils.ts
@@ -31,6 +31,8 @@ export async function getBookmarkDetails(bookmarkId: string) {
   return {
     url: bookmark.link.url,
     userId: bookmark.userId,
+    createdAt: bookmark.createdAt,
+    crawledAt: bookmark.link.crawledAt,
     screenshotAssetId: bookmark.assets.find(
       (a) => a.assetType == AssetTypes.LINK_SCREENSHOT,
     )?.id,
diff --git a/apps/workers/workers/crawlerWorker.ts b/apps/workers/workers/crawlerWorker.ts
index d3c20b7c..48ea5352 100644
--- a/apps/workers/workers/crawlerWorker.ts
+++ b/apps/workers/workers/crawlerWorker.ts
@@ -27,7 +27,11 @@ import metascraperTitle from "metascraper-title";
 import metascraperUrl from "metascraper-url";
 import metascraperX from "metascraper-x";
 import metascraperYoutube from "metascraper-youtube";
-import { crawlerStatusCodeCounter, workerStatsCounter } from "metrics";
+import {
+  bookmarkCrawlLatencyHistogram,
+  crawlerStatusCodeCounter,
+  workerStatsCounter,
+} from "metrics";
 import {
   fetchWithProxy,
   getBookmarkDomain,
@@ -1709,6 +1713,8 @@ async function runCrawler(
   const {
     url,
     userId,
+    createdAt,
+    crawledAt,
     screenshotAssetId: oldScreenshotAssetId,
     pdfAssetId: oldPdfAssetId,
     imageAssetId: oldImageAssetId,
@@ -1812,5 +1818,13 @@ async function runCrawler(
     // Do the archival as a separate last step as it has the potential for failure
     await archivalLogic();
   }
+
+  // Record the latency from bookmark creation to crawl completion.
+  // Only for first-time, high-priority crawls (excludes recrawls and imports).
+  if (crawledAt === null && job.priority === 0) {
+    const latencySeconds = (Date.now() - createdAt.getTime()) / 1000;
+    bookmarkCrawlLatencyHistogram.observe(latencySeconds);
+  }
+
   return { status: "completed" };
 }
