diff options
| author | Mohamed Bassem <me@mbassem.com> | 2025-12-08 00:30:11 +0000 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-12-08 00:30:11 +0000 |
| commit | 13a090c4113efddc800b1f87a97e0244097bd4df (patch) | |
| tree | d4e8d71986a0b7f4032ba377e4f1ef58188ac22e | |
| parent | e3cc54636349361a8672dc261973b19680f614e3 (diff) | |
| download | karakeep-13a090c4113efddc800b1f87a97e0244097bd4df.tar.zst | |
feat: spread feed fetch scheduling deterministically over the hour (#2227)
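Previously, all RSS feeds were fetched at the top of each hour (minute 0),
which could cause load spikes. This change spreads feed fetches across
the hour using a deterministic hash of the feed ID: the hourly cron job
still fires at minute 0, but each feed's job is now enqueued with a delay
that places it at that feed's assigned minute.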
Each feed is assigned a target minute (0-59) based on its ID hash, ensuring
consistent scheduling across restarts while distributing the load evenly.
Co-authored-by: Claude <noreply@anthropic.com>
| -rw-r--r-- | apps/workers/workers/feedWorker.ts | 31 |
1 file changed, 31 insertions, 0 deletions
diff --git a/apps/workers/workers/feedWorker.ts b/apps/workers/workers/feedWorker.ts index 3382e980..2a1334a9 100644 --- a/apps/workers/workers/feedWorker.ts +++ b/apps/workers/workers/feedWorker.ts @@ -14,6 +14,21 @@ import logger from "@karakeep/shared/logger"; import { DequeuedJob, getQueueClient } from "@karakeep/shared/queueing"; import { BookmarkTypes } from "@karakeep/shared/types/bookmarks"; +/** + * Deterministically maps a feed ID to a minute offset within the hour (0-59). + * This ensures feeds are spread evenly across the hour based on their ID. + */ +function getFeedMinuteOffset(feedId: string): number { + // Simple hash function: sum character codes + let hash = 0; + for (let i = 0; i < feedId.length; i++) { + hash = (hash << 5) - hash + feedId.charCodeAt(i); + hash = hash & hash; // Convert to 32-bit integer + } + // Return a minute offset between 0 and 59 + return Math.abs(hash) % 60; +} + export const FeedRefreshingWorker = cron.schedule( "0 * * * *", () => { @@ -30,9 +45,24 @@ export const FeedRefreshingWorker = cron.schedule( const currentHour = new Date(); currentHour.setMinutes(0, 0, 0); const hourlyWindow = currentHour.toISOString(); + const now = new Date(); + const currentMinute = now.getMinutes(); for (const feed of feeds) { const idempotencyKey = `${feed.id}-${hourlyWindow}`; + const targetMinute = getFeedMinuteOffset(feed.id); + + // Calculate delay: if target minute has passed, schedule for next hour + let delayMinutes = targetMinute - currentMinute; + if (delayMinutes < 0) { + delayMinutes += 60; + } + const delayMs = delayMinutes * 60 * 1000; + + logger.debug( + `[feed] Scheduling feed ${feed.id} at minute ${targetMinute} (delay: ${delayMinutes} minutes)`, + ); + FeedQueue.enqueue( { feedId: feed.id, @@ -40,6 +70,7 @@ export const FeedRefreshingWorker = cron.schedule( { idempotencyKey, groupId: feed.userId, + delayMs, }, ); } |
