aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Mohamed Bassem <me@mbassem.com> 2025-12-08 00:30:11 +0000
committer: GitHub <noreply@github.com> 2025-12-08 00:30:11 +0000
commit: 13a090c4113efddc800b1f87a97e0244097bd4df (patch)
tree: d4e8d71986a0b7f4032ba377e4f1ef58188ac22e
parent: e3cc54636349361a8672dc261973b19680f614e3 (diff)
download: karakeep-13a090c4113efddc800b1f87a97e0244097bd4df.tar.zst
feat: spread feed fetch scheduling deterministically over the hour (#2227)
Previously, all RSS feeds were fetched at the top of each hour (minute 0), which could cause load spikes. This change spreads feed fetches across the hour using a deterministic hash of the feed ID. Each feed is assigned a target minute (0-59) derived from its ID hash, so its schedule stays consistent across restarts while the overall load is distributed over the hour.

Co-authored-by: Claude <noreply@anthropic.com>
-rw-r--r-- apps/workers/workers/feedWorker.ts 31
1 files changed, 31 insertions, 0 deletions
diff --git a/apps/workers/workers/feedWorker.ts b/apps/workers/workers/feedWorker.ts
index 3382e980..2a1334a9 100644
--- a/apps/workers/workers/feedWorker.ts
+++ b/apps/workers/workers/feedWorker.ts
@@ -14,6 +14,21 @@ import logger from "@karakeep/shared/logger";
import { DequeuedJob, getQueueClient } from "@karakeep/shared/queueing";
import { BookmarkTypes } from "@karakeep/shared/types/bookmarks";
+/**
+ * Deterministically maps a feed ID to a minute offset within the hour (0-59).
+ *
+ * The ID is folded, one UTF-16 code unit at a time, into a signed 32-bit
+ * rolling hash (previous hash times 31, plus the code unit). The absolute
+ * value of that hash is then reduced modulo 60. The same ID therefore always
+ * yields the same minute, and an empty ID yields 0. How evenly feeds end up
+ * spread across the hour depends on how well the IDs hash.
+ *
+ * @param feedId - The feed ID to hash.
+ * @returns An integer minute offset in the range 0-59.
+ */
+function getFeedMinuteOffset(feedId: string): number {
+ // Rolling hash: multiply the running value by 31, then add the next char code
+ let hash = 0;
+ for (let i = 0; i < feedId.length; i++) {
+ hash = (hash << 5) - hash + feedId.charCodeAt(i); // (hash << 5) - hash is hash * 31, modulo 2^32 after the truncation below
+ hash = hash & hash; // Bitwise AND with itself truncates to a signed 32-bit integer
+ }
+ // Math.abs drops the sign so the remainder is a minute offset between 0 and 59
+ return Math.abs(hash) % 60;
+}
+
export const FeedRefreshingWorker = cron.schedule(
"0 * * * *",
() => {
@@ -30,9 +45,24 @@ export const FeedRefreshingWorker = cron.schedule(
const currentHour = new Date();
currentHour.setMinutes(0, 0, 0);
const hourlyWindow = currentHour.toISOString();
+ const now = new Date();
+ const currentMinute = now.getMinutes();
for (const feed of feeds) {
const idempotencyKey = `${feed.id}-${hourlyWindow}`;
+ const targetMinute = getFeedMinuteOffset(feed.id);
+
+ // Calculate delay: if target minute has passed, schedule for next hour
+ let delayMinutes = targetMinute - currentMinute;
+ if (delayMinutes < 0) {
+ delayMinutes += 60;
+ }
+ const delayMs = delayMinutes * 60 * 1000;
+
+ logger.debug(
+ `[feed] Scheduling feed ${feed.id} at minute ${targetMinute} (delay: ${delayMinutes} minutes)`,
+ );
+
FeedQueue.enqueue(
{
feedId: feed.id,
@@ -40,6 +70,7 @@ export const FeedRefreshingWorker = cron.schedule(
{
idempotencyKey,
groupId: feed.userId,
+ delayMs,
},
);
}