From 4cf0856e39c4d69037a6c1a4c3a2a7f803b364a7 Mon Sep 17 00:00:00 2001 From: Mohamed Bassem Date: Sun, 9 Nov 2025 21:20:54 +0000 Subject: feat: add crawler domain rate limiting (#2115) --- packages/plugins/ratelimit-memory/src/index.test.ts | 4 +++- packages/shared/config.ts | 10 ++++++++++ packages/shared/ratelimiting.ts | 7 +++---- 3 files changed, 16 insertions(+), 5 deletions(-) (limited to 'packages') diff --git a/packages/plugins/ratelimit-memory/src/index.test.ts b/packages/plugins/ratelimit-memory/src/index.test.ts index 5bbed769..74989aab 100644 --- a/packages/plugins/ratelimit-memory/src/index.test.ts +++ b/packages/plugins/ratelimit-memory/src/index.test.ts @@ -1,3 +1,4 @@ +import assert from "assert"; import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { RateLimiter } from "./index"; @@ -46,7 +47,7 @@ describe("RateLimiter", () => { expect(result1.allowed).toBe(true); expect(result2.allowed).toBe(true); expect(result3.allowed).toBe(false); - expect(result3.resetInSeconds).toBeDefined(); + assert(!result3.allowed); expect(result3.resetInSeconds).toBeGreaterThan(0); }); @@ -139,6 +140,7 @@ describe("RateLimiter", () => { const result = rateLimiter.checkRateLimit(config, "user1"); expect(result.allowed).toBe(false); // Should have ~30 seconds remaining + assert(!result.allowed); expect(result.resetInSeconds).toBeGreaterThan(29); expect(result.resetInSeconds).toBeLessThanOrEqual(30); }); diff --git a/packages/shared/config.ts b/packages/shared/config.ts index a62f0fb6..d4a951f1 100644 --- a/packages/shared/config.ts +++ b/packages/shared/config.ts @@ -106,6 +106,8 @@ const allEnv = z.object({ .transform((t) => t.split("%%").filter((a) => a)), CRAWLER_SCREENSHOT_TIMEOUT_SEC: z.coerce.number().default(5), CRAWLER_IP_VALIDATION_DNS_RESOLVER_TIMEOUT_SEC: z.coerce.number().default(1), + CRAWLER_DOMAIN_RATE_LIMIT_WINDOW_MS: z.coerce.number().min(1).optional(), + CRAWLER_DOMAIN_RATE_LIMIT_MAX_REQUESTS: z.coerce.number().min(1).optional(), LOG_LEVEL: z.string().default("debug"), NO_COLOR: stringBool("false"), DEMO_MODE: stringBool("false"), @@ -299,6 +301,14 @@ const serverConfigSchema = allEnv.transform((val, ctx) => { dnsResolverTimeoutSec: val.CRAWLER_IP_VALIDATION_DNS_RESOLVER_TIMEOUT_SEC, }, + domainRatelimiting: + val.CRAWLER_DOMAIN_RATE_LIMIT_WINDOW_MS !== undefined && + val.CRAWLER_DOMAIN_RATE_LIMIT_MAX_REQUESTS !== undefined + ? { + windowMs: val.CRAWLER_DOMAIN_RATE_LIMIT_WINDOW_MS, + maxRequests: val.CRAWLER_DOMAIN_RATE_LIMIT_MAX_REQUESTS, + } + : null, }, ocr: { langs: val.OCR_LANGS, diff --git a/packages/shared/ratelimiting.ts b/packages/shared/ratelimiting.ts index 3b22310b..ee5988b4 100644 --- a/packages/shared/ratelimiting.ts +++ b/packages/shared/ratelimiting.ts @@ -6,10 +6,9 @@ export interface RateLimitConfig { maxRequests: number; } -export interface RateLimitResult { - allowed: boolean; - resetInSeconds?: number; -} +export type RateLimitResult = + | { allowed: true } + | { allowed: false; resetInSeconds: number }; export interface RateLimitClient { /** -- cgit v1.2.3-70-g09d2