about | summary | refs | log | tree | commit | diff | stats
path: root/packages
diff options
context:
space:
mode:
author Mohamed Bassem <me@mbassem.com> 2025-11-02 17:19:28 +0000
committer GitHub <noreply@github.com> 2025-11-02 17:19:28 +0000
commit b63a49fc3980296c6a6ea6ac0624142e8af94d52 (patch)
tree 1b1266f09f7821c0c59220895e9f28f406ebb841 /packages
parent c6ebceb9f0b13da902edd6bf722cfc961d7eedc6 (diff)
download karakeep-b63a49fc3980296c6a6ea6ac0624142e8af94d52.tar.zst
fix: Stricter SSRF validation (#2082)
* fix: Stricter SSRF validation * skip dns resolution if running in proxy context * more fixes * Add LRU cache * change the env variable for internal hostnames * make dns resolution timeout configurable * upgrade ipaddr * handle ipv6 * handle proxy bypass for request interceptor
Diffstat (limited to 'packages')
-rw-r--r-- packages/shared/config.ts 25
1 files changed, 24 insertions, 1 deletions
diff --git a/packages/shared/config.ts b/packages/shared/config.ts
index d54b7589..51b591ad 100644
--- a/packages/shared/config.ts
+++ b/packages/shared/config.ts
@@ -104,6 +104,7 @@ const allEnv = z.object({
.default("")
.transform((t) => t.split("%%").filter((a) => a)),
CRAWLER_SCREENSHOT_TIMEOUT_SEC: z.coerce.number().default(5),
+ CRAWLER_IP_VALIDATION_DNS_RESOLVER_TIMEOUT_SEC: z.coerce.number().default(1),
LOG_LEVEL: z.string().default("debug"),
NO_COLOR: stringBool("false"),
DEMO_MODE: stringBool("false"),
@@ -178,7 +179,24 @@ const allEnv = z.object({
.filter((p) => p),
)
.optional(),
- CRAWLER_NO_PROXY: z.string().optional(),
+ CRAWLER_NO_PROXY: z
+ .string()
+ .transform((val) =>
+ val
+ .split(",")
+ .map((p) => p.trim())
+ .filter((p) => p),
+ )
+ .optional(),
+ CRAWLER_ALLOWED_INTERNAL_HOSTNAMES: z
+ .string()
+ .transform((val) =>
+ val
+ .split(",")
+ .map((p) => p.trim())
+ .filter((p) => p),
+ )
+ .optional(),
// Database configuration
DB_WAL_MODE: stringBool("false"),
@@ -276,6 +294,10 @@ const serverConfigSchema = allEnv.transform((val, ctx) => {
ytDlpArguments: val.CRAWLER_YTDLP_ARGS,
screenshotTimeoutSec: val.CRAWLER_SCREENSHOT_TIMEOUT_SEC,
htmlContentSizeThreshold: val.HTML_CONTENT_SIZE_INLINE_THRESHOLD_BYTES,
+ ipValidation: {
+ dnsResolverTimeoutSec:
+ val.CRAWLER_IP_VALIDATION_DNS_RESOLVER_TIMEOUT_SEC,
+ },
},
ocr: {
langs: val.OCR_LANGS,
@@ -309,6 +331,7 @@ const serverConfigSchema = allEnv.transform((val, ctx) => {
httpsProxy: val.CRAWLER_HTTPS_PROXY,
noProxy: val.CRAWLER_NO_PROXY,
},
+ allowedInternalHostnames: val.CRAWLER_ALLOWED_INTERNAL_HOSTNAMES,
assetPreprocessing: {
numWorkers: val.ASSET_PREPROCESSING_NUM_WORKERS,
},