From 3fcccb858ee3ef22fe9ce479af4ce458ac9a0fe1 Mon Sep 17 00:00:00 2001
From: Mohamed Bassem <me@mbassem.com>
Date: Sun, 1 Feb 2026 22:57:11 +0000
Subject: feat: Add LLM-based OCR as alternative to Tesseract (#2442)

* feat(ocr): add LLM-based OCR support alongside Tesseract

Add support for using configured LLM inference providers (OpenAI or Ollama)
for OCR text extraction from images as an alternative to Tesseract.

Changes:
- Add OCR_USE_LLM environment variable flag (default: false)
- Add buildOCRPrompt function for LLM-based text extraction
- Add readImageTextWithLLM function in asset preprocessing worker
- Update extractAndSaveImageText to route between Tesseract and LLM OCR
- Update documentation with the new configuration option

When OCR_USE_LLM is enabled, the system uses the configured inference model
to extract text from images. If no inference provider is configured, it
falls back to Tesseract.

https://claude.ai/code/session_01Y7h7kDAmqXKXEWDmWbVkDs

* format

---------

Co-authored-by: Claude <noreply@anthropic.com>
---
 packages/shared/config.ts | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'packages/shared/config.ts')

diff --git a/packages/shared/config.ts b/packages/shared/config.ts
index 7238e90c..cfcf1532 100644
--- a/packages/shared/config.ts
+++ b/packages/shared/config.ts
@@ -82,6 +82,7 @@ const allEnv = z.object({
     .default("eng")
     .transform((val) => val.split(",")),
   OCR_CONFIDENCE_THRESHOLD: z.coerce.number().default(50),
+  OCR_USE_LLM: stringBool("false"),
   CRAWLER_HEADLESS_BROWSER: stringBool("true"),
   BROWSER_WEB_URL: z.string().optional(),
   BROWSER_WEBSOCKET_URL: z.string().optional(),
@@ -337,6 +338,7 @@ const serverConfigSchema = allEnv.transform((val, ctx) => {
       langs: val.OCR_LANGS,
       cacheDir: val.OCR_CACHE_DIR,
       confidenceThreshold: val.OCR_CONFIDENCE_THRESHOLD,
+      useLLM: val.OCR_USE_LLM,
     },
     search: {
       numWorkers: val.SEARCH_NUM_WORKERS,
-- 
cgit v1.2.3-70-g09d2