From 3fcccb858ee3ef22fe9ce479af4ce458ac9a0fe1 Mon Sep 17 00:00:00 2001
From: Mohamed Bassem
Date: Sun, 1 Feb 2026 22:57:11 +0000
Subject: feat: Add LLM-based OCR as alternative to Tesseract (#2442)

* feat(ocr): add LLM-based OCR support alongside Tesseract

Add support for using configured LLM inference providers (OpenAI or Ollama)
for OCR text extraction from images as an alternative to Tesseract.

Changes:
- Add OCR_USE_LLM environment variable flag (default: false)
- Add buildOCRPrompt function for LLM-based text extraction
- Add readImageTextWithLLM function in asset preprocessing worker
- Update extractAndSaveImageText to route between Tesseract and LLM OCR
- Update documentation with the new configuration option

When OCR_USE_LLM is enabled, the system uses the configured inference model
to extract text from images. If no inference provider is configured, it
falls back to Tesseract.

https://claude.ai/code/session_01Y7h7kDAmqXKXEWDmWbVkDs

* format

---------

Co-authored-by: Claude
---
 packages/shared/config.ts | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'packages/shared/config.ts')

diff --git a/packages/shared/config.ts b/packages/shared/config.ts
index 7238e90c..cfcf1532 100644
--- a/packages/shared/config.ts
+++ b/packages/shared/config.ts
@@ -82,6 +82,7 @@ const allEnv = z.object({
     .default("eng")
     .transform((val) => val.split(",")),
   OCR_CONFIDENCE_THRESHOLD: z.coerce.number().default(50),
+  OCR_USE_LLM: stringBool("false"),
   CRAWLER_HEADLESS_BROWSER: stringBool("true"),
   BROWSER_WEB_URL: z.string().optional(),
   BROWSER_WEBSOCKET_URL: z.string().optional(),
@@ -337,6 +338,7 @@ const serverConfigSchema = allEnv.transform((val, ctx) => {
       langs: val.OCR_LANGS,
       cacheDir: val.OCR_CACHE_DIR,
       confidenceThreshold: val.OCR_CONFIDENCE_THRESHOLD,
+      useLLM: val.OCR_USE_LLM,
     },
     search: {
       numWorkers: val.SEARCH_NUM_WORKERS,
-- 
cgit v1.2.3-70-g09d2