diff options
Diffstat (limited to 'packages')
| -rw-r--r-- | packages/shared/config.ts | 2 |
| -rw-r--r-- | packages/shared/prompts.ts | 16 |
2 files changed, 18 insertions, 0 deletions
diff --git a/packages/shared/config.ts b/packages/shared/config.ts
index 7238e90c..cfcf1532 100644
--- a/packages/shared/config.ts
+++ b/packages/shared/config.ts
@@ -82,6 +82,7 @@ const allEnv = z.object({
     .default("eng")
     .transform((val) => val.split(",")),
   OCR_CONFIDENCE_THRESHOLD: z.coerce.number().default(50),
+  OCR_USE_LLM: stringBool("false"),
   CRAWLER_HEADLESS_BROWSER: stringBool("true"),
   BROWSER_WEB_URL: z.string().optional(),
   BROWSER_WEBSOCKET_URL: z.string().optional(),
@@ -337,6 +338,7 @@ const serverConfigSchema = allEnv.transform((val, ctx) => {
       langs: val.OCR_LANGS,
       cacheDir: val.OCR_CACHE_DIR,
       confidenceThreshold: val.OCR_CONFIDENCE_THRESHOLD,
+      useLLM: val.OCR_USE_LLM,
     },
     search: {
       numWorkers: val.SEARCH_NUM_WORKERS,
diff --git a/packages/shared/prompts.ts b/packages/shared/prompts.ts
index 00963550..e878a18b 100644
--- a/packages/shared/prompts.ts
+++ b/packages/shared/prompts.ts
@@ -106,3 +106,19 @@ export function buildSummaryPromptUntruncated(
     preprocessContent(content),
   );
 }
+
+/**
+ * Build OCR prompt for extracting text from images using LLM
+ */
+export function buildOCRPrompt(): string {
+  return `You are an OCR (Optical Character Recognition) expert. Your task is to extract ALL text from this image.
+
+Rules:
+- Extract every piece of text visible in the image, including titles, body text, captions, labels, watermarks, and any other textual content.
+- Preserve the original structure and formatting as much as possible (e.g., paragraphs, lists, headings).
+- If text appears in multiple columns, read from left to right, top to bottom.
+- If text is partially obscured or unclear, make your best attempt and indicate uncertainty with [unclear] if needed.
+- Do not add any commentary, explanations, or descriptions of non-text elements.
+- If there is no text in the image, respond with an empty string.
+- Output ONLY the extracted text, nothing else.`;
+}
