about | summary | refs | log | tree | commit | diff | stats
path: root/packages/shared
diff options
context:
space:
mode:
Diffstat (limited to 'packages/shared')
-rw-r--r-- packages/shared/config.ts 2
-rw-r--r-- packages/shared/prompts.ts 16
2 files changed, 18 insertions, 0 deletions
diff --git a/packages/shared/config.ts b/packages/shared/config.ts
index 7238e90c..cfcf1532 100644
--- a/packages/shared/config.ts
+++ b/packages/shared/config.ts
@@ -82,6 +82,7 @@ const allEnv = z.object({
.default("eng")
.transform((val) => val.split(",")),
OCR_CONFIDENCE_THRESHOLD: z.coerce.number().default(50),
+ OCR_USE_LLM: stringBool("false"),
CRAWLER_HEADLESS_BROWSER: stringBool("true"),
BROWSER_WEB_URL: z.string().optional(),
BROWSER_WEBSOCKET_URL: z.string().optional(),
@@ -337,6 +338,7 @@ const serverConfigSchema = allEnv.transform((val, ctx) => {
langs: val.OCR_LANGS,
cacheDir: val.OCR_CACHE_DIR,
confidenceThreshold: val.OCR_CONFIDENCE_THRESHOLD,
+ useLLM: val.OCR_USE_LLM,
},
search: {
numWorkers: val.SEARCH_NUM_WORKERS,
diff --git a/packages/shared/prompts.ts b/packages/shared/prompts.ts
index 00963550..e878a18b 100644
--- a/packages/shared/prompts.ts
+++ b/packages/shared/prompts.ts
@@ -106,3 +106,19 @@ export function buildSummaryPromptUntruncated(
preprocessContent(content),
);
}
+
+/**
+ * Build the fixed, parameterless instruction prompt that asks an LLM to act as an OCR engine: extract all visible text from an image, preserve its structure, mark unclear text with [unclear], and output only the extracted text (an empty string when the image contains no text).
+ */
+export function buildOCRPrompt(): string {
+ return `You are an OCR (Optical Character Recognition) expert. Your task is to extract ALL text from this image.
+
+Rules:
+- Extract every piece of text visible in the image, including titles, body text, captions, labels, watermarks, and any other textual content.
+- Preserve the original structure and formatting as much as possible (e.g., paragraphs, lists, headings).
+- If text appears in multiple columns, read from left to right, top to bottom.
+- If text is partially obscured or unclear, make your best attempt and indicate uncertainty with [unclear] if needed.
+- Do not add any commentary, explanations, or descriptions of non-text elements.
+- If there is no text in the image, respond with an empty string.
+- Output ONLY the extracted text, nothing else.`;
+}