diff options
Diffstat (limited to 'packages/shared/prompts.ts')
| -rw-r--r-- | packages/shared/prompts.ts | 121 |
1 files changed, 48 insertions, 73 deletions
diff --git a/packages/shared/prompts.ts b/packages/shared/prompts.ts index 5a6a705e..6c5c02c4 100644 --- a/packages/shared/prompts.ts +++ b/packages/shared/prompts.ts @@ -1,19 +1,5 @@ -import type { Tiktoken } from "js-tiktoken"; - -let encoding: Tiktoken | null = null; - -/** - * Lazy load the encoding to avoid loading the tiktoken data into memory - * until it's actually needed - */ -async function getEncodingInstance(): Promise<Tiktoken> { - if (!encoding) { - // Dynamic import to lazy load the tiktoken module - const { getEncoding } = await import("js-tiktoken"); - encoding = getEncoding("o200k_base"); - } - return encoding; -} +import type { ZTagStyle } from "./types/users"; +import { getCuratedTagsPrompt, getTagStylePrompt } from "./utils/tag"; /** * Remove duplicate whitespaces to avoid tokenization issues @@ -22,33 +8,25 @@ function preprocessContent(content: string) { return content.replace(/(\s){10,}/g, "$1"); } -async function calculateNumTokens(text: string): Promise<number> { - const enc = await getEncodingInstance(); - return enc.encode(text).length; -} - -async function truncateContent( - content: string, - length: number, -): Promise<string> { - const enc = await getEncodingInstance(); - const tokens = enc.encode(content); - if (tokens.length <= length) { - return content; - } - const truncatedTokens = tokens.slice(0, length); - return enc.decode(truncatedTokens); -} +export function buildImagePrompt( + lang: string, + customPrompts: string[], + tagStyle: ZTagStyle, + curatedTags?: string[], +) { + const tagStyleInstruction = getTagStylePrompt(tagStyle); + const curatedInstruction = getCuratedTagsPrompt(curatedTags); -export function buildImagePrompt(lang: string, customPrompts: string[]) { return ` -You are an expert whose responsibility is to help with automatic text tagging for a read-it-later app. -Please analyze the attached image and suggest relevant tags that describe its key themes, topics, and main ideas. The rules are: +You are an expert whose responsibility is to help with automatic text tagging for a read-it-later/bookmarking app. +Analyze the attached image and suggest relevant tags that describe its key themes, topics, and main ideas. The rules are: - Aim for a variety of tags, including broad categories, specific keywords, and potential sub-genres. - The tags must be in ${lang}. - If the tag is not generic enough, don't include it. - Aim for 10-15 tags. - If there are no good tags, don't emit any. +${curatedInstruction} +${tagStyleInstruction} ${customPrompts && customPrompts.map((p) => `- ${p}`).join("\n")} You must respond in valid JSON with the key "tags" and the value is list of tags. Don't wrap the response in a markdown code.`; } @@ -56,20 +34,29 @@ You must respond in valid JSON with the key "tags" and the value is list of tags /** * Construct tagging prompt for text content */ -function constructTextTaggingPrompt( +export function constructTextTaggingPrompt( lang: string, customPrompts: string[], content: string, + tagStyle: ZTagStyle, + curatedTags?: string[], ): string { + const tagStyleInstruction = getTagStylePrompt(tagStyle); + const curatedInstruction = getCuratedTagsPrompt(curatedTags); + return ` -You are an expert whose responsibility is to help with automatic tagging for a read-it-later app. -Please analyze the TEXT_CONTENT below and suggest relevant tags that describe its key themes, topics, and main ideas. The rules are: +You are an expert whose responsibility is to help with automatic tagging for a read-it-later/bookmarking app. +Analyze the TEXT_CONTENT below and suggest relevant tags that describe its key themes, topics, and main ideas. The rules are: - Aim for a variety of tags, including broad categories, specific keywords, and potential sub-genres. - The tags must be in ${lang}. - If the tag is not generic enough, don't include it. -- The content can include text for cookie consent and privacy policy, ignore those while tagging. +- Do NOT generate tags related to: + - An error page (404, 403, blocked, not found, dns errors) + - Boilerplate content (cookie consent, login walls, GDPR notices) - Aim for 3-5 tags. - If there are no good tags, leave the array empty. +${curatedInstruction} +${tagStyleInstruction} ${customPrompts && customPrompts.map((p) => `- ${p}`).join("\n")} <TEXT_CONTENT> @@ -81,7 +68,7 @@ You must respond in JSON with the key "tags" and the value is an array of string /** * Construct summary prompt */ -function constructSummaryPrompt( +export function constructSummaryPrompt( lang: string, customPrompts: string[], content: string, @@ -101,46 +88,18 @@ export function buildTextPromptUntruncated( lang: string, customPrompts: string[], content: string, + tagStyle: ZTagStyle, + curatedTags?: string[], ): string { return constructTextTaggingPrompt( lang, customPrompts, preprocessContent(content), + tagStyle, + curatedTags, ); } -export async function buildTextPrompt( - lang: string, - customPrompts: string[], - content: string, - contextLength: number, -): Promise<string> { - content = preprocessContent(content); - const promptTemplate = constructTextTaggingPrompt(lang, customPrompts, ""); - const promptSize = await calculateNumTokens(promptTemplate); - const truncatedContent = await truncateContent( - content, - contextLength - promptSize, - ); - return constructTextTaggingPrompt(lang, customPrompts, truncatedContent); -} - -export async function buildSummaryPrompt( - lang: string, - customPrompts: string[], - content: string, - contextLength: number, -): Promise<string> { - content = preprocessContent(content); - const promptTemplate = constructSummaryPrompt(lang, customPrompts, ""); - const promptSize = await calculateNumTokens(promptTemplate); - const truncatedContent = await truncateContent( - content, - contextLength - promptSize, - ); - return constructSummaryPrompt(lang, customPrompts, truncatedContent); -} - /** * Build summary prompt without truncation (for previews/UI) */ @@ -155,3 +114,19 @@ export function buildSummaryPromptUntruncated( preprocessContent(content), ); } + +/** + * Build OCR prompt for extracting text from images using LLM + */ +export function buildOCRPrompt(): string { + return `You are an OCR (Optical Character Recognition) expert. Your task is to extract ALL text from this image. + +Rules: +- Extract every piece of text visible in the image, including titles, body text, captions, labels, watermarks, and any other textual content. +- Preserve the original structure and formatting as much as possible (e.g., paragraphs, lists, headings). +- If text appears in multiple columns, read from left to right, top to bottom. +- If text is partially obscured or unclear, make your best attempt and indicate uncertainty with [unclear] if needed. +- Do not add any commentary, explanations, or descriptions of non-text elements. +- If there is no text in the image, respond with an empty string. +- Output ONLY the extracted text, nothing else.`; +} |
