import type { Tiktoken } from "js-tiktoken";

import type { ZTagStyle } from "./types/users";
import { constructSummaryPrompt, constructTextTaggingPrompt } from "./prompts";

let encoding: Tiktoken | null = null;

/**
 * Lazy load the encoding to avoid loading the tiktoken data into memory
 * until it's actually needed.
 *
 * The instance is cached in the module-level `encoding` variable, so the
 * dynamic import only happens while that variable is still unset.
 *
 * NOTE(review): calls that overlap before the first import resolves each see
 * `encoding` as unset and each call `getEncoding`; the last assignment wins.
 *
 * @returns The cached `o200k_base` encoding instance.
 */
async function getEncodingInstance(): Promise<Tiktoken> {
  if (!encoding) {
    // Dynamic import to lazy load the tiktoken module
    const { getEncoding } = await import("js-tiktoken");
    encoding = getEncoding("o200k_base");
  }
  return encoding;
}

/**
 * Counts how many tokens `text` encodes to.
 *
 * @param text - The text to tokenize.
 * @returns The number of tokens produced by the encoder.
 */
async function calculateNumTokens(text: string): Promise<number> {
  const enc = await getEncodingInstance();
  return enc.encode(text).length;
}

/**
 * Truncates `content` so that it encodes to at most `length` tokens.
 *
 * @param content - The text to truncate.
 * @param length - Maximum number of tokens to keep.
 * @returns `content` unchanged when it already fits, otherwise the decoded
 *   text of its first `length` tokens. A non-positive `length` yields `""`.
 */
async function truncateContent(
  content: string,
  length: number,
): Promise<string> {
  // A negative end index would make `slice` count from the end of the token
  // list and keep most of the content; an empty budget means no content.
  if (length <= 0) {
    return "";
  }

  const enc = await getEncodingInstance();
  const tokens = enc.encode(content);
  if (tokens.length <= length) {
    return content;
  }
  const truncatedTokens = tokens.slice(0, length);
  return enc.decode(truncatedTokens);
}

/**
 * Collapses long whitespace runs to avoid tokenization issues.
 *
 * Only runs of ten or more consecutive whitespace characters are affected;
 * each such run is replaced by a single whitespace character (the last one
 * matched in the run). Shorter runs are left untouched.
 */
function preprocessContent(content: string): string {
  return content.replace(/(\s){10,}/g, "$1");
}

/**
 * Renders a prompt whose content part is truncated to fit the token budget.
 *
 * The prompt is first rendered with empty content to measure its own token
 * cost; whatever remains of `contextLength` is the budget for the content.
 *
 * @param content - Raw content to embed in the prompt.
 * @param contextLength - Total token budget for the prompt.
 * @param render - Builds the final prompt string from the content to embed.
 * @returns The prompt rendered with the (possibly truncated) content.
 */
async function renderWithinContext(
  content: string,
  contextLength: number,
  render: (content: string) => string,
): Promise<string> {
  const cleaned = preprocessContent(content);
  const promptSize = await calculateNumTokens(render(""));
  const available = Math.max(0, contextLength - promptSize);
  // With no budget left, skip tokenizing the content altogether.
  const truncatedContent =
    available === 0 ? "" : await truncateContent(cleaned, available);
  return render(truncatedContent);
}

/**
 * Builds the text tagging prompt, truncating `content` so that the prompt
 * template plus the content stays within `contextLength` tokens.
 *
 * @param lang - Forwarded to `constructTextTaggingPrompt`.
 * @param customPrompts - Forwarded to `constructTextTaggingPrompt`.
 * @param content - The text to embed in the prompt.
 * @param contextLength - Total token budget for the prompt.
 * @param tagStyle - Forwarded to `constructTextTaggingPrompt`.
 * @param curatedTags - Forwarded to `constructTextTaggingPrompt`.
 * @returns The rendered tagging prompt.
 */
export async function buildTextPrompt(
  lang: string,
  customPrompts: string[],
  content: string,
  contextLength: number,
  tagStyle: ZTagStyle,
  curatedTags?: string[],
): Promise<string> {
  return renderWithinContext(content, contextLength, (body) =>
    constructTextTaggingPrompt(lang, customPrompts, body, tagStyle, curatedTags),
  );
}

/**
 * Builds the summary prompt, truncating `content` so that the prompt
 * template plus the content stays within `contextLength` tokens.
 *
 * @param lang - Forwarded to `constructSummaryPrompt`.
 * @param customPrompts - Forwarded to `constructSummaryPrompt`.
 * @param content - The text to embed in the prompt.
 * @param contextLength - Total token budget for the prompt.
 * @returns The rendered summary prompt.
 */
export async function buildSummaryPrompt(
  lang: string,
  customPrompts: string[],
  content: string,
  contextLength: number,
): Promise<string> {
  return renderWithinContext(content, contextLength, (body) =>
    constructSummaryPrompt(lang, customPrompts, body),
  );
}