From 45db6147032071d270fbf2b577a234393247d921 Mon Sep 17 00:00:00 2001
From: Mohamed Bassem
Date: Sun, 1 Feb 2026 19:15:13 +0000
Subject: fix(web): don't bundle tiktoken in client bundles

---
 packages/shared/prompts.server.ts | 87 +++++++++++++++++++++++++++++++++++++++
 packages/shared/prompts.ts        | 82 +-----------------------------------
 2 files changed, 89 insertions(+), 80 deletions(-)
 create mode 100644 packages/shared/prompts.server.ts

(limited to 'packages/shared')

diff --git a/packages/shared/prompts.server.ts b/packages/shared/prompts.server.ts
new file mode 100644
index 00000000..3e2666de
--- /dev/null
+++ b/packages/shared/prompts.server.ts
@@ -0,0 +1,87 @@
+import type { Tiktoken } from "js-tiktoken";
+
+import type { ZTagStyle } from "./types/users";
+import { constructSummaryPrompt, constructTextTaggingPrompt } from "./prompts";
+
+let encoding: Tiktoken | null = null;
+
+/**
+ * Lazy load the encoding to avoid loading the tiktoken data into memory
+ * until it's actually needed
+ */
+async function getEncodingInstance(): Promise<Tiktoken> {
+  if (!encoding) {
+    // Dynamic import to lazy load the tiktoken module
+    const { getEncoding } = await import("js-tiktoken");
+    encoding = getEncoding("o200k_base");
+  }
+  return encoding;
+}
+
+async function calculateNumTokens(text: string): Promise<number> {
+  const enc = await getEncodingInstance();
+  return enc.encode(text).length;
+}
+
+async function truncateContent(
+  content: string,
+  length: number,
+): Promise<string> {
+  const enc = await getEncodingInstance();
+  const tokens = enc.encode(content);
+  if (tokens.length <= length) {
+    return content;
+  }
+  const truncatedTokens = tokens.slice(0, length);
+  return enc.decode(truncatedTokens);
+}
+
+/**
+ * Remove duplicate whitespaces to avoid tokenization issues
+ */
+function preprocessContent(content: string) {
+  return content.replace(/(\s){10,}/g, "$1");
+}
+
+export async function buildTextPrompt(
+  lang: string,
+  customPrompts: string[],
+  content: string,
+  contextLength: number,
+  tagStyle: ZTagStyle,
+): Promise<string> {
+  content = preprocessContent(content);
+  const promptTemplate = constructTextTaggingPrompt(
+    lang,
+    customPrompts,
+    "",
+    tagStyle,
+  );
+  const promptSize = await calculateNumTokens(promptTemplate);
+  const truncatedContent = await truncateContent(
+    content,
+    contextLength - promptSize,
+  );
+  return constructTextTaggingPrompt(
+    lang,
+    customPrompts,
+    truncatedContent,
+    tagStyle,
+  );
+}
+
+export async function buildSummaryPrompt(
+  lang: string,
+  customPrompts: string[],
+  content: string,
+  contextLength: number,
+): Promise<string> {
+  content = preprocessContent(content);
+  const promptTemplate = constructSummaryPrompt(lang, customPrompts, "");
+  const promptSize = await calculateNumTokens(promptTemplate);
+  const truncatedContent = await truncateContent(
+    content,
+    contextLength - promptSize,
+  );
+  return constructSummaryPrompt(lang, customPrompts, truncatedContent);
+}
diff --git a/packages/shared/prompts.ts b/packages/shared/prompts.ts
index 7dff9616..00963550 100644
--- a/packages/shared/prompts.ts
+++ b/packages/shared/prompts.ts
@@ -1,23 +1,6 @@
-import type { Tiktoken } from "js-tiktoken";
-
 import type { ZTagStyle } from "./types/users";
 import { getTagStylePrompt } from "./utils/tag";
 
-let encoding: Tiktoken | null = null;
-
-/**
- * Lazy load the encoding to avoid loading the tiktoken data into memory
- * until it's actually needed
- */
-async function getEncodingInstance(): Promise<Tiktoken> {
-  if (!encoding) {
-    // Dynamic import to lazy load the tiktoken module
-    const { getEncoding } = await import("js-tiktoken");
-    encoding = getEncoding("o200k_base");
-  }
-  return encoding;
-}
-
 /**
  * Remove duplicate whitespaces to avoid tokenization issues
  */
@@ -25,24 +8,6 @@ function preprocessContent(content: string) {
   return content.replace(/(\s){10,}/g, "$1");
 }
 
-async function calculateNumTokens(text: string): Promise<number> {
-  const enc = await getEncodingInstance();
-  return enc.encode(text).length;
-}
-
-async function truncateContent(
-  content: string,
-  length: number,
-): Promise<string> {
-  const enc = await getEncodingInstance();
-  const tokens = enc.encode(content);
-  if (tokens.length <= length) {
-    return content;
-  }
-  const truncatedTokens = tokens.slice(0, length);
-  return enc.decode(truncatedTokens);
-}
-
 export function buildImagePrompt(
   lang: string,
   customPrompts: string[],
@@ -66,7 +31,7 @@ You must respond in valid JSON with the key "tags" and the value is list of tags
 /**
  * Construct tagging prompt for text content
  */
-function constructTextTaggingPrompt(
+export function constructTextTaggingPrompt(
   lang: string,
   customPrompts: string[],
   content: string,
@@ -97,7 +62,7 @@ You must respond in JSON with the key "tags" and the value is an array of string
 /**
  * Construct summary prompt
  */
-function constructSummaryPrompt(
+export function constructSummaryPrompt(
   lang: string,
   customPrompts: string[],
   content: string,
@@ -127,49 +92,6 @@ export function buildTextPromptUntruncated(
   );
 }
 
-export async function buildTextPrompt(
-  lang: string,
-  customPrompts: string[],
-  content: string,
-  contextLength: number,
-  tagStyle: ZTagStyle,
-): Promise<string> {
-  content = preprocessContent(content);
-  const promptTemplate = constructTextTaggingPrompt(
-    lang,
-    customPrompts,
-    "",
-    tagStyle,
-  );
-  const promptSize = await calculateNumTokens(promptTemplate);
-  const truncatedContent = await truncateContent(
-    content,
-    contextLength - promptSize,
-  );
-  return constructTextTaggingPrompt(
-    lang,
-    customPrompts,
-    truncatedContent,
-    tagStyle,
-  );
-}
-
-export async function buildSummaryPrompt(
-  lang: string,
-  customPrompts: string[],
-  content: string,
-  contextLength: number,
-): Promise<string> {
-  content = preprocessContent(content);
-  const promptTemplate = constructSummaryPrompt(lang, customPrompts, "");
-  const promptSize = await calculateNumTokens(promptTemplate);
-  const truncatedContent = await truncateContent(
-    content,
-    contextLength - promptSize,
-  );
-  return constructSummaryPrompt(lang, customPrompts, truncatedContent);
-}
-
 /**
  * Build summary prompt without truncation (for previews/UI)
  */
-- 
cgit v1.2.3-70-g09d2