aboutsummaryrefslogtreecommitdiffstats
path: root/packages/shared/prompts.server.ts
diff options
context:
space:
mode:
author Mohamed Bassem <me@mbassem.com> 2026-02-01 19:15:13 +0000
committer Mohamed Bassem <me@mbassem.com> 2026-02-01 19:15:13 +0000
commit 45db6147032071d270fbf2b577a234393247d921 (patch)
tree a739af8254efc2f417f27d702472c14782154e4d /packages/shared/prompts.server.ts
parent bf5c99cb10a0b35b0101bf8f9858c176889a0284 (diff)
download karakeep-45db6147032071d270fbf2b577a234393247d921.tar.zst
fix(web): don't bundle tiktoken in client bundles
Diffstat (limited to 'packages/shared/prompts.server.ts')
-rw-r--r-- packages/shared/prompts.server.ts 87
1 files changed, 87 insertions, 0 deletions
diff --git a/packages/shared/prompts.server.ts b/packages/shared/prompts.server.ts
new file mode 100644
index 00000000..3e2666de
--- /dev/null
+++ b/packages/shared/prompts.server.ts
@@ -0,0 +1,87 @@
+import type { Tiktoken } from "js-tiktoken";
+
+import type { ZTagStyle } from "./types/users";
+import { constructSummaryPrompt, constructTextTaggingPrompt } from "./prompts";
+
+// Cached load (pending or settled) of the shared encoder; null until first use.
+let encodingPromise: Promise<Tiktoken> | null = null;
+
+/**
+ * Lazily loads the `o200k_base` encoder so the tiktoken data is not pulled
+ * into memory until it is actually needed.
+ *
+ * The promise is cached rather than the resolved instance, so callers that
+ * arrive while the first load is still pending share it instead of each
+ * importing the module and building an encoder of their own. A failed load
+ * clears the cache so that a later call can retry.
+ *
+ * @returns The shared encoder instance.
+ * @throws Whatever the dynamic import or `getEncoding` rejects/throws with.
+ */
+async function getEncodingInstance(): Promise<Tiktoken> {
+  if (encodingPromise) {
+    return encodingPromise;
+  }
+  const pending = (async (): Promise<Tiktoken> => {
+    // Dynamic import to lazy load the tiktoken module
+    const { getEncoding } = await import("js-tiktoken");
+    return getEncoding("o200k_base");
+  })();
+  encodingPromise = pending;
+  try {
+    return await pending;
+  } catch (err: unknown) {
+    // Do not keep a rejected promise around forever; let the next caller retry.
+    if (encodingPromise === pending) {
+      encodingPromise = null;
+    }
+    throw err;
+  }
+}
+
+/**
+ * Counts how many tokens `text` encodes to with the shared encoder.
+ *
+ * @param text Text to measure.
+ * @returns The number of tokens produced by encoding `text`.
+ */
+async function calculateNumTokens(text: string): Promise<number> {
+  const encoder = await getEncodingInstance();
+  const tokenIds = encoder.encode(text);
+  return tokenIds.length;
+}
+
+/**
+ * Trims `content` so that it encodes to at most `length` tokens.
+ *
+ * @param content Text to trim.
+ * @param length Maximum number of tokens to keep. A zero, negative or NaN
+ *   budget yields an empty string.
+ * @returns `content` unchanged when it already fits, otherwise the decoded
+ *   text of its first `length` tokens.
+ */
+async function truncateContent(
+  content: string,
+  length: number,
+): Promise<string> {
+  // Callers pass `contextLength - promptSize`, which goes negative when the
+  // prompt alone exceeds the context. A negative end index would make `slice`
+  // drop tokens from the tail (keeping most of the content) instead of
+  // keeping none, so bail out before touching the encoder.
+  if (!(length > 0)) {
+    return "";
+  }
+  const enc = await getEncodingInstance();
+  const tokens = enc.encode(content);
+  if (tokens.length <= length) {
+    return content;
+  }
+  const truncatedTokens = tokens.slice(0, length);
+  return enc.decode(truncatedTokens);
+}
+
+/**
+ * Collapses every run of ten or more consecutive whitespace characters down
+ * to a single one (the last character of the run) to avoid tokenization
+ * issues. Shorter runs are left untouched.
+ */
+function preprocessContent(content: string) {
+  const longWhitespaceRun = /\s{10,}/g;
+  return content.replace(longWhitespaceRun, (run) =>
+    run.charAt(run.length - 1),
+  );
+}
+
+/**
+ * Builds the text-tagging prompt for `content`, trimming the content to the
+ * token allowance left over after the prompt template itself.
+ *
+ * The template is rendered once with empty content to measure its token
+ * cost; `contextLength` minus that cost is the limit applied to the
+ * whitespace-collapsed content before the final prompt is rendered.
+ *
+ * @param lang Forwarded to `constructTextTaggingPrompt`.
+ * @param customPrompts Forwarded to `constructTextTaggingPrompt`.
+ * @param content Raw text to embed in the prompt.
+ * @param contextLength Token budget the template cost is subtracted from.
+ * @param tagStyle Forwarded to `constructTextTaggingPrompt`.
+ * @returns The prompt rendered with the truncated content.
+ */
+export async function buildTextPrompt(
+  lang: string,
+  customPrompts: string[],
+  content: string,
+  contextLength: number,
+  tagStyle: ZTagStyle,
+): Promise<string> {
+  const render = (body: string) =>
+    constructTextTaggingPrompt(lang, customPrompts, body, tagStyle);
+
+  const cleaned = preprocessContent(content);
+  const templateCost = await calculateNumTokens(render(""));
+  const fitted = await truncateContent(cleaned, contextLength - templateCost);
+  return render(fitted);
+}
+
+/**
+ * Builds the summary prompt for `content`, trimming the content to the token
+ * allowance left over after the prompt template itself.
+ *
+ * The template is rendered once with empty content to measure its token
+ * cost; `contextLength` minus that cost is the limit applied to the
+ * whitespace-collapsed content before the final prompt is rendered.
+ *
+ * @param lang Forwarded to `constructSummaryPrompt`.
+ * @param customPrompts Forwarded to `constructSummaryPrompt`.
+ * @param content Raw text to embed in the prompt.
+ * @param contextLength Token budget the template cost is subtracted from.
+ * @returns The prompt rendered with the truncated content.
+ */
+export async function buildSummaryPrompt(
+  lang: string,
+  customPrompts: string[],
+  content: string,
+  contextLength: number,
+): Promise<string> {
+  const render = (body: string) =>
+    constructSummaryPrompt(lang, customPrompts, body);
+
+  const cleaned = preprocessContent(content);
+  const templateCost = await calculateNumTokens(render(""));
+  const fitted = await truncateContent(cleaned, contextLength - templateCost);
+  return render(fitted);
+}