about | summary | refs | log | tree | commit | diff | stats
path: root/packages
diff options
context:
space:
mode:
Diffstat (limited to 'packages')
-rw-r--r-- packages/shared/prompts.ts | 126
-rw-r--r-- packages/trpc/routers/bookmarks.ts | 2
2 files changed, 101 insertions, 27 deletions
diff --git a/packages/shared/prompts.ts b/packages/shared/prompts.ts
index 0b79eb9a..5a6a705e 100644
--- a/packages/shared/prompts.ts
+++ b/packages/shared/prompts.ts
@@ -1,6 +1,19 @@
-import { getEncoding } from "js-tiktoken";
+import type { Tiktoken } from "js-tiktoken";
-const encoding = getEncoding("o200k_base");
+let encoding: Tiktoken | null = null;
+// Load currently in flight, shared so that concurrent first callers do not
+// each import the module and build their own encoder.
+let encodingLoad: Promise<Tiktoken> | null = null;
+
+/**
+ * Lazy load the encoding to avoid loading the tiktoken data into memory
+ * until it's actually needed.
+ *
+ * The first call starts the dynamic import; calls made while it is still
+ * pending await the same promise, and later calls return the cached instance.
+ *
+ * @returns The shared `o200k_base` encoding instance.
+ * @throws Whatever the dynamic import or `getEncoding` throws. A failed load
+ *   is not cached, so the next call tries again.
+ */
+async function getEncodingInstance(): Promise<Tiktoken> {
+  if (encoding) {
+    return encoding;
+  }
+  if (!encodingLoad) {
+    encodingLoad = (async () => {
+      try {
+        // Dynamic import to lazy load the tiktoken module
+        const { getEncoding } = await import("js-tiktoken");
+        const loaded = getEncoding("o200k_base");
+        encoding = loaded;
+        return loaded;
+      } finally {
+        // Once settled the handle is no longer needed: on success `encoding`
+        // is the cache, on failure clearing it lets the next call retry.
+        encodingLoad = null;
+      }
+    })();
+  }
+  return encodingLoad;
+}
/**
* Remove duplicate whitespaces to avoid tokenization issues
@@ -9,17 +22,22 @@ function preprocessContent(content: string) {
return content.replace(/(\s){10,}/g, "$1");
}
-function calculateNumTokens(text: string) {
- return encoding.encode(text).length;
+/**
+ * Count the tokens that `text` encodes to with the lazily loaded encoding.
+ *
+ * @param text - Text to measure.
+ * @returns Number of tokens produced by encoding `text`.
+ */
+async function calculateNumTokens(text: string): Promise<number> {
+  const tokenizer = await getEncodingInstance();
+  const tokenIds = tokenizer.encode(text);
+  return tokenIds.length;
}
-function truncateContent(content: string, length: number) {
- const tokens = encoding.encode(content);
+/**
+ * Truncate `content` to at most `length` tokens of the lazily loaded encoding.
+ *
+ * @param content - Text to fit into the token budget.
+ * @param length - Maximum number of tokens to keep. Callers compute this as
+ *   `contextLength - promptSize`, so it can be zero or negative.
+ * @returns `content` unchanged when it already fits, otherwise the decoded
+ *   prefix made of the first `length` tokens (empty when the budget is <= 0).
+ */
+async function truncateContent(
+  content: string,
+  length: number,
+): Promise<string> {
+  const enc = await getEncodingInstance();
+  const tokens = enc.encode(content);
if (tokens.length <= length) {
return content;
}
- const truncatedTokens = tokens.slice(0, length);
- return encoding.decode(truncatedTokens);
+  // Clamp the budget: `slice(0, negative)` counts from the end and would keep
+  // almost all of the content when the prompt alone exceeds the context size.
+  const truncatedTokens = tokens.slice(0, Math.max(0, length));
+  return enc.decode(truncatedTokens);
}
export function buildImagePrompt(lang: string, customPrompts: string[]) {
@@ -35,14 +53,15 @@ ${customPrompts && customPrompts.map((p) => `- ${p}`).join("\n")}
You must respond in valid JSON with the key "tags" and the value is list of tags. Don't wrap the response in a markdown code.`;
}
-export function buildTextPrompt(
+/**
+ * Construct tagging prompt for text content
+ */
+function constructTextTaggingPrompt(
lang: string,
customPrompts: string[],
content: string,
- contextLength: number,
-) {
- content = preprocessContent(content);
- const constructPrompt = (c: string) => `
+): string {
+ return `
You are an expert whose responsibility is to help with automatic tagging for a read-it-later app.
Please analyze the TEXT_CONTENT below and suggest relevant tags that describe its key themes, topics, and main ideas. The rules are:
- Aim for a variety of tags, including broad categories, specific keywords, and potential sub-genres.
@@ -54,30 +73,85 @@ Please analyze the TEXT_CONTENT below and suggest relevant tags that describe it
${customPrompts && customPrompts.map((p) => `- ${p}`).join("\n")}
<TEXT_CONTENT>
-${c}
+${content}
</TEXT_CONTENT>
You must respond in JSON with the key "tags" and the value is an array of string tags.`;
-
- const promptSize = calculateNumTokens(constructPrompt(""));
- const truncatedContent = truncateContent(content, contextLength - promptSize);
- return constructPrompt(truncatedContent);
}
-export function buildSummaryPrompt(
+/**
+ * Render the summarization prompt: the fixed rules (3-4 sentences, written in
+ * `lang`), one "- " bullet per entry of `customPrompts`, then `content`.
+ * The content is inserted as given; no preprocessing or truncation happens
+ * here.
+ */
+function constructSummaryPrompt(
lang: string,
customPrompts: string[],
content: string,
- contextLength: number,
-) {
- content = preprocessContent(content);
- const constructPrompt = (c: string) => `
+): string {
+ return `
Summarize the following content responding ONLY with the summary. You MUST follow the following rules:
- Summary must be in 3-4 sentences.
- The summary must be in ${lang}.
${customPrompts && customPrompts.map((p) => `- ${p}`).join("\n")}
- ${c}`;
+ ${content}`;
+}
+
+/**
+ * Produce the text tagging prompt for the whole content, with no token-based
+ * truncation (for previews/UI). The content still goes through
+ * `preprocessContent` before it is placed in the prompt.
+ */
+export function buildTextPromptUntruncated(
+  lang: string,
+  customPrompts: string[],
+  content: string,
+): string {
+  const normalizedContent = preprocessContent(content);
+  return constructTextTaggingPrompt(lang, customPrompts, normalizedContent);
+}
+
+/**
+ * Build the text tagging prompt, truncating the content to fit the context.
+ *
+ * The prompt is first rendered with empty content to measure its own token
+ * cost; the preprocessed content is then truncated to
+ * `contextLength - promptSize` tokens and inserted into the final prompt.
+ */
+export async function buildTextPrompt(
+  lang: string,
+  customPrompts: string[],
+  content: string,
+  contextLength: number,
+): Promise<string> {
+  const cleaned = preprocessContent(content);
+  // Token cost of the prompt scaffolding alone (rendered with empty content).
+  const scaffolding = constructTextTaggingPrompt(lang, customPrompts, "");
+  const overhead = await calculateNumTokens(scaffolding);
+  const fitted = await truncateContent(cleaned, contextLength - overhead);
+  return constructTextTaggingPrompt(lang, customPrompts, fitted);
+}
+
+/**
+ * Build the summary prompt, truncating the content to fit the context.
+ *
+ * The prompt is first rendered with empty content to measure its own token
+ * cost; the preprocessed content is then truncated to
+ * `contextLength - promptSize` tokens and inserted into the final prompt.
+ */
+export async function buildSummaryPrompt(
+  lang: string,
+  customPrompts: string[],
+  content: string,
+  contextLength: number,
+): Promise<string> {
+  const cleaned = preprocessContent(content);
+  // Token cost of the prompt scaffolding alone (rendered with empty content).
+  const scaffolding = constructSummaryPrompt(lang, customPrompts, "");
+  const overhead = await calculateNumTokens(scaffolding);
+  const fitted = await truncateContent(cleaned, contextLength - overhead);
+  return constructSummaryPrompt(lang, customPrompts, fitted);
+}
- const promptSize = calculateNumTokens(constructPrompt(""));
- const truncatedContent = truncateContent(content, contextLength - promptSize);
- return constructPrompt(truncatedContent);
+/**
+ * Produce the summary prompt for the whole content, with no token-based
+ * truncation (for previews/UI). The content still goes through
+ * `preprocessContent` before it is placed in the prompt.
+ */
+export function buildSummaryPromptUntruncated(
+  lang: string,
+  customPrompts: string[],
+  content: string,
+): string {
+  const normalizedContent = preprocessContent(content);
+  return constructSummaryPrompt(lang, customPrompts, normalizedContent);
}
diff --git a/packages/trpc/routers/bookmarks.ts b/packages/trpc/routers/bookmarks.ts
index 05ffa240..f68d5ada 100644
--- a/packages/trpc/routers/bookmarks.ts
+++ b/packages/trpc/routers/bookmarks.ts
@@ -954,7 +954,7 @@ Author: ${bookmark.author ?? ""}
},
});
- const summaryPrompt = buildSummaryPrompt(
+ const summaryPrompt = await buildSummaryPrompt(
serverConfig.inference.inferredTagLang,
prompts.map((p) => p.text),
bookmarkDetails,