about | summary | refs | log | tree | commit | diff | stats
path: root/packages
diff options
context:
space:
mode:
Diffstat (limited to 'packages')
-rw-r--r-- packages/shared/prompts.server.ts | 87
-rw-r--r-- packages/shared/prompts.ts | 82
-rw-r--r-- packages/trpc/routers/bookmarks.ts | 2
3 files changed, 90 insertions, 81 deletions
diff --git a/packages/shared/prompts.server.ts b/packages/shared/prompts.server.ts
new file mode 100644
index 00000000..3e2666de
--- /dev/null
+++ b/packages/shared/prompts.server.ts
@@ -0,0 +1,87 @@
+import type { Tiktoken } from "js-tiktoken";
+
+import type { ZTagStyle } from "./types/users";
+import { constructSummaryPrompt, constructTextTaggingPrompt } from "./prompts";
+
+let encoding: Tiktoken | null = null;
+
+/**
+ * Returns the module-level "o200k_base" encoding, creating it on first use so
+ * that js-tiktoken is only imported once a caller actually needs it.
+ */
+async function getEncodingInstance(): Promise<Tiktoken> {
+  if (encoding !== null) {
+    return encoding;
+  }
+  const tiktoken = await import("js-tiktoken");
+  encoding = tiktoken.getEncoding("o200k_base");
+  return encoding;
+}
+
+/** Counts the tokens the shared encoding produces for `text`. */
+async function calculateNumTokens(text: string): Promise<number> {
+  return (await getEncodingInstance()).encode(text).length;
+}
+
+/** Keeps at most `length` leading tokens of `content`; a budget <= 0 gives "". */
+async function truncateContent(
+  content: string,
+  length: number,
+): Promise<string> {
+  const enc = await getEncodingInstance();
+  const tokens = enc.encode(content);
+  // Clamp: slice(0, negative) counts from the end instead of returning nothing.
+  const budget = Math.max(0, length);
+  if (tokens.length <= budget) return content;
+  return enc.decode(tokens.slice(0, budget));
+}
+
+/** Collapses each run of 10+ whitespace characters to the run's last one. */
+function preprocessContent(content: string) {
+  const longWhitespaceRun = /(\s){10,}/g;
+  // "$1" is the final whitespace character matched by the repeated group.
+  return content.replace(longWhitespaceRun, "$1");
+}
+
+/**
+ * Builds the text tagging prompt with `content` cut down to the token budget
+ * that remains after the prompt template itself is accounted for.
+ *
+ * @param contextLength - total token budget; the template's size is deducted
+ *   from it before the content is truncated.
+ * @returns the tagging prompt containing the (possibly truncated) content.
+ */
+export async function buildTextPrompt(
+  lang: string,
+  customPrompts: string[],
+  content: string,
+  contextLength: number,
+  tagStyle: ZTagStyle,
+): Promise<string> {
+  const cleanedContent = preprocessContent(content);
+  const render = (body: string) =>
+    constructTextTaggingPrompt(lang, customPrompts, body, tagStyle);
+  // Rendering with empty content measures the fixed overhead of the template.
+  const overheadTokens = await calculateNumTokens(render(""));
+  const fittedContent = await truncateContent(
+    cleanedContent,
+    contextLength - overheadTokens,
+  );
+  return render(fittedContent);
+}
+
+/** Builds the summary prompt with `content` truncated to the token budget. */
+export async function buildSummaryPrompt(
+  lang: string,
+  customPrompts: string[],
+  content: string,
+  contextLength: number,
+): Promise<string> {
+  const cleanedContent = preprocessContent(content);
+  // An empty-content render measures the template's own token cost.
+  const emptyPrompt = constructSummaryPrompt(lang, customPrompts, "");
+  const overheadTokens = await calculateNumTokens(emptyPrompt);
+  const budget = contextLength - overheadTokens;
+  const fittedContent = await truncateContent(cleanedContent, budget);
+  return constructSummaryPrompt(lang, customPrompts, fittedContent);
+}
diff --git a/packages/shared/prompts.ts b/packages/shared/prompts.ts
index 7dff9616..00963550 100644
--- a/packages/shared/prompts.ts
+++ b/packages/shared/prompts.ts
@@ -1,23 +1,6 @@
-import type { Tiktoken } from "js-tiktoken";
-
import type { ZTagStyle } from "./types/users";
import { getTagStylePrompt } from "./utils/tag";
-let encoding: Tiktoken | null = null;
-
-/**
- * Lazy load the encoding to avoid loading the tiktoken data into memory
- * until it's actually needed
- */
-async function getEncodingInstance(): Promise<Tiktoken> {
- if (!encoding) {
- // Dynamic import to lazy load the tiktoken module
- const { getEncoding } = await import("js-tiktoken");
- encoding = getEncoding("o200k_base");
- }
- return encoding;
-}
-
/**
* Remove duplicate whitespaces to avoid tokenization issues
*/
@@ -25,24 +8,6 @@ function preprocessContent(content: string) {
return content.replace(/(\s){10,}/g, "$1");
}
-async function calculateNumTokens(text: string): Promise<number> {
- const enc = await getEncodingInstance();
- return enc.encode(text).length;
-}
-
-async function truncateContent(
- content: string,
- length: number,
-): Promise<string> {
- const enc = await getEncodingInstance();
- const tokens = enc.encode(content);
- if (tokens.length <= length) {
- return content;
- }
- const truncatedTokens = tokens.slice(0, length);
- return enc.decode(truncatedTokens);
-}
-
export function buildImagePrompt(
lang: string,
customPrompts: string[],
@@ -66,7 +31,7 @@ You must respond in valid JSON with the key "tags" and the value is list of tags
/**
* Construct tagging prompt for text content
*/
-function constructTextTaggingPrompt(
+export function constructTextTaggingPrompt(
lang: string,
customPrompts: string[],
content: string,
@@ -97,7 +62,7 @@ You must respond in JSON with the key "tags" and the value is an array of string
/**
* Construct summary prompt
*/
-function constructSummaryPrompt(
+export function constructSummaryPrompt(
lang: string,
customPrompts: string[],
content: string,
@@ -127,49 +92,6 @@ export function buildTextPromptUntruncated(
);
}
-export async function buildTextPrompt(
- lang: string,
- customPrompts: string[],
- content: string,
- contextLength: number,
- tagStyle: ZTagStyle,
-): Promise<string> {
- content = preprocessContent(content);
- const promptTemplate = constructTextTaggingPrompt(
- lang,
- customPrompts,
- "",
- tagStyle,
- );
- const promptSize = await calculateNumTokens(promptTemplate);
- const truncatedContent = await truncateContent(
- content,
- contextLength - promptSize,
- );
- return constructTextTaggingPrompt(
- lang,
- customPrompts,
- truncatedContent,
- tagStyle,
- );
-}
-
-export async function buildSummaryPrompt(
- lang: string,
- customPrompts: string[],
- content: string,
- contextLength: number,
-): Promise<string> {
- content = preprocessContent(content);
- const promptTemplate = constructSummaryPrompt(lang, customPrompts, "");
- const promptSize = await calculateNumTokens(promptTemplate);
- const truncatedContent = await truncateContent(
- content,
- contextLength - promptSize,
- );
- return constructSummaryPrompt(lang, customPrompts, truncatedContent);
-}
-
/**
* Build summary prompt without truncation (for previews/UI)
*/
diff --git a/packages/trpc/routers/bookmarks.ts b/packages/trpc/routers/bookmarks.ts
index 5a39439f..882ff9b1 100644
--- a/packages/trpc/routers/bookmarks.ts
+++ b/packages/trpc/routers/bookmarks.ts
@@ -28,7 +28,7 @@ import {
import { SUPPORTED_BOOKMARK_ASSET_TYPES } from "@karakeep/shared/assetdb";
import serverConfig from "@karakeep/shared/config";
import { InferenceClientFactory } from "@karakeep/shared/inference";
-import { buildSummaryPrompt } from "@karakeep/shared/prompts";
+import { buildSummaryPrompt } from "@karakeep/shared/prompts.server";
import { EnqueueOptions } from "@karakeep/shared/queueing";
import { FilterQuery, getSearchClient } from "@karakeep/shared/search";
import { parseSearchQuery } from "@karakeep/shared/searchQueryParser";