1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
|
import type { Tiktoken } from "js-tiktoken";
import type { ZTagStyle } from "./types/users";
import { constructSummaryPrompt, constructTextTaggingPrompt } from "./prompts";
let encoding: Tiktoken | null = null;
/**
* Lazy load the encoding to avoid loading the tiktoken data into memory
* until it's actually needed
*/
async function getEncodingInstance(): Promise<Tiktoken> {
if (!encoding) {
// Dynamic import to lazy load the tiktoken module
const { getEncoding } = await import("js-tiktoken");
encoding = getEncoding("o200k_base");
}
return encoding;
}
async function calculateNumTokens(text: string): Promise<number> {
const enc = await getEncodingInstance();
return enc.encode(text).length;
}
async function truncateContent(
content: string,
length: number,
): Promise<string> {
const enc = await getEncodingInstance();
const tokens = enc.encode(content);
if (tokens.length <= length) {
return content;
}
const truncatedTokens = tokens.slice(0, length);
return enc.decode(truncatedTokens);
}
/**
* Remove duplicate whitespaces to avoid tokenization issues
*/
function preprocessContent(content: string) {
return content.replace(/(\s){10,}/g, "$1");
}
export async function buildTextPrompt(
lang: string,
customPrompts: string[],
content: string,
contextLength: number,
tagStyle: ZTagStyle,
): Promise<string> {
content = preprocessContent(content);
const promptTemplate = constructTextTaggingPrompt(
lang,
customPrompts,
"",
tagStyle,
);
const promptSize = await calculateNumTokens(promptTemplate);
const truncatedContent = await truncateContent(
content,
contextLength - promptSize,
);
return constructTextTaggingPrompt(
lang,
customPrompts,
truncatedContent,
tagStyle,
);
}
export async function buildSummaryPrompt(
lang: string,
customPrompts: string[],
content: string,
contextLength: number,
): Promise<string> {
content = preprocessContent(content);
const promptTemplate = constructSummaryPrompt(lang, customPrompts, "");
const promptSize = await calculateNumTokens(promptTemplate);
const truncatedContent = await truncateContent(
content,
contextLength - promptSize,
);
return constructSummaryPrompt(lang, customPrompts, truncatedContent);
}
|