about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- apps/web/next.config.mjs 7
-rw-r--r-- apps/web/package.json 1
-rw-r--r-- apps/workers/workers/inference/summarize.ts 2
-rw-r--r-- apps/workers/workers/inference/tagging.ts 3
-rw-r--r-- packages/shared/prompts.server.ts 87
-rw-r--r-- packages/shared/prompts.ts 82
-rw-r--r-- packages/trpc/routers/bookmarks.ts 2
-rw-r--r-- pnpm-lock.yaml 43
-rw-r--r-- tools/compare-models/src/bookmarkProcessor.ts 2
9 files changed, 144 insertions, 85 deletions
diff --git a/apps/web/next.config.mjs b/apps/web/next.config.mjs
index 5f1c2bf6..136f6a22 100644
--- a/apps/web/next.config.mjs
+++ b/apps/web/next.config.mjs
@@ -1,5 +1,10 @@
+import bundleAnalyzer from "@next/bundle-analyzer";
import pwa from "next-pwa";
+const withBundleAnalyzer = bundleAnalyzer({
+ enabled: process.env.ANALYZE === "true",
+});
+
const withPWA = pwa({
dest: "public",
disable: process.env.NODE_ENV != "production",
@@ -53,4 +58,4 @@ const nextConfig = withPWA({
typescript: { ignoreBuildErrors: true },
});
-export default nextConfig;
+export default withBundleAnalyzer(nextConfig);
diff --git a/apps/web/package.json b/apps/web/package.json
index 91c257e1..affbb48f 100644
--- a/apps/web/package.json
+++ b/apps/web/package.json
@@ -108,6 +108,7 @@
"@karakeep/prettier-config": "workspace:^0.1.0",
"@karakeep/tailwind-config": "workspace:^0.1.0",
"@karakeep/tsconfig": "workspace:^0.1.0",
+ "@next/bundle-analyzer": "15.3.8",
"@types/csv-parse": "^1.2.5",
"@types/emoji-mart": "^3.0.14",
"@types/react": "^19.1.6",
diff --git a/apps/workers/workers/inference/summarize.ts b/apps/workers/workers/inference/summarize.ts
index 560bb5a2..094c46ca 100644
--- a/apps/workers/workers/inference/summarize.ts
+++ b/apps/workers/workers/inference/summarize.ts
@@ -6,7 +6,7 @@ import { triggerSearchReindex, ZOpenAIRequest } from "@karakeep/shared-server";
import serverConfig from "@karakeep/shared/config";
import { InferenceClient } from "@karakeep/shared/inference";
import logger from "@karakeep/shared/logger";
-import { buildSummaryPrompt } from "@karakeep/shared/prompts";
+import { buildSummaryPrompt } from "@karakeep/shared/prompts.server";
import { DequeuedJob } from "@karakeep/shared/queueing";
import { BookmarkTypes } from "@karakeep/shared/types/bookmarks";
import { Bookmark } from "@karakeep/trpc/models/bookmarks";
diff --git a/apps/workers/workers/inference/tagging.ts b/apps/workers/workers/inference/tagging.ts
index ace426a1..376eab14 100644
--- a/apps/workers/workers/inference/tagging.ts
+++ b/apps/workers/workers/inference/tagging.ts
@@ -24,7 +24,8 @@ import {
import { ASSET_TYPES, readAsset } from "@karakeep/shared/assetdb";
import serverConfig from "@karakeep/shared/config";
import logger from "@karakeep/shared/logger";
-import { buildImagePrompt, buildTextPrompt } from "@karakeep/shared/prompts";
+import { buildImagePrompt } from "@karakeep/shared/prompts";
+import { buildTextPrompt } from "@karakeep/shared/prompts.server";
import { DequeuedJob, EnqueueOptions } from "@karakeep/shared/queueing";
import { Bookmark } from "@karakeep/trpc/models/bookmarks";
diff --git a/packages/shared/prompts.server.ts b/packages/shared/prompts.server.ts
new file mode 100644
index 00000000..3e2666de
--- /dev/null
+++ b/packages/shared/prompts.server.ts
@@ -0,0 +1,87 @@
+import type { Tiktoken } from "js-tiktoken";
+
+import type { ZTagStyle } from "./types/users";
+import { constructSummaryPrompt, constructTextTaggingPrompt } from "./prompts";
+
+let encoding: Tiktoken | null = null;
+
+/**
+ * Lazy load the encoding to avoid loading the tiktoken data into memory
+ * until it's actually needed
+ */
+async function getEncodingInstance(): Promise<Tiktoken> {
+ if (!encoding) {
+ // Dynamic import to lazy load the tiktoken module
+ const { getEncoding } = await import("js-tiktoken");
+ encoding = getEncoding("o200k_base");
+ }
+ return encoding;
+}
+
+async function calculateNumTokens(text: string): Promise<number> {
+ const enc = await getEncodingInstance();
+ return enc.encode(text).length;
+}
+
+async function truncateContent(
+ content: string,
+ length: number,
+): Promise<string> {
+ const enc = await getEncodingInstance();
+ const tokens = enc.encode(content);
+ if (tokens.length <= length) {
+ return content;
+ }
+ const truncatedTokens = tokens.slice(0, length);
+ return enc.decode(truncatedTokens);
+}
+
+/**
+ * Collapse runs of 10 or more whitespace characters into one to avoid tokenization issues
+ */
+function preprocessContent(content: string) {
+ return content.replace(/(\s){10,}/g, "$1");
+}
+
+export async function buildTextPrompt(
+ lang: string,
+ customPrompts: string[],
+ content: string,
+ contextLength: number,
+ tagStyle: ZTagStyle,
+): Promise<string> {
+ content = preprocessContent(content);
+ const promptTemplate = constructTextTaggingPrompt(
+ lang,
+ customPrompts,
+ "",
+ tagStyle,
+ );
+ const promptSize = await calculateNumTokens(promptTemplate);
+ const truncatedContent = await truncateContent(
+ content,
+ contextLength - promptSize,
+ );
+ return constructTextTaggingPrompt(
+ lang,
+ customPrompts,
+ truncatedContent,
+ tagStyle,
+ );
+}
+
+export async function buildSummaryPrompt(
+ lang: string,
+ customPrompts: string[],
+ content: string,
+ contextLength: number,
+): Promise<string> {
+ content = preprocessContent(content);
+ const promptTemplate = constructSummaryPrompt(lang, customPrompts, "");
+ const promptSize = await calculateNumTokens(promptTemplate);
+ const truncatedContent = await truncateContent(
+ content,
+ contextLength - promptSize,
+ );
+ return constructSummaryPrompt(lang, customPrompts, truncatedContent);
+}
diff --git a/packages/shared/prompts.ts b/packages/shared/prompts.ts
index 7dff9616..00963550 100644
--- a/packages/shared/prompts.ts
+++ b/packages/shared/prompts.ts
@@ -1,23 +1,6 @@
-import type { Tiktoken } from "js-tiktoken";
-
import type { ZTagStyle } from "./types/users";
import { getTagStylePrompt } from "./utils/tag";
-let encoding: Tiktoken | null = null;
-
-/**
- * Lazy load the encoding to avoid loading the tiktoken data into memory
- * until it's actually needed
- */
-async function getEncodingInstance(): Promise<Tiktoken> {
- if (!encoding) {
- // Dynamic import to lazy load the tiktoken module
- const { getEncoding } = await import("js-tiktoken");
- encoding = getEncoding("o200k_base");
- }
- return encoding;
-}
-
/**
* Remove duplicate whitespaces to avoid tokenization issues
*/
@@ -25,24 +8,6 @@ function preprocessContent(content: string) {
return content.replace(/(\s){10,}/g, "$1");
}
-async function calculateNumTokens(text: string): Promise<number> {
- const enc = await getEncodingInstance();
- return enc.encode(text).length;
-}
-
-async function truncateContent(
- content: string,
- length: number,
-): Promise<string> {
- const enc = await getEncodingInstance();
- const tokens = enc.encode(content);
- if (tokens.length <= length) {
- return content;
- }
- const truncatedTokens = tokens.slice(0, length);
- return enc.decode(truncatedTokens);
-}
-
export function buildImagePrompt(
lang: string,
customPrompts: string[],
@@ -66,7 +31,7 @@ You must respond in valid JSON with the key "tags" and the value is list of tags
/**
* Construct tagging prompt for text content
*/
-function constructTextTaggingPrompt(
+export function constructTextTaggingPrompt(
lang: string,
customPrompts: string[],
content: string,
@@ -97,7 +62,7 @@ You must respond in JSON with the key "tags" and the value is an array of string
/**
* Construct summary prompt
*/
-function constructSummaryPrompt(
+export function constructSummaryPrompt(
lang: string,
customPrompts: string[],
content: string,
@@ -127,49 +92,6 @@ export function buildTextPromptUntruncated(
);
}
-export async function buildTextPrompt(
- lang: string,
- customPrompts: string[],
- content: string,
- contextLength: number,
- tagStyle: ZTagStyle,
-): Promise<string> {
- content = preprocessContent(content);
- const promptTemplate = constructTextTaggingPrompt(
- lang,
- customPrompts,
- "",
- tagStyle,
- );
- const promptSize = await calculateNumTokens(promptTemplate);
- const truncatedContent = await truncateContent(
- content,
- contextLength - promptSize,
- );
- return constructTextTaggingPrompt(
- lang,
- customPrompts,
- truncatedContent,
- tagStyle,
- );
-}
-
-export async function buildSummaryPrompt(
- lang: string,
- customPrompts: string[],
- content: string,
- contextLength: number,
-): Promise<string> {
- content = preprocessContent(content);
- const promptTemplate = constructSummaryPrompt(lang, customPrompts, "");
- const promptSize = await calculateNumTokens(promptTemplate);
- const truncatedContent = await truncateContent(
- content,
- contextLength - promptSize,
- );
- return constructSummaryPrompt(lang, customPrompts, truncatedContent);
-}
-
/**
* Build summary prompt without truncation (for previews/UI)
*/
diff --git a/packages/trpc/routers/bookmarks.ts b/packages/trpc/routers/bookmarks.ts
index 5a39439f..882ff9b1 100644
--- a/packages/trpc/routers/bookmarks.ts
+++ b/packages/trpc/routers/bookmarks.ts
@@ -28,7 +28,7 @@ import {
import { SUPPORTED_BOOKMARK_ASSET_TYPES } from "@karakeep/shared/assetdb";
import serverConfig from "@karakeep/shared/config";
import { InferenceClientFactory } from "@karakeep/shared/inference";
-import { buildSummaryPrompt } from "@karakeep/shared/prompts";
+import { buildSummaryPrompt } from "@karakeep/shared/prompts.server";
import { EnqueueOptions } from "@karakeep/shared/queueing";
import { FilterQuery, getSearchClient } from "@karakeep/shared/search";
import { parseSearchQuery } from "@karakeep/shared/searchQueryParser";
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 44e8e6b0..0cc750d9 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -778,6 +778,9 @@ importers:
'@karakeep/tsconfig':
specifier: workspace:^0.1.0
version: link:../../tooling/typescript
+ '@next/bundle-analyzer':
+ specifier: 15.3.8
+ version: 15.3.8
'@types/csv-parse':
specifier: ^1.2.5
version: 1.2.5
@@ -4281,6 +4284,9 @@ packages:
'@napi-rs/wasm-runtime@1.0.0':
resolution: {integrity: sha512-OInwPIZhcQ+aWOBFMUXzv95RLDTBRPaNPm5kSFJaL3gVAMVxrzc0YXNsVeLPHf+4sTviOy2e5wZdvKILb7dC/w==}
+ '@next/bundle-analyzer@15.3.8':
+ resolution: {integrity: sha512-hE+o8opf8kRGNNGKG0zvgVoa9xMFncGxFTiEzvhZaewY5x0ovK23u5gOquQ3vAzIvJhF0rk/1hOzTDHKVHzksQ==}
+
'@next/env@15.3.8':
resolution: {integrity: sha512-SAfHg0g91MQVMPioeFeDjE+8UPF3j3BvHjs8ZKJAUz1BG7eMPvfCKOAgNWJ6s1MLNeP6O2InKQRTNblxPWuq+Q==}
@@ -9988,6 +9994,10 @@ packages:
resolution: {integrity: sha512-h5PpgXkWitc38BBMYawTYMWJHFZJVnBquFE57xFpjB8pJFiF6gZ+bU+WyI/yqXiFR5mdLsgYNaPe8uao6Uv9Og==}
engines: {node: '>=0.10.0'}
+ is-plain-object@5.0.0:
+ resolution: {integrity: sha512-VRSzKkbMm5jMDoKLbltAkFQ5Qr7VDiTFGXxYFXXowVj387GeGNOCsOH6Msy00SGZ3Fp84b1Naa1psqgcCIEP5Q==}
+ engines: {node: '>=0.10.0'}
+
is-potential-custom-element-name@1.0.1:
resolution: {integrity: sha512-bCYeRA2rVibKZd+s2625gGnGF/t7DSqDs4dP7CrLA1m7jKWz6pps0LpYLJN8Q64HtmPKJ1hrN3nzPNKFEKOUiQ==}
@@ -15197,6 +15207,11 @@ packages:
resolution: {integrity: sha512-n4W4YFyz5JzOfQeA8oN7dUYpR+MBP3PIUsn2jLjWXwK5ASUzt0Jc/A5sAUZoCYFJRGF0FBKJ+1JjN43rNdsQzA==}
engines: {node: '>=20'}
+ webpack-bundle-analyzer@4.10.1:
+ resolution: {integrity: sha512-s3P7pgexgT/HTUSYgxJyn28A+99mmLq4HsJepMPzu0R8ImJc52QNqaFYW1Z2z2uIb1/J3eYgaAWVpaC+v/1aAQ==}
+ engines: {node: '>= 10.13.0'}
+ hasBin: true
+
webpack-bundle-analyzer@4.10.2:
resolution: {integrity: sha512-vJptkMm9pk5si4Bv922ZbKLV8UTT4zib4FPgXMhgzUny0bfDDkLXAVQs3ly3fS4/TN9ROFtb0NFrm04UXFE/Vw==}
engines: {node: '>= 10.13.0'}
@@ -20030,6 +20045,13 @@ snapshots:
'@tybys/wasm-util': 0.10.0
optional: true
+ '@next/bundle-analyzer@15.3.8':
+ dependencies:
+ webpack-bundle-analyzer: 4.10.1
+ transitivePeerDependencies:
+ - bufferutil
+ - utf-8-validate
+
'@next/env@15.3.8': {}
'@next/swc-darwin-arm64@15.3.5':
@@ -26575,6 +26597,8 @@ snapshots:
dependencies:
isobject: 3.0.1
+ is-plain-object@5.0.0: {}
+
is-potential-custom-element-name@1.0.1: {}
is-promise@4.0.0: {}
@@ -33305,6 +33329,25 @@ snapshots:
webidl-conversions@8.0.0: {}
+ webpack-bundle-analyzer@4.10.1:
+ dependencies:
+ '@discoveryjs/json-ext': 0.5.7
+ acorn: 8.15.0
+ acorn-walk: 8.3.4
+ commander: 7.2.0
+ debounce: 1.2.1
+ escape-string-regexp: 4.0.0
+ gzip-size: 6.0.0
+ html-escaper: 2.0.2
+ is-plain-object: 5.0.0
+ opener: 1.5.2
+ picocolors: 1.1.1
+ sirv: 2.0.4
+ ws: 7.5.10
+ transitivePeerDependencies:
+ - bufferutil
+ - utf-8-validate
+
webpack-bundle-analyzer@4.10.2:
dependencies:
'@discoveryjs/json-ext': 0.5.7
diff --git a/tools/compare-models/src/bookmarkProcessor.ts b/tools/compare-models/src/bookmarkProcessor.ts
index 21280b97..4a1bbf0a 100644
--- a/tools/compare-models/src/bookmarkProcessor.ts
+++ b/tools/compare-models/src/bookmarkProcessor.ts
@@ -1,5 +1,5 @@
import type { InferenceClient } from "@karakeep/shared/inference";
-import { buildTextPrompt } from "@karakeep/shared/prompts";
+import { buildTextPrompt } from "@karakeep/shared/prompts.server";
import { inferTags } from "./inferenceClient";
import type { Bookmark } from "./types";