author    Mohamed Bassem <me@mbassem.com>  2025-07-20 23:39:38 +0000
committer Mohamed Bassem <me@mbassem.com>  2025-07-20 23:39:38 +0000
commit    52ac0869d53b54e91db557f012f7ee9a3ecc3e9d (patch)
tree      8c56b8aa621686f3379f4617f77536562ea046bf /packages
parent    2f3da0b8ce039afebf3aca0cc4030fbf168c13dc (diff)
feat: Add a max output tokens env variable
Diffstat (limited to 'packages')
-rw-r--r--  packages/shared/config.ts     | 2 ++
-rw-r--r--  packages/shared/inference.ts  | 4 +++-
2 files changed, 5 insertions(+), 1 deletion(-)
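
The change is small: a new INFERENCE_MAX_OUTPUT_TOKENS environment variable is parsed in config.ts and exposed as serverConfig.inference.maxOutputTokens, then used to cap model output in both inference backends. A minimal sketch of the parsing behavior, assuming only the zod dependency; the variable name and default come from the diff below, with the surrounding schema reduced to one field:

import { z } from "zod";

// Mirrors the line added to config.ts below: z.coerce.number() converts the
// raw string from process.env into a number, and .default(2048) applies when
// the variable is unset.
const schema = z.object({
  INFERENCE_MAX_OUTPUT_TOKENS: z.coerce.number().default(2048),
});

const parsed = schema.parse(process.env);
console.log(parsed.INFERENCE_MAX_OUTPUT_TOKENS);
// INFERENCE_MAX_OUTPUT_TOKENS=4096 prints 4096; when unset, it prints 2048.
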
diff --git a/packages/shared/config.ts b/packages/shared/config.ts
index a18482c8..ea90ffcb 100644
--- a/packages/shared/config.ts
+++ b/packages/shared/config.ts
@@ -38,6 +38,7 @@ const allEnv = z.object({
INFERENCE_IMAGE_MODEL: z.string().default("gpt-4o-mini"),
EMBEDDING_TEXT_MODEL: z.string().default("text-embedding-3-small"),
INFERENCE_CONTEXT_LENGTH: z.coerce.number().default(2048),
+ INFERENCE_MAX_OUTPUT_TOKENS: z.coerce.number().default(2048),
INFERENCE_SUPPORTS_STRUCTURED_OUTPUT: optionalStringBool(),
INFERENCE_OUTPUT_SCHEMA: z
.enum(["structured", "json", "plain"])
@@ -190,6 +191,7 @@ const serverConfigSchema = allEnv
imageModel: val.INFERENCE_IMAGE_MODEL,
inferredTagLang: val.INFERENCE_LANG,
contextLength: val.INFERENCE_CONTEXT_LENGTH,
+ maxOutputTokens: val.INFERENCE_MAX_OUTPUT_TOKENS,
outputSchema:
val.INFERENCE_SUPPORTS_STRUCTURED_OUTPUT !== undefined
? val.INFERENCE_SUPPORTS_STRUCTURED_OUTPUT
diff --git a/packages/shared/inference.ts b/packages/shared/inference.ts
index 04fa8cfc..41026fbd 100644
--- a/packages/shared/inference.ts
+++ b/packages/shared/inference.ts
@@ -90,6 +90,7 @@ class OpenAIInferenceClient implements InferenceClient {
{
messages: [{ role: "user", content: prompt }],
model: serverConfig.inference.textModel,
+ max_tokens: serverConfig.inference.maxOutputTokens,
response_format: mapInferenceOutputSchema(
{
structured: optsWithDefaults.schema
@@ -126,6 +127,7 @@ class OpenAIInferenceClient implements InferenceClient {
const chatCompletion = await this.openAI.chat.completions.create(
{
model: serverConfig.inference.imageModel,
+ max_tokens: serverConfig.inference.maxOutputTokens,
response_format: mapInferenceOutputSchema(
{
structured: optsWithDefaults.schema
@@ -151,7 +153,6 @@ class OpenAIInferenceClient implements InferenceClient {
],
},
],
- max_tokens: 2000,
},
{
signal: optsWithDefaults.abortSignal,
@@ -224,6 +225,7 @@ class OllamaInferenceClient implements InferenceClient {
keep_alive: serverConfig.inference.ollamaKeepAlive,
options: {
num_ctx: serverConfig.inference.contextLength,
+ num_predict: serverConfig.inference.maxOutputTokens,
},
messages: [
{ role: "user", content: prompt, images: image ? [image] : undefined },