| field | value |
|---|---|
| author | Mohamed Bassem <me@mbassem.com>, 2025-07-20 23:39:38 +0000 |
| committer | Mohamed Bassem <me@mbassem.com>, 2025-07-20 23:39:38 +0000 |
| commit | 52ac0869d53b54e91db557f012f7ee9a3ecc3e9d |
| tree | 8c56b8aa621686f3379f4617f77536562ea046bf (packages/shared) |
| parent | 2f3da0b8ce039afebf3aca0cc4030fbf168c13dc |
feat: Add a max output tokens env variable
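The new `INFERENCE_MAX_OUTPUT_TOKENS` variable is parsed with the same `z.coerce.number()` pattern the schema already uses for `INFERENCE_CONTEXT_LENGTH`. The snippet below is a minimal, self-contained sketch of that pattern, not karakeep's actual code: `envSchema` is a hypothetical stand-in for the real `allEnv` object in `packages/shared/config.ts`.

```typescript
import { z } from "zod";

// Hypothetical stand-in for the relevant slice of the `allEnv` schema.
const envSchema = z.object({
  // `z.coerce.number()` converts the string that process.env provides
  // into a number; `.default(2048)` applies when the variable is unset.
  INFERENCE_MAX_OUTPUT_TOKENS: z.coerce.number().default(2048),
});

// e.g. INFERENCE_MAX_OUTPUT_TOKENS=4096 set in the environment:
console.log(envSchema.parse({ INFERENCE_MAX_OUTPUT_TOKENS: "4096" }));
// => { INFERENCE_MAX_OUTPUT_TOKENS: 4096 }  (a number, not a string)

// Variable not set at all: the default kicks in.
console.log(envSchema.parse({}));
// => { INFERENCE_MAX_OUTPUT_TOKENS: 2048 }
```

Coercion matters here because every value read from the environment arrives as a string; without it, a plain `z.number()` would reject `"4096"`.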
Diffstat (limited to 'packages/shared')
| mode | path | lines |
|---|---|---|
| -rw-r--r-- | packages/shared/config.ts | 2 |
| -rw-r--r-- | packages/shared/inference.ts | 4 |

2 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/packages/shared/config.ts b/packages/shared/config.ts
index a18482c8..ea90ffcb 100644
--- a/packages/shared/config.ts
+++ b/packages/shared/config.ts
@@ -38,6 +38,7 @@ const allEnv = z.object({
   INFERENCE_IMAGE_MODEL: z.string().default("gpt-4o-mini"),
   EMBEDDING_TEXT_MODEL: z.string().default("text-embedding-3-small"),
   INFERENCE_CONTEXT_LENGTH: z.coerce.number().default(2048),
+  INFERENCE_MAX_OUTPUT_TOKENS: z.coerce.number().default(2048),
   INFERENCE_SUPPORTS_STRUCTURED_OUTPUT: optionalStringBool(),
   INFERENCE_OUTPUT_SCHEMA: z
     .enum(["structured", "json", "plain"])
@@ -190,6 +191,7 @@ const serverConfigSchema = allEnv
     imageModel: val.INFERENCE_IMAGE_MODEL,
     inferredTagLang: val.INFERENCE_LANG,
     contextLength: val.INFERENCE_CONTEXT_LENGTH,
+    maxOutputTokens: val.INFERENCE_MAX_OUTPUT_TOKENS,
     outputSchema:
       val.INFERENCE_SUPPORTS_STRUCTURED_OUTPUT !== undefined
         ? val.INFERENCE_SUPPORTS_STRUCTURED_OUTPUT
diff --git a/packages/shared/inference.ts b/packages/shared/inference.ts
index 04fa8cfc..41026fbd 100644
--- a/packages/shared/inference.ts
+++ b/packages/shared/inference.ts
@@ -90,6 +90,7 @@ class OpenAIInferenceClient implements InferenceClient {
       {
         messages: [{ role: "user", content: prompt }],
         model: serverConfig.inference.textModel,
+        max_tokens: serverConfig.inference.maxOutputTokens,
         response_format: mapInferenceOutputSchema(
           {
             structured: optsWithDefaults.schema
@@ -126,6 +127,7 @@ class OpenAIInferenceClient implements InferenceClient {
     const chatCompletion = await this.openAI.chat.completions.create(
       {
         model: serverConfig.inference.imageModel,
+        max_tokens: serverConfig.inference.maxOutputTokens,
         response_format: mapInferenceOutputSchema(
           {
             structured: optsWithDefaults.schema
@@ -151,7 +153,6 @@ class OpenAIInferenceClient implements InferenceClient {
           ],
         },
       ],
-      max_tokens: 2000,
     },
     {
       signal: optsWithDefaults.abortSignal,
@@ -224,6 +225,7 @@ class OllamaInferenceClient implements InferenceClient {
       keep_alive: serverConfig.inference.ollamaKeepAlive,
       options: {
         num_ctx: serverConfig.inference.contextLength,
+        num_predict: serverConfig.inference.maxOutputTokens,
       },
       messages: [
         { role: "user", content: prompt, images: image ? [image] : undefined },
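On the consumer side, the wiring this commit adds looks roughly like the sketch below, which forwards the single parsed value as OpenAI's `max_tokens` request parameter. It is a sketch against the official `openai` Node SDK under stated assumptions: `serverConfig` and `infer` are hypothetical stand-ins, not karakeep's real `InferenceClient` API.

```typescript
import OpenAI from "openai";

// Hypothetical stand-in for the parsed config object; in karakeep the
// real value comes from the env schema shown above.
const serverConfig = {
  inference: { textModel: "gpt-4o-mini", maxOutputTokens: 2048 },
};

const openAI = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

async function infer(prompt: string): Promise<string | null> {
  const completion = await openAI.chat.completions.create({
    model: serverConfig.inference.textModel,
    // The commit replaces a hardcoded `max_tokens: 2000` with this
    // configurable cap; it bounds generated tokens, not the prompt.
    max_tokens: serverConfig.inference.maxOutputTokens,
    messages: [{ role: "user", content: prompt }],
  });
  return completion.choices[0]?.message.content ?? null;
}
```

For the Ollama backend, the same value maps onto `options.num_predict`, Ollama's cap on the number of tokens to generate, while `num_ctx` continues to govern the context window. The net effect is that one env variable now bounds output length across all three call sites: OpenAI text, OpenAI image, and Ollama.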
