Diffstat (limited to 'packages/shared')
-rw-r--r--  packages/shared/config.ts    | 2 ++
-rw-r--r--  packages/shared/inference.ts | 8 ++++++--
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/packages/shared/config.ts b/packages/shared/config.ts
index 5343246d..8e7d0252 100644
--- a/packages/shared/config.ts
+++ b/packages/shared/config.ts
@@ -65,6 +65,7 @@ const allEnv = z.object({
   EMBEDDING_TEXT_MODEL: z.string().default("text-embedding-3-small"),
   INFERENCE_CONTEXT_LENGTH: z.coerce.number().default(2048),
   INFERENCE_MAX_OUTPUT_TOKENS: z.coerce.number().default(2048),
+  INFERENCE_USE_MAX_COMPLETION_TOKENS: stringBool("false"),
   INFERENCE_SUPPORTS_STRUCTURED_OUTPUT: optionalStringBool(),
   INFERENCE_OUTPUT_SCHEMA: z
     .enum(["structured", "json", "plain"])
@@ -241,6 +242,7 @@ const serverConfigSchema = allEnv.transform((val, ctx) => {
       inferredTagLang: val.INFERENCE_LANG,
       contextLength: val.INFERENCE_CONTEXT_LENGTH,
       maxOutputTokens: val.INFERENCE_MAX_OUTPUT_TOKENS,
+      useMaxCompletionTokens: val.INFERENCE_USE_MAX_COMPLETION_TOKENS,
       outputSchema:
         val.INFERENCE_SUPPORTS_STRUCTURED_OUTPUT !== undefined
           ? val.INFERENCE_SUPPORTS_STRUCTURED_OUTPUT
diff --git a/packages/shared/inference.ts b/packages/shared/inference.ts
index 41026fbd..7689f4f4 100644
--- a/packages/shared/inference.ts
+++ b/packages/shared/inference.ts
@@ -90,7 +90,9 @@ class OpenAIInferenceClient implements InferenceClient {
       {
         messages: [{ role: "user", content: prompt }],
         model: serverConfig.inference.textModel,
-        max_tokens: serverConfig.inference.maxOutputTokens,
+        ...(serverConfig.inference.useMaxCompletionTokens
+          ? { max_completion_tokens: serverConfig.inference.maxOutputTokens }
+          : { max_tokens: serverConfig.inference.maxOutputTokens }),
         response_format: mapInferenceOutputSchema(
           {
             structured: optsWithDefaults.schema
@@ -127,7 +129,9 @@ class OpenAIInferenceClient implements InferenceClient {
     const chatCompletion = await this.openAI.chat.completions.create(
       {
         model: serverConfig.inference.imageModel,
-        max_tokens: serverConfig.inference.maxOutputTokens,
+        ...(serverConfig.inference.useMaxCompletionTokens
+          ? { max_completion_tokens: serverConfig.inference.maxOutputTokens }
+          : { max_tokens: serverConfig.inference.maxOutputTokens }),
         response_format: mapInferenceOutputSchema(
           {
             structured: optsWithDefaults.schema
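The new INFERENCE_USE_MAX_COMPLETION_TOKENS variable is parsed with a stringBool helper defined elsewhere in config.ts. A minimal sketch of what such a zod helper might look like; only the name and the "false" default come from the diff, the implementation itself is an assumption:

import { z } from "zod";

// Hypothetical reconstruction of the stringBool helper used above:
// accepts "true"/"false" from the environment, applies a string default,
// and transforms the result into a real boolean.
const stringBool = (defaultValue: string) =>
  z
    .string()
    .default(defaultValue)
    .transform((value) => value === "true");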

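The motivation for the flag is that newer OpenAI models reject the deprecated max_tokens parameter and expect max_completion_tokens instead, while many OpenAI-compatible servers still only understand max_tokens. The same conditional spread from the diff works against any Chat Completions call. A minimal sketch, assuming the official openai Node SDK; the tokenLimitParams helper and the model name are illustrative, not part of the repository:

import OpenAI from "openai";

// Hypothetical helper mirroring the diff's pattern: pick the parameter
// name the target model accepts for capping the generated output.
function tokenLimitParams(
  useMaxCompletionTokens: boolean,
  maxOutputTokens: number,
) {
  return useMaxCompletionTokens
    ? { max_completion_tokens: maxOutputTokens }
    : { max_tokens: maxOutputTokens };
}

const openAI = new OpenAI(); // reads OPENAI_API_KEY from the environment

const completion = await openAI.chat.completions.create({
  model: "gpt-4o-mini", // illustrative model name
  messages: [{ role: "user", content: "Summarize this bookmark." }],
  ...tokenLimitParams(true, 2048),
});
console.log(completion.choices[0].message.content);

In a deployment, opting into the new parameter should then just be a matter of setting INFERENCE_USE_MAX_COMPLETION_TOKENS=true alongside the existing inference variables; with the stringBool("false") default, existing configurations keep sending max_tokens unchanged.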