Diffstat (limited to 'packages/shared')
-rw-r--r--  packages/shared/config.ts    | 2 ++
-rw-r--r--  packages/shared/inference.ts | 8 ++++++--
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/packages/shared/config.ts b/packages/shared/config.ts
index 5343246d..8e7d0252 100644
--- a/packages/shared/config.ts
+++ b/packages/shared/config.ts
@@ -65,6 +65,7 @@ const allEnv = z.object({
EMBEDDING_TEXT_MODEL: z.string().default("text-embedding-3-small"),
INFERENCE_CONTEXT_LENGTH: z.coerce.number().default(2048),
INFERENCE_MAX_OUTPUT_TOKENS: z.coerce.number().default(2048),
+ INFERENCE_USE_MAX_COMPLETION_TOKENS: stringBool("false"),
INFERENCE_SUPPORTS_STRUCTURED_OUTPUT: optionalStringBool(),
INFERENCE_OUTPUT_SCHEMA: z
.enum(["structured", "json", "plain"])
@@ -241,6 +242,7 @@ const serverConfigSchema = allEnv.transform((val, ctx) => {
inferredTagLang: val.INFERENCE_LANG,
contextLength: val.INFERENCE_CONTEXT_LENGTH,
maxOutputTokens: val.INFERENCE_MAX_OUTPUT_TOKENS,
+ useMaxCompletionTokens: val.INFERENCE_USE_MAX_COMPLETION_TOKENS,
outputSchema:
val.INFERENCE_SUPPORTS_STRUCTURED_OUTPUT !== undefined
? val.INFERENCE_SUPPORTS_STRUCTURED_OUTPUT
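
The new INFERENCE_USE_MAX_COMPLETION_TOKENS variable defaults to "false" and is surfaced to the rest of the server config as useMaxCompletionTokens. The stringBool helper is defined elsewhere in config.ts and is not part of this diff; below is a minimal sketch of what such a coercion might look like, with the helper body assumed rather than taken from the source:

import { z } from "zod";

// Assumed helper: coerce "true"/"false" environment strings to a boolean,
// falling back to the given string default when the variable is unset.
const stringBool = (defaultValue: string) =>
  z
    .string()
    .default(defaultValue)
    .transform((v) => v.toLowerCase() === "true");

const schema = z.object({
  INFERENCE_USE_MAX_COMPLETION_TOKENS: stringBool("false"),
});

schema.parse({});                                              // { INFERENCE_USE_MAX_COMPLETION_TOKENS: false }
schema.parse({ INFERENCE_USE_MAX_COMPLETION_TOKENS: "true" }); // { INFERENCE_USE_MAX_COMPLETION_TOKENS: true }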
diff --git a/packages/shared/inference.ts b/packages/shared/inference.ts
index 41026fbd..7689f4f4 100644
--- a/packages/shared/inference.ts
+++ b/packages/shared/inference.ts
@@ -90,7 +90,9 @@ class OpenAIInferenceClient implements InferenceClient {
{
messages: [{ role: "user", content: prompt }],
model: serverConfig.inference.textModel,
- max_tokens: serverConfig.inference.maxOutputTokens,
+ ...(serverConfig.inference.useMaxCompletionTokens
+ ? { max_completion_tokens: serverConfig.inference.maxOutputTokens }
+ : { max_tokens: serverConfig.inference.maxOutputTokens }),
response_format: mapInferenceOutputSchema(
{
structured: optsWithDefaults.schema
@@ -127,7 +129,9 @@ class OpenAIInferenceClient implements InferenceClient {
const chatCompletion = await this.openAI.chat.completions.create(
{
model: serverConfig.inference.imageModel,
- max_tokens: serverConfig.inference.maxOutputTokens,
+ ...(serverConfig.inference.useMaxCompletionTokens
+ ? { max_completion_tokens: serverConfig.inference.maxOutputTokens }
+ : { max_tokens: serverConfig.inference.maxOutputTokens }),
response_format: mapInferenceOutputSchema(
{
structured: optsWithDefaults.schema
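
Both call sites apply the same conditional spread: when useMaxCompletionTokens is enabled, the request sends max_completion_tokens (the field newer OpenAI chat models expect, with max_tokens deprecated for them) instead of max_tokens. Below is a minimal sketch of that pattern in isolation, assuming the config shape used above; the helper name is illustrative and not part of the diff:

// Picks the token-limit parameter for the chat completions request body.
function tokenLimitParams(cfg: {
  useMaxCompletionTokens: boolean;
  maxOutputTokens: number;
}): { max_completion_tokens: number } | { max_tokens: number } {
  return cfg.useMaxCompletionTokens
    ? { max_completion_tokens: cfg.maxOutputTokens }
    : { max_tokens: cfg.maxOutputTokens };
}

// Spread into the request body, as the diff does at both call sites:
// await openAI.chat.completions.create({
//   model: serverConfig.inference.textModel,
//   messages: [{ role: "user", content: prompt }],
//   ...tokenLimitParams(serverConfig.inference),
// });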