| author | Robert Rosca <32569096+RobertRosca@users.noreply.github.com> | 2026-01-03 20:31:41 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2026-01-03 19:31:41 +0000 |
| commit | aa7a81e0cad81f51ce21e2977c60ab4cb66e9e43 (patch) | |
| tree | f90dbab5e70642c081d33ad436b606d49710f363 /packages/shared | |
| parent | 2a6fe6e6fa52a670bd3a7f01cbfbcf05eb5971fc (diff) | |
| download | karakeep-aa7a81e0cad81f51ce21e2977c60ab4cb66e9e43.tar.zst | |
feat: add openai service tier configuration option (#2339)
Diffstat (limited to 'packages/shared')
| -rw-r--r-- | packages/shared/config.ts | 2 |
| -rw-r--r-- | packages/shared/inference.ts | 8 |
2 files changed, 10 insertions, 0 deletions
```diff
diff --git a/packages/shared/config.ts b/packages/shared/config.ts
index 28dcc624..7238e90c 100644
--- a/packages/shared/config.ts
+++ b/packages/shared/config.ts
@@ -59,6 +59,7 @@ const allEnv = z.object({
   OPENAI_API_KEY: z.string().optional(),
   OPENAI_BASE_URL: z.string().url().optional(),
   OPENAI_PROXY_URL: z.string().url().optional(),
+  OPENAI_SERVICE_TIER: z.enum(["auto", "default", "flex"]).optional(),
   OLLAMA_BASE_URL: z.string().url().optional(),
   OLLAMA_KEEP_ALIVE: z.string().optional(),
   INFERENCE_JOB_TIMEOUT_SEC: z.coerce.number().default(30),
@@ -277,6 +278,7 @@ const serverConfigSchema = allEnv.transform((val, ctx) => {
     openAIApiKey: val.OPENAI_API_KEY,
     openAIBaseUrl: val.OPENAI_BASE_URL,
     openAIProxyUrl: val.OPENAI_PROXY_URL,
+    openAIServiceTier: val.OPENAI_SERVICE_TIER,
     ollamaBaseUrl: val.OLLAMA_BASE_URL,
     ollamaKeepAlive: val.OLLAMA_KEEP_ALIVE,
     textModel: val.INFERENCE_TEXT_MODEL,
diff --git a/packages/shared/inference.ts b/packages/shared/inference.ts
index fe71778e..61a621ac 100644
--- a/packages/shared/inference.ts
+++ b/packages/shared/inference.ts
@@ -56,6 +56,7 @@ export interface OpenAIInferenceConfig {
   apiKey: string;
   baseURL?: string;
   proxyUrl?: string;
+  serviceTier?: typeof serverConfig.inference.openAIServiceTier;
   textModel: string;
   imageModel: string;
   contextLength: number;
@@ -106,6 +107,7 @@ export class OpenAIInferenceClient implements InferenceClient {
       apiKey: serverConfig.inference.openAIApiKey!,
       baseURL: serverConfig.inference.openAIBaseUrl,
       proxyUrl: serverConfig.inference.openAIProxyUrl,
+      serviceTier: serverConfig.inference.openAIServiceTier,
       textModel: serverConfig.inference.textModel,
       imageModel: serverConfig.inference.imageModel,
       contextLength: serverConfig.inference.contextLength,
@@ -127,6 +129,9 @@ export class OpenAIInferenceClient implements InferenceClient {
       {
         messages: [{ role: "user", content: prompt }],
         model: this.config.textModel,
+        ...(this.config.serviceTier
+          ? { service_tier: this.config.serviceTier }
+          : {}),
         ...(this.config.useMaxCompletionTokens
           ? { max_completion_tokens: this.config.maxOutputTokens }
           : { max_tokens: this.config.maxOutputTokens }),
@@ -166,6 +171,9 @@ export class OpenAIInferenceClient implements InferenceClient {
     const chatCompletion = await this.openAI.chat.completions.create(
       {
         model: this.config.imageModel,
+        ...(this.config.serviceTier
+          ? { service_tier: this.config.serviceTier }
+          : {}),
         ...(this.config.useMaxCompletionTokens
           ? { max_completion_tokens: this.config.maxOutputTokens }
           : { max_tokens: this.config.maxOutputTokens }),
```
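The pattern worth noting in the diff is the conditional spread: `service_tier` is only added to the chat-completion request when the new config value is set, so deployments that never define `OPENAI_SERVICE_TIER` send exactly the same request body as before. Below is a minimal, self-contained sketch of that pattern; `ServiceTier`, `RequestOptions`, and `buildRequest` are illustrative names, not part of the karakeep codebase or the OpenAI SDK.

```typescript
// Hypothetical standalone sketch of the conditional-spread pattern used in
// the diff above; the names here are illustrative, not from karakeep.
type ServiceTier = "auto" | "default" | "flex";

interface RequestOptions {
  model: string;
  service_tier?: ServiceTier;
  max_tokens: number;
}

function buildRequest(model: string, serviceTier?: ServiceTier): RequestOptions {
  return {
    model,
    // An empty object spreads to no keys at all, so an unset tier leaves the
    // request body unchanged rather than sending `service_tier: undefined`.
    ...(serviceTier ? { service_tier: serviceTier } : {}),
    max_tokens: 1024,
  };
}

console.log(buildRequest("gpt-4o-mini"));
// => { model: "gpt-4o-mini", max_tokens: 1024 }
console.log(buildRequest("gpt-4o-mini", "flex"));
// => { model: "gpt-4o-mini", service_tier: "flex", max_tokens: 1024 }
```

Assuming OpenAI's documented semantics for `service_tier` (where `flex` opts into slower, lower-cost processing on supported models), setting `OPENAI_SERVICE_TIER=flex` in the server environment applies the tier to both the text and image inference calls, while leaving it unset preserves the previous behavior.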
