author    Robert Rosca <32569096+RobertRosca@users.noreply.github.com>  2026-01-03 20:31:41 +0100
committer GitHub <noreply@github.com>  2026-01-03 19:31:41 +0000
commit    aa7a81e0cad81f51ce21e2977c60ab4cb66e9e43 (patch)
tree      f90dbab5e70642c081d33ad436b606d49710f363 /packages/shared/inference.ts
parent    2a6fe6e6fa52a670bd3a7f01cbfbcf05eb5971fc (diff)
download  karakeep-aa7a81e0cad81f51ce21e2977c60ab4cb66e9e43.tar.zst
feat: add openai service tier configuration option (#2339)
Diffstat (limited to 'packages/shared/inference.ts')
-rw-r--r--  packages/shared/inference.ts | 8 ++++++++
1 file changed, 8 insertions(+), 0 deletions(-)
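
The change threads an optional service tier from server config into both chat-completion call sites, using a conditional spread so the field is omitted from the request body entirely when no tier is configured (rather than sending service_tier: undefined). A minimal sketch of that pattern in TypeScript; the type alias and tier values here are illustrative, not taken from this diff:

    // Conditional spread: when serviceTier is undefined, spreading {}
    // contributes no key, so the request omits service_tier instead of
    // sending service_tier: undefined.
    type ServiceTier = "auto" | "default" | "flex"; // illustrative values

    function buildParams(model: string, serviceTier?: ServiceTier) {
      return {
        model,
        ...(serviceTier ? { service_tier: serviceTier } : {}),
      };
    }

    // buildParams("gpt-4o-mini")         => { model: "gpt-4o-mini" }
    // buildParams("gpt-4o-mini", "flex") => { model: "gpt-4o-mini", service_tier: "flex" }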
diff --git a/packages/shared/inference.ts b/packages/shared/inference.ts
index fe71778e..61a621ac 100644
--- a/packages/shared/inference.ts
+++ b/packages/shared/inference.ts
@@ -56,6 +56,7 @@ export interface OpenAIInferenceConfig {
apiKey: string;
baseURL?: string;
proxyUrl?: string;
+ serviceTier?: typeof serverConfig.inference.openAIServiceTier;
textModel: string;
imageModel: string;
contextLength: number;
@@ -106,6 +107,7 @@ export class OpenAIInferenceClient implements InferenceClient {
apiKey: serverConfig.inference.openAIApiKey!,
baseURL: serverConfig.inference.openAIBaseUrl,
proxyUrl: serverConfig.inference.openAIProxyUrl,
+ serviceTier: serverConfig.inference.openAIServiceTier,
textModel: serverConfig.inference.textModel,
imageModel: serverConfig.inference.imageModel,
contextLength: serverConfig.inference.contextLength,
@@ -127,6 +129,9 @@ export class OpenAIInferenceClient implements InferenceClient {
{
messages: [{ role: "user", content: prompt }],
model: this.config.textModel,
+ ...(this.config.serviceTier
+ ? { service_tier: this.config.serviceTier }
+ : {}),
...(this.config.useMaxCompletionTokens
? { max_completion_tokens: this.config.maxOutputTokens }
: { max_tokens: this.config.maxOutputTokens }),
@@ -166,6 +171,9 @@ export class OpenAIInferenceClient implements InferenceClient {
const chatCompletion = await this.openAI.chat.completions.create(
{
model: this.config.imageModel,
+ ...(this.config.serviceTier
+ ? { service_tier: this.config.serviceTier }
+ : {}),
...(this.config.useMaxCompletionTokens
? { max_completion_tokens: this.config.maxOutputTokens }
: { max_tokens: this.config.maxOutputTokens }),
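
For reference, a hedged sketch of the call that results once a tier is configured, assuming the official openai npm package that the client wraps; the model name, tier value, and prompt below are placeholders, not values from this diff:

    import OpenAI from "openai";

    const openAI = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

    async function run() {
      const chatCompletion = await openAI.chat.completions.create({
        model: "gpt-4o-mini", // placeholder for config.textModel
        service_tier: "flex", // forwarded from config.serviceTier
        max_completion_tokens: 512,
        messages: [{ role: "user", content: "Summarize this page." }],
      });
      console.log(chatCompletion.choices[0].message.content);
    }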