| author | Robert Rosca <32569096+RobertRosca@users.noreply.github.com> | 2026-01-03 20:31:41 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2026-01-03 19:31:41 +0000 |
| commit | aa7a81e0cad81f51ce21e2977c60ab4cb66e9e43 | |
| tree | f90dbab5e70642c081d33ad436b606d49710f363 | |
| parent | 2a6fe6e6fa52a670bd3a7f01cbfbcf05eb5971fc | |
feat: add openai service tier configuration option (#2339)
| -rw-r--r-- | docs/docs/03-configuration/01-environment-variables.md | 1 |
| -rw-r--r-- | packages/shared/config.ts | 2 |
| -rw-r--r-- | packages/shared/inference.ts | 8 |
| -rw-r--r-- | tools/compare-models/src/config.ts | 1 |
| -rw-r--r-- | tools/compare-models/src/inferenceClient.ts | 1 |
5 files changed, 13 insertions, 0 deletions
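The commit itself is small: it introduces a new `OPENAI_SERVICE_TIER` environment variable, validates it as one of `auto`, `default`, or `flex`, and forwards it to the OpenAI chat completion requests. As a minimal sketch of the validation side (assuming only that Zod is installed; the standalone schema and sample parse below are illustrative, not code from the repository):

```typescript
import { z } from "zod";

// Mirrors the enum added to packages/shared/config.ts: the variable is
// optional, and when present it must be "auto", "default", or "flex".
const envSchema = z.object({
  OPENAI_SERVICE_TIER: z.enum(["auto", "default", "flex"]).optional(),
});

// Parsing an environment where flex processing has been requested.
const parsed = envSchema.parse({ OPENAI_SERVICE_TIER: "flex" });
console.log(parsed.OPENAI_SERVICE_TIER); // "flex"

// Any other value (e.g. "priority") would fail this validation.
```

Leaving the variable unset keeps the previous behaviour, since the field is optional.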
diff --git a/docs/docs/03-configuration/01-environment-variables.md b/docs/docs/03-configuration/01-environment-variables.md
index 5584e620..7b09c38a 100644
--- a/docs/docs/03-configuration/01-environment-variables.md
+++ b/docs/docs/03-configuration/01-environment-variables.md
@@ -95,6 +95,7 @@ Either `OPENAI_API_KEY` or `OLLAMA_BASE_URL` need to be set for automatic taggin
 | OPENAI_API_KEY | No | Not set | The OpenAI key used for automatic tagging. More on that in [here](../integrations/openai). |
 | OPENAI_BASE_URL | No | Not set | If you just want to use OpenAI you don't need to pass this variable. If, however, you want to use some other openai compatible API (e.g. azure openai service), set this to the url of the API. |
 | OPENAI_PROXY_URL | No | Not set | HTTP proxy server URL for OpenAI API requests (e.g., `http://proxy.example.com:8080`). |
+| OPENAI_SERVICE_TIER | No | Not set | Set to `auto`, `default`, or `flex`. Flex processing provides lower costs in exchange for slower response times and occasional resource unavailability. See [OpenAI Flex Processing](https://platform.openai.com/docs/guides/flex-processing) and [Chat Service Tier](https://platform.openai.com/docs/api-reference/chat/object#chat-object-service_tier) for more details. |
 | OLLAMA_BASE_URL | No | Not set | If you want to use ollama for local inference, set the address of ollama API here. |
 | OLLAMA_KEEP_ALIVE | No | Not set | Controls how long the model will stay loaded into memory following the request (example value: "5m"). |
 | INFERENCE_TEXT_MODEL | No | gpt-4.1-mini | The model to use for text inference. You'll need to change this to some other model if you're using ollama. |
diff --git a/packages/shared/config.ts b/packages/shared/config.ts
index 28dcc624..7238e90c 100644
--- a/packages/shared/config.ts
+++ b/packages/shared/config.ts
@@ -59,6 +59,7 @@ const allEnv = z.object({
   OPENAI_API_KEY: z.string().optional(),
   OPENAI_BASE_URL: z.string().url().optional(),
   OPENAI_PROXY_URL: z.string().url().optional(),
+  OPENAI_SERVICE_TIER: z.enum(["auto", "default", "flex"]).optional(),
   OLLAMA_BASE_URL: z.string().url().optional(),
   OLLAMA_KEEP_ALIVE: z.string().optional(),
   INFERENCE_JOB_TIMEOUT_SEC: z.coerce.number().default(30),
@@ -277,6 +278,7 @@ const serverConfigSchema = allEnv.transform((val, ctx) => {
       openAIApiKey: val.OPENAI_API_KEY,
       openAIBaseUrl: val.OPENAI_BASE_URL,
       openAIProxyUrl: val.OPENAI_PROXY_URL,
+      openAIServiceTier: val.OPENAI_SERVICE_TIER,
       ollamaBaseUrl: val.OLLAMA_BASE_URL,
       ollamaKeepAlive: val.OLLAMA_KEEP_ALIVE,
       textModel: val.INFERENCE_TEXT_MODEL,
diff --git a/packages/shared/inference.ts b/packages/shared/inference.ts
index fe71778e..61a621ac 100644
--- a/packages/shared/inference.ts
+++ b/packages/shared/inference.ts
@@ -56,6 +56,7 @@ export interface OpenAIInferenceConfig {
   apiKey: string;
   baseURL?: string;
   proxyUrl?: string;
+  serviceTier?: typeof serverConfig.inference.openAIServiceTier;
   textModel: string;
   imageModel: string;
   contextLength: number;
@@ -106,6 +107,7 @@ export class OpenAIInferenceClient implements InferenceClient {
       apiKey: serverConfig.inference.openAIApiKey!,
       baseURL: serverConfig.inference.openAIBaseUrl,
       proxyUrl: serverConfig.inference.openAIProxyUrl,
+      serviceTier: serverConfig.inference.openAIServiceTier,
       textModel: serverConfig.inference.textModel,
       imageModel: serverConfig.inference.imageModel,
       contextLength: serverConfig.inference.contextLength,
@@ -127,6 +129,9 @@ export class OpenAIInferenceClient implements InferenceClient {
       {
         messages: [{ role: "user", content: prompt }],
         model: this.config.textModel,
+        ...(this.config.serviceTier
+          ? { service_tier: this.config.serviceTier }
+          : {}),
         ...(this.config.useMaxCompletionTokens
           ? { max_completion_tokens: this.config.maxOutputTokens }
           : { max_tokens: this.config.maxOutputTokens }),
@@ -166,6 +171,9 @@ export class OpenAIInferenceClient implements InferenceClient {
     const chatCompletion = await this.openAI.chat.completions.create(
       {
         model: this.config.imageModel,
+        ...(this.config.serviceTier
+          ? { service_tier: this.config.serviceTier }
+          : {}),
         ...(this.config.useMaxCompletionTokens
           ? { max_completion_tokens: this.config.maxOutputTokens }
           : { max_tokens: this.config.maxOutputTokens }),
diff --git a/tools/compare-models/src/config.ts b/tools/compare-models/src/config.ts
index 0b5d217f..3a2d0d41 100644
--- a/tools/compare-models/src/config.ts
+++ b/tools/compare-models/src/config.ts
@@ -8,6 +8,7 @@ const envSchema = z.object({
   MODEL2_NAME: z.string().min(1).optional(),
   OPENAI_API_KEY: z.string().min(1),
   OPENAI_BASE_URL: z.string().url().optional(),
+  OPENAI_SERVICE_TIER: z.enum(["auto", "default", "flex"]).optional(),
   COMPARISON_MODE: z
     .enum(["model-vs-model", "model-vs-existing"])
     .default("model-vs-model"),
diff --git a/tools/compare-models/src/inferenceClient.ts b/tools/compare-models/src/inferenceClient.ts
index 8649f715..0a5ed8b5 100644
--- a/tools/compare-models/src/inferenceClient.ts
+++ b/tools/compare-models/src/inferenceClient.ts
@@ -11,6 +11,7 @@ export function createInferenceClient(modelName: string): InferenceClient {
   const inferenceConfig: OpenAIInferenceConfig = {
     apiKey: config.OPENAI_API_KEY,
     baseURL: config.OPENAI_BASE_URL,
+    serviceTier: config.OPENAI_SERVICE_TIER,
     textModel: modelName,
     imageModel: modelName, // Use same model for images if needed
     contextLength: config.INFERENCE_CONTEXT_LENGTH,
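To see how the new field reaches the API, here is a minimal sketch of the conditional-spread pattern the diff applies in `packages/shared/inference.ts`, assuming the official `openai` Node SDK; the trimmed-down config interface and wrapper function are illustrative rather than the actual class from the repository:

```typescript
import OpenAI from "openai";

// Illustrative subset of OpenAIInferenceConfig with only the fields used here.
interface InferenceConfig {
  apiKey: string;
  serviceTier?: "auto" | "default" | "flex";
  textModel: string;
  maxOutputTokens: number;
}

async function completeText(config: InferenceConfig, prompt: string) {
  const openAI = new OpenAI({ apiKey: config.apiKey });
  return openAI.chat.completions.create({
    messages: [{ role: "user", content: prompt }],
    model: config.textModel,
    // service_tier is only included when a tier was configured, so requests
    // from deployments that never set OPENAI_SERVICE_TIER are unchanged.
    ...(config.serviceTier ? { service_tier: config.serviceTier } : {}),
    max_tokens: config.maxOutputTokens,
  });
}
```

Setting `OPENAI_SERVICE_TIER=flex` then opts those requests into OpenAI's flex processing: lower cost, at the price of slower responses and occasional resource unavailability, as the documentation change above notes.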
