karakeep — aa7a81e0

Commit aa7a81e0

SHA	aa7a81e0cad81f51ce21e2977c60ab4cb66e9e43
Author	Robert Rosca <32569096+RobertRosca at users dot noreply dot github dot com>
Author Date	2026-01-03 20:31 +0100
Committer	GitHub <noreply at github dot com>
Commit Date	2026-01-03 19:31 +0000
Parent(s)	2a6fe6e6fa52 (diff)
Tree	f90dbab5e706

patch snapshot

feat: add openai service tier configuration option (#2339)

	File	+	-
M	docs/docs/03-configuration/01-environment-variables.md	+1	-0
M	packages/shared/config.ts	+2	-0
M	packages/shared/inference.ts	+8	-0
M	tools/compare-models/src/config.ts	+1	-0
M	tools/compare-models/src/inferenceClient.ts	+1	-0

5 file(s) changed, 13 insertions(+), 0 deletions(-)

docs/docs/03-configuration/01-environment-variables.md

diff --git a/docs/docs/03-configuration/01-environment-variables.md b/docs/docs/03-configuration/01-environment-variables.md
index 5584e620..7b09c38a 100644
--- a/docs/docs/03-configuration/01-environment-variables.md
+++ b/docs/docs/03-configuration/01-environment-variables.md
@@ -95,6 +95,7 @@ Either `OPENAI_API_KEY` or `OLLAMA_BASE_URL` need to be set for automatic taggin
 | OPENAI_API_KEY                       | No       | Not set                | The OpenAI key used for automatic tagging. More on that in [here](../integrations/openai).                                                                                                                                                                                                                                                                                            |
 | OPENAI_BASE_URL                      | No       | Not set                | If you just want to use OpenAI you don't need to pass this variable. If, however, you want to use some other openai compatible API (e.g. azure openai service), set this to the url of the API.                                                                                                                                                                                       |
 | OPENAI_PROXY_URL                     | No       | Not set                | HTTP proxy server URL for OpenAI API requests (e.g., `http://proxy.example.com:8080`).                                                                                                                                                                                                                                                                                                |
+| OPENAI_SERVICE_TIER                  | No       | Not set                | The service tier requested for OpenAI inference calls: `auto`, `default`, or `flex`. When not set, no `service_tier` is sent with the request. Flex processing provides lower costs in exchange for slower response times and occasional resource unavailability, so consider raising `INFERENCE_JOB_TIMEOUT_SEC` when using it. See [OpenAI Flex Processing](https://platform.openai.com/docs/guides/flex-processing) and [Chat Service Tier](https://platform.openai.com/docs/api-reference/chat/object#chat-object-service_tier) for more details. |
 | OLLAMA_BASE_URL                      | No       | Not set                | If you want to use ollama for local inference, set the address of ollama API here.                                                                                                                                                                                                                                                                                                    |
 | OLLAMA_KEEP_ALIVE                    | No       | Not set                | Controls how long the model will stay loaded into memory following the request (example value: "5m").                                                                                                                                                                                                                                                                                 |
 | INFERENCE_TEXT_MODEL                 | No       | gpt-4.1-mini           | The model to use for text inference. You'll need to change this to some other model if you're using ollama.                                                                                                                                                                                                                                                                           |

packages/shared/config.ts

diff --git a/packages/shared/config.ts b/packages/shared/config.ts
index 28dcc624..7238e90c 100644
--- a/packages/shared/config.ts
+++ b/packages/shared/config.ts
@@ -59,6 +59,7 @@ const allEnv = z.object({
   OPENAI_API_KEY: z.string().optional(),
   OPENAI_BASE_URL: z.string().url().optional(),
   OPENAI_PROXY_URL: z.string().url().optional(),
+  OPENAI_SERVICE_TIER: z.enum(["auto", "default", "flex"]).optional(),
   OLLAMA_BASE_URL: z.string().url().optional(),
   OLLAMA_KEEP_ALIVE: z.string().optional(),
   INFERENCE_JOB_TIMEOUT_SEC: z.coerce.number().default(30),
@@ -277,6 +278,7 @@ const serverConfigSchema = allEnv.transform((val, ctx) => {
       openAIApiKey: val.OPENAI_API_KEY,
       openAIBaseUrl: val.OPENAI_BASE_URL,
       openAIProxyUrl: val.OPENAI_PROXY_URL,
+      openAIServiceTier: val.OPENAI_SERVICE_TIER,
       ollamaBaseUrl: val.OLLAMA_BASE_URL,
       ollamaKeepAlive: val.OLLAMA_KEEP_ALIVE,
       textModel: val.INFERENCE_TEXT_MODEL,

packages/shared/inference.ts

diff --git a/packages/shared/inference.ts b/packages/shared/inference.ts
index fe71778e..61a621ac 100644
--- a/packages/shared/inference.ts
+++ b/packages/shared/inference.ts
@@ -56,6 +56,7 @@ export interface OpenAIInferenceConfig {
   apiKey: string;
   baseURL?: string;
   proxyUrl?: string;
+  serviceTier?: typeof serverConfig.inference.openAIServiceTier;
   textModel: string;
   imageModel: string;
   contextLength: number;
@@ -106,6 +107,7 @@ export class OpenAIInferenceClient implements InferenceClient {
       apiKey: serverConfig.inference.openAIApiKey!,
       baseURL: serverConfig.inference.openAIBaseUrl,
       proxyUrl: serverConfig.inference.openAIProxyUrl,
+      serviceTier: serverConfig.inference.openAIServiceTier,
       textModel: serverConfig.inference.textModel,
       imageModel: serverConfig.inference.imageModel,
       contextLength: serverConfig.inference.contextLength,
@@ -127,6 +129,9 @@ export class OpenAIInferenceClient implements InferenceClient {
       {
         messages: [{ role: "user", content: prompt }],
         model: this.config.textModel,
+        ...(this.config.serviceTier
+          ? { service_tier: this.config.serviceTier }
+          : {}),
         ...(this.config.useMaxCompletionTokens
           ? { max_completion_tokens: this.config.maxOutputTokens }
           : { max_tokens: this.config.maxOutputTokens }),
@@ -166,6 +171,9 @@ export class OpenAIInferenceClient implements InferenceClient {
     const chatCompletion = await this.openAI.chat.completions.create(
       {
         model: this.config.imageModel,
+        ...(this.config.serviceTier
+          ? { service_tier: this.config.serviceTier }
+          : {}),
         ...(this.config.useMaxCompletionTokens
           ? { max_completion_tokens: this.config.maxOutputTokens }
           : { max_tokens: this.config.maxOutputTokens }),

tools/compare-models/src/config.ts

diff --git a/tools/compare-models/src/config.ts b/tools/compare-models/src/config.ts
index 0b5d217f..3a2d0d41 100644
--- a/tools/compare-models/src/config.ts
+++ b/tools/compare-models/src/config.ts
@@ -8,6 +8,7 @@ const envSchema = z.object({
   MODEL2_NAME: z.string().min(1).optional(),
   OPENAI_API_KEY: z.string().min(1),
   OPENAI_BASE_URL: z.string().url().optional(),
+  OPENAI_SERVICE_TIER: z.enum(["auto", "default", "flex"]).optional(),
   COMPARISON_MODE: z
     .enum(["model-vs-model", "model-vs-existing"])
     .default("model-vs-model"),

tools/compare-models/src/inferenceClient.ts

diff --git a/tools/compare-models/src/inferenceClient.ts b/tools/compare-models/src/inferenceClient.ts
index 8649f715..0a5ed8b5 100644
--- a/tools/compare-models/src/inferenceClient.ts
+++ b/tools/compare-models/src/inferenceClient.ts
@@ -11,6 +11,7 @@ export function createInferenceClient(modelName: string): InferenceClient {
   const inferenceConfig: OpenAIInferenceConfig = {
     apiKey: config.OPENAI_API_KEY,
     baseURL: config.OPENAI_BASE_URL,
+    serviceTier: config.OPENAI_SERVICE_TIER,
     textModel: modelName,
     imageModel: modelName, // Use same model for images if needed
     contextLength: config.INFERENCE_CONTEXT_LENGTH,