| author | Robert Rosca <32569096+RobertRosca@users.noreply.github.com> | 2026-01-03 20:31:41 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2026-01-03 19:31:41 +0000 |
| commit | aa7a81e0cad81f51ce21e2977c60ab4cb66e9e43 | |
| tree | f90dbab5e70642c081d33ad436b606d49710f363 | |
| parent | 2a6fe6e6fa52a670bd3a7f01cbfbcf05eb5971fc | |
feat: add openai service tier configuration option (#2339)
| -rw-r--r-- | docs/docs/03-configuration/01-environment-variables.md | 1 |
| -rw-r--r-- | packages/shared/config.ts | 2 |
| -rw-r--r-- | packages/shared/inference.ts | 8 |
| -rw-r--r-- | tools/compare-models/src/config.ts | 1 |
| -rw-r--r-- | tools/compare-models/src/inferenceClient.ts | 1 |
5 files changed, 13 insertions, 0 deletions
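The commit itself is small: it introduces a new `OPENAI_SERVICE_TIER` environment variable, validates it as one of `auto`, `default`, or `flex`, and forwards it to the OpenAI chat completion requests. As a minimal sketch of the validation side (assuming only that Zod is installed; the standalone schema and sample parse below are illustrative, not code from the repository):

```typescript
import { z } from "zod";

// Mirrors the enum added to packages/shared/config.ts: the variable is
// optional, and when present it must be "auto", "default", or "flex".
const envSchema = z.object({
  OPENAI_SERVICE_TIER: z.enum(["auto", "default", "flex"]).optional(),
});

// Parsing an environment where flex processing has been requested.
const parsed = envSchema.parse({ OPENAI_SERVICE_TIER: "flex" });
console.log(parsed.OPENAI_SERVICE_TIER); // "flex"

// Any other value (e.g. "priority") would fail this validation.
```

Leaving the variable unset keeps the previous behaviour, since the field is optional.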
diff --git a/docs/docs/03-configuration/01-environment-variables.md b/docs/docs/03-configuration/01-environment-variables.md
index 5584e620..7b09c38a 100644
--- a/docs/docs/03-configuration/01-environment-variables.md
+++ b/docs/docs/03-configuration/01-environment-variables.md
@@ -95,6 +95,7 @@ Either `OPENAI_API_KEY` or `OLLAMA_BASE_URL` need to be set for automatic taggin
 | OPENAI_API_KEY | No | Not set | The OpenAI key used for automatic tagging. More on that in [here](../integrations/openai). |
 | OPENAI_BASE_URL | No | Not set | If you just want to use OpenAI you don't need to pass this variable. If, however, you want to use some other openai compatible API (e.g. azure openai service), set this to the url of the API. |
 | OPENAI_PROXY_URL | No | Not set | HTTP proxy server URL for OpenAI API requests (e.g., `http://proxy.example.com:8080`). |
+| OPENAI_SERVICE_TIER | No | Not set | Set to `auto`, `default`, or `flex`. Flex processing provides lower costs in exchange for slower response times and occasional resource unavailability. See [OpenAI Flex Processing](https://platform.openai.com/docs/guides/flex-processing) and [Chat Service Tier](https://platform.openai.com/docs/api-reference/chat/object#chat-object-service_tier) for more details. |
 | OLLAMA_BASE_URL | No | Not set | If you want to use ollama for local inference, set the address of ollama API here. |
 | OLLAMA_KEEP_ALIVE | No | Not set | Controls how long the model will stay loaded into memory following the request (example value: "5m"). |
 | INFERENCE_TEXT_MODEL | No | gpt-4.1-mini | The model to use for text inference. You'll need to change this to some other model if you're using ollama. |
diff --git a/packages/shared/config.ts b/packages/shared/config.ts
index 28dcc624..7238e90c 100644
--- a/packages/shared/config.ts
+++ b/packages/shared/config.ts
@@ -59,6 +59,7 @@ const allEnv = z.object({
   OPENAI_API_KEY: z.string().optional(),
   OPENAI_BASE_URL: z.string().url().optional(),
   OPENAI_PROXY_URL: z.string().url().optional(),
+  OPENAI_SERVICE_TIER: z.enum(["auto", "default", "flex"]).optional(),
   OLLAMA_BASE_URL: z.string().url().optional(),
   OLLAMA_KEEP_ALIVE: z.string().optional(),
   INFERENCE_JOB_TIMEOUT_SEC: z.coerce.number().default(30),
@@ -277,6 +278,7 @@ const serverConfigSchema = allEnv.transform((val, ctx) => {
       openAIApiKey: val.OPENAI_API_KEY,
       openAIBaseUrl: val.OPENAI_BASE_URL,
       openAIProxyUrl: val.OPENAI_PROXY_URL,
+      openAIServiceTier: val.OPENAI_SERVICE_TIER,
       ollamaBaseUrl: val.OLLAMA_BASE_URL,
       ollamaKeepAlive: val.OLLAMA_KEEP_ALIVE,
       textModel: val.INFERENCE_TEXT_MODEL,
diff --git a/packages/shared/inference.ts b/packages/shared/inference.ts
index fe71778e..61a621ac 100644
--- a/packages/shared/inference.ts
+++ b/packages/shared/inference.ts
@@ -56,6 +56,7 @@ export interface OpenAIInferenceConfig {
   apiKey: string;
   baseURL?: string;
   proxyUrl?: string;
+  serviceTier?: typeof serverConfig.inference.openAIServiceTier;
   textModel: string;
   imageModel: string;
   contextLength: number;
@@ -106,6 +107,7 @@ export class OpenAIInferenceClient implements InferenceClient {
       apiKey: serverConfig.inference.openAIApiKey!,
       baseURL: serverConfig.inference.openAIBaseUrl,
       proxyUrl: serverConfig.inference.openAIProxyUrl,
+      serviceTier: serverConfig.inference.openAIServiceTier,
       textModel: serverConfig.inference.textModel,
       imageModel: serverConfig.inference.imageModel,
       contextLength: serverConfig.inference.contextLength,
@@ -127,6 +129,9 @@ export class OpenAIInferenceClient implements InferenceClient {
       {
         messages: [{ role: "user", content: prompt }],
         model: this.config.textModel,
+        ...(this.config.serviceTier
+          ? { service_tier: this.config.serviceTier }
+          : {}),
         ...(this.config.useMaxCompletionTokens
           ? { max_completion_tokens: this.config.maxOutputTokens }
           : { max_tokens: this.config.maxOutputTokens }),
@@ -166,6 +171,9 @@ export class OpenAIInferenceClient implements InferenceClient {
     const chatCompletion = await this.openAI.chat.completions.create(
       {
         model: this.config.imageModel,
+        ...(this.config.serviceTier
+          ? { service_tier: this.config.serviceTier }
+          : {}),
         ...(this.config.useMaxCompletionTokens
           ? { max_completion_tokens: this.config.maxOutputTokens }
           : { max_tokens: this.config.maxOutputTokens }),
diff --git a/tools/compare-models/src/config.ts b/tools/compare-models/src/config.ts
index 0b5d217f..3a2d0d41 100644
--- a/tools/compare-models/src/config.ts
+++ b/tools/compare-models/src/config.ts
@@ -8,6 +8,7 @@ const envSchema = z.object({
   MODEL2_NAME: z.string().min(1).optional(),
   OPENAI_API_KEY: z.string().min(1),
   OPENAI_BASE_URL: z.string().url().optional(),
+  OPENAI_SERVICE_TIER: z.enum(["auto", "default", "flex"]).optional(),
   COMPARISON_MODE: z
     .enum(["model-vs-model", "model-vs-existing"])
     .default("model-vs-model"),
diff --git a/tools/compare-models/src/inferenceClient.ts b/tools/compare-models/src/inferenceClient.ts
index 8649f715..0a5ed8b5 100644
--- a/tools/compare-models/src/inferenceClient.ts
+++ b/tools/compare-models/src/inferenceClient.ts
@@ -11,6 +11,7 @@ export function createInferenceClient(modelName: string): InferenceClient {
   const inferenceConfig: OpenAIInferenceConfig = {
     apiKey: config.OPENAI_API_KEY,
     baseURL: config.OPENAI_BASE_URL,
+    serviceTier: config.OPENAI_SERVICE_TIER,
     textModel: modelName,
     imageModel: modelName, // Use same model for images if needed
     contextLength: config.INFERENCE_CONTEXT_LENGTH,
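To see how the new field reaches the API, here is a minimal sketch of the conditional-spread pattern the diff applies in `packages/shared/inference.ts`, assuming the official `openai` Node SDK; the trimmed-down config interface and wrapper function are illustrative rather than the actual class from the repository:

```typescript
import OpenAI from "openai";

// Illustrative subset of OpenAIInferenceConfig with only the fields used here.
interface InferenceConfig {
  apiKey: string;
  serviceTier?: "auto" | "default" | "flex";
  textModel: string;
  maxOutputTokens: number;
}

async function completeText(config: InferenceConfig, prompt: string) {
  const openAI = new OpenAI({ apiKey: config.apiKey });
  return openAI.chat.completions.create({
    messages: [{ role: "user", content: prompt }],
    model: config.textModel,
    // service_tier is only included when a tier was configured, so requests
    // from deployments that never set OPENAI_SERVICE_TIER are unchanged.
    ...(config.serviceTier ? { service_tier: config.serviceTier } : {}),
    max_tokens: config.maxOutputTokens,
  });
}
```

Setting `OPENAI_SERVICE_TIER=flex` then opts those requests into OpenAI's flex processing: lower cost, at the price of slower responses and occasional resource unavailability, as the documentation change above notes.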
