author    Robert Rosca <32569096+RobertRosca@users.noreply.github.com>  2026-01-03 20:31:41 +0100
committer GitHub <noreply@github.com>  2026-01-03 19:31:41 +0000
commit    aa7a81e0cad81f51ce21e2977c60ab4cb66e9e43 (patch)
tree      f90dbab5e70642c081d33ad436b606d49710f363 /packages
parent    2a6fe6e6fa52a670bd3a7f01cbfbcf05eb5971fc (diff)
download  karakeep-aa7a81e0cad81f51ce21e2977c60ab4cb66e9e43.tar.zst
feat: add openai service tier configuration option (#2339)
Diffstat (limited to 'packages')
-rw-r--r--  packages/shared/config.ts     2
-rw-r--r--  packages/shared/inference.ts  8
2 files changed, 10 insertions(+), 0 deletions(-)
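
The commit wires a new OPENAI_SERVICE_TIER environment variable through the shared config and into both chat-completion call sites. As a minimal sketch of the validation semantics added in config.ts below (zod import assumed; the enum values are taken from the diff):

    import { z } from "zod";

    // The tier is an optional enum: an unset variable stays undefined,
    // and anything outside the three allowed values fails schema parsing.
    const serviceTier = z.enum(["auto", "default", "flex"]).optional();

    console.log(serviceTier.safeParse(undefined)); // { success: true, data: undefined }
    console.log(serviceTier.safeParse("flex"));    // { success: true, data: "flex" }
    console.log(serviceTier.safeParse("scale"));   // { success: false, error: ZodError }
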
diff --git a/packages/shared/config.ts b/packages/shared/config.ts
index 28dcc624..7238e90c 100644
--- a/packages/shared/config.ts
+++ b/packages/shared/config.ts
@@ -59,6 +59,7 @@ const allEnv = z.object({
OPENAI_API_KEY: z.string().optional(),
OPENAI_BASE_URL: z.string().url().optional(),
OPENAI_PROXY_URL: z.string().url().optional(),
+ OPENAI_SERVICE_TIER: z.enum(["auto", "default", "flex"]).optional(),
OLLAMA_BASE_URL: z.string().url().optional(),
OLLAMA_KEEP_ALIVE: z.string().optional(),
INFERENCE_JOB_TIMEOUT_SEC: z.coerce.number().default(30),
@@ -277,6 +278,7 @@ const serverConfigSchema = allEnv.transform((val, ctx) => {
openAIApiKey: val.OPENAI_API_KEY,
openAIBaseUrl: val.OPENAI_BASE_URL,
openAIProxyUrl: val.OPENAI_PROXY_URL,
+ openAIServiceTier: val.OPENAI_SERVICE_TIER,
ollamaBaseUrl: val.OLLAMA_BASE_URL,
ollamaKeepAlive: val.OLLAMA_KEEP_ALIVE,
textModel: val.INFERENCE_TEXT_MODEL,
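
The transform above surfaces the value on the parsed server config, so downstream code reads it as serverConfig.inference.openAIServiceTier (that property path is confirmed by the inference.ts diff below; the import specifier here is an assumption for illustration):

    // Hypothetical consumer: with OPENAI_SERVICE_TIER=flex in the
    // environment, the parsed config exposes the narrowed union type.
    import serverConfig from "./config"; // assumed import path

    const tier: "auto" | "default" | "flex" | undefined =
      serverConfig.inference.openAIServiceTier;
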
diff --git a/packages/shared/inference.ts b/packages/shared/inference.ts
index fe71778e..61a621ac 100644
--- a/packages/shared/inference.ts
+++ b/packages/shared/inference.ts
@@ -56,6 +56,7 @@ export interface OpenAIInferenceConfig {
apiKey: string;
baseURL?: string;
proxyUrl?: string;
+ serviceTier?: typeof serverConfig.inference.openAIServiceTier;
textModel: string;
imageModel: string;
contextLength: number;
@@ -106,6 +107,7 @@ export class OpenAIInferenceClient implements InferenceClient {
apiKey: serverConfig.inference.openAIApiKey!,
baseURL: serverConfig.inference.openAIBaseUrl,
proxyUrl: serverConfig.inference.openAIProxyUrl,
+ serviceTier: serverConfig.inference.openAIServiceTier,
textModel: serverConfig.inference.textModel,
imageModel: serverConfig.inference.imageModel,
contextLength: serverConfig.inference.contextLength,
@@ -127,6 +129,9 @@ export class OpenAIInferenceClient implements InferenceClient {
{
messages: [{ role: "user", content: prompt }],
model: this.config.textModel,
+ ...(this.config.serviceTier
+ ? { service_tier: this.config.serviceTier }
+ : {}),
...(this.config.useMaxCompletionTokens
? { max_completion_tokens: this.config.maxOutputTokens }
: { max_tokens: this.config.maxOutputTokens }),
@@ -166,6 +171,9 @@ export class OpenAIInferenceClient implements InferenceClient {
const chatCompletion = await this.openAI.chat.completions.create(
{
model: this.config.imageModel,
+ ...(this.config.serviceTier
+ ? { service_tier: this.config.serviceTier }
+ : {}),
...(this.config.useMaxCompletionTokens
? { max_completion_tokens: this.config.maxOutputTokens }
: { max_tokens: this.config.maxOutputTokens }),
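
Both hunks above use the same conditional-spread idiom, so a request only carries a service_tier key when the option is configured. A standalone sketch of that idiom (the function and model name are illustrative, not from the repo):

    type ServiceTier = "auto" | "default" | "flex";

    // When tier is undefined the spread contributes nothing, leaving the
    // request payload identical to the pre-change behavior.
    function requestParams(model: string, tier?: ServiceTier) {
      return {
        model,
        ...(tier ? { service_tier: tier } : {}),
      };
    }

    console.log(requestParams("gpt-4o-mini"));         // { model: "gpt-4o-mini" }
    console.log(requestParams("gpt-4o-mini", "flex")); // { model: "gpt-4o-mini", service_tier: "flex" }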