author     Mohamed Bassem <me@mbassem.com>  2025-03-02 11:56:29 +0000
committer  Mohamed Bassem <me@mbassem.com>  2025-03-02 11:56:53 +0000
commit     69d81aafe113a2b4769ecb936b9a5a02e31a0fd8 (patch)
tree       7793e90c655dc205e977c3ccbfe05d451cb9b3ac /packages/shared/inference.ts
parent     379c49b2cd6d081cbe593c969b6f2128b60407c9 (diff)
download   karakeep-69d81aafe113a2b4769ecb936b9a5a02e31a0fd8.tar.zst
fix: Move away from JSON outputs to structured outputs. Fixes #1047
Diffstat (limited to 'packages/shared/inference.ts')
-rw-r--r--  packages/shared/inference.ts  30
1 file changed, 21 insertions(+), 9 deletions(-)
diff --git a/packages/shared/inference.ts b/packages/shared/inference.ts
index 92d9dd94..43a14410 100644
--- a/packages/shared/inference.ts
+++ b/packages/shared/inference.ts
@@ -1,5 +1,8 @@
import { Ollama } from "ollama";
import OpenAI from "openai";
+import { zodResponseFormat } from "openai/helpers/zod";
+import { z } from "zod";
+import { zodToJsonSchema } from "zod-to-json-schema";
import serverConfig from "./config";
import { customFetch } from "./customFetch";
@@ -15,12 +18,13 @@ export interface EmbeddingResponse {
}
export interface InferenceOptions {
- json: boolean;
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
+ schema: z.ZodSchema<any> | null;
abortSignal?: AbortSignal;
}
const defaultInferenceOptions: InferenceOptions = {
- json: true,
+ schema: null,
};
export interface InferenceClient {
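With this change, callers no longer pass a bare json flag; they hand the client a zod schema describing the output they expect (or null for free-form text). A minimal caller-side sketch, assuming an inferFromText method on InferenceClient and a response field on its return value (neither appears in this hunk):

import { z } from "zod";

// Schema replacing the old `json: true` flag; the shape here is illustrative only.
const tagSuggestionSchema = z.object({
  tags: z.array(z.string()),
});

async function suggestTags(client: InferenceClient, prompt: string) {
  // `inferFromText` and `completion.response` are assumed names, not taken from this diff.
  const completion = await client.inferFromText(prompt, {
    schema: tagSuggestionSchema,
  });
  // Validate the model's reply against the same schema before using it.
  return tagSuggestionSchema.parse(JSON.parse(completion.response));
}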
@@ -72,9 +76,11 @@ class OpenAIInferenceClient implements InferenceClient {
{
messages: [{ role: "user", content: prompt }],
model: serverConfig.inference.textModel,
- response_format: optsWithDefaults.json
- ? { type: "json_object" }
- : undefined,
+ response_format:
+ optsWithDefaults.schema &&
+ serverConfig.inference.supportsStructuredOutput
+ ? zodResponseFormat(optsWithDefaults.schema, "schema")
+ : undefined,
},
{
signal: optsWithDefaults.abortSignal,
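For OpenAI-compatible backends, zodResponseFormat from openai/helpers/zod converts the zod schema into a json_schema response_format, so (when supportsStructuredOutput is enabled) the model is constrained to the declared shape instead of merely being asked for "a JSON object". A standalone sketch of the same call outside the client class; the model name is illustrative:

import OpenAI from "openai";
import { zodResponseFormat } from "openai/helpers/zod";
import { z } from "zod";

const schema = z.object({ tags: z.array(z.string()) });

async function completeStructured(prompt: string) {
  const openAI = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
  // zodResponseFormat(schema, "schema") expands to roughly
  // { type: "json_schema", json_schema: { name: "schema", strict: true, schema: {...} } }.
  return await openAI.chat.completions.create({
    model: "gpt-4o-mini", // illustrative; the real code uses serverConfig.inference.textModel
    messages: [{ role: "user", content: prompt }],
    response_format: zodResponseFormat(schema, "schema"),
  });
}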
@@ -101,9 +107,11 @@ class OpenAIInferenceClient implements InferenceClient {
const chatCompletion = await this.openAI.chat.completions.create(
{
model: serverConfig.inference.imageModel,
- response_format: optsWithDefaults.json
- ? { type: "json_object" }
- : undefined,
+ response_format:
+ optsWithDefaults.schema &&
+ serverConfig.inference.supportsStructuredOutput
+ ? zodResponseFormat(optsWithDefaults.schema, "schema")
+ : undefined,
messages: [
{
role: "user",
@@ -178,7 +186,11 @@ class OllamaInferenceClient implements InferenceClient {
}
const chatCompletion = await this.ollama.chat({
model: model,
- format: optsWithDefaults.json ? "json" : undefined,
+ format:
+ optsWithDefaults.schema &&
+ serverConfig.inference.supportsStructuredOutput
+ ? zodToJsonSchema(optsWithDefaults.schema)
+ : undefined,
stream: true,
keep_alive: serverConfig.inference.ollamaKeepAlive,
options: {
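On the Ollama side there is no SDK helper, so the zod schema is converted to a plain JSON Schema via zod-to-json-schema and passed as the chat request's format field; recent Ollama versions accept a JSON Schema object there for structured outputs, and the field stays undefined for free-form text. A rough non-streaming equivalent, with an assumed host and an illustrative model name:

import { Ollama } from "ollama";
import { z } from "zod";
import { zodToJsonSchema } from "zod-to-json-schema";

const schema = z.object({ tags: z.array(z.string()) });

async function chatStructured(prompt: string) {
  const ollama = new Ollama({ host: "http://localhost:11434" }); // assumed default host
  const response = await ollama.chat({
    model: "llama3.2", // illustrative; the real code picks the model from serverConfig
    format: zodToJsonSchema(schema), // a JSON Schema object instead of the string "json"
    messages: [{ role: "user", content: prompt }],
    stream: false,
  });
  // Validate the reply against the same schema before trusting it.
  return schema.parse(JSON.parse(response.message.content));
}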