rcgit

/ karakeep

Commit f00287ed

SHA f00287ede0675521c783c1199675538571f977d6
Author Mohamed Bassem <me at mbassem dot com>
Author Date 2025-12-29 23:35 +0000
Committer Mohamed Bassem <me at mbassem dot com>
Commit Date 2025-12-29 23:38 +0000
Parent(s) ba8d84a555f9 (diff)
Tree 2d04b983fa51

patch snapshot

refactor: reduce duplication in compare-models tool
File + - Graph
M packages/shared/inference.ts +79 -28
M tools/compare-models/.env.example +17 -0
M tools/compare-models/README.md +54 -10
M tools/compare-models/package.json +2 -5
M tools/compare-models/src/apiClient.ts +7 -1
M tools/compare-models/src/bookmarkProcessor.ts +16 -4
M tools/compare-models/src/config.ts +18 -1
M tools/compare-models/src/index.ts +84 -26
M tools/compare-models/src/inferenceClient.ts +37 -120
M tools/compare-models/src/types.ts +2 -1
10 file(s) changed, 316 insertions(+), 196 deletions(-)

packages/shared/inference.ts

diff --git a/packages/shared/inference.ts b/packages/shared/inference.ts
index fb9fce09..d6a9aa10 100644
--- a/packages/shared/inference.ts
+++ b/packages/shared/inference.ts
@@ -52,34 +52,47 @@ const mapInferenceOutputSchema = <
   return opts[type];
 };
 
+export interface OpenAIInferenceConfig {
+  apiKey: string;
+  baseURL?: string;
+  proxyUrl?: string;
+  textModel: string;
+  imageModel: string;
+  contextLength: number;
+  maxOutputTokens: number;
+  useMaxCompletionTokens: boolean;
+  outputSchema: "structured" | "json" | "plain";
+}
+
 export class InferenceClientFactory {
   static build(): InferenceClient | null {
     if (serverConfig.inference.openAIApiKey) {
-      return new OpenAIInferenceClient();
+      return OpenAIInferenceClient.fromConfig();
     }
 
     if (serverConfig.inference.ollamaBaseUrl) {
-      return new OllamaInferenceClient();
+      return OllamaInferenceClient.fromConfig();
     }
     return null;
   }
 }
 
-class OpenAIInferenceClient implements InferenceClient {
+export class OpenAIInferenceClient implements InferenceClient {
   openAI: OpenAI;
+  private config: OpenAIInferenceConfig;
 
-  constructor() {
-    const fetchOptions = serverConfig.inference.openAIProxyUrl
+  constructor(config: OpenAIInferenceConfig) {
+    this.config = config;
+
+    const fetchOptions = config.proxyUrl
       ? {
-          dispatcher: new undici.ProxyAgent(
-            serverConfig.inference.openAIProxyUrl,
-          ),
+          dispatcher: new undici.ProxyAgent(config.proxyUrl),
         }
       : undefined;
 
     this.openAI = new OpenAI({
-      apiKey: serverConfig.inference.openAIApiKey,
-      baseURL: serverConfig.inference.openAIBaseUrl,
+      apiKey: config.apiKey,
+      baseURL: config.baseURL,
       ...(fetchOptions ? { fetchOptions } : {}),
       defaultHeaders: {
         "X-Title": "Karakeep",
@@ -88,6 +101,20 @@ class OpenAIInferenceClient implements InferenceClient {
     });
   }
 
+  static fromConfig(): OpenAIInferenceClient {
+    return new OpenAIInferenceClient({
+      apiKey: serverConfig.inference.openAIApiKey!,
+      baseURL: serverConfig.inference.openAIBaseUrl,
+      proxyUrl: serverConfig.inference.openAIProxyUrl,
+      textModel: serverConfig.inference.textModel,
+      imageModel: serverConfig.inference.imageModel,
+      contextLength: serverConfig.inference.contextLength,
+      maxOutputTokens: serverConfig.inference.maxOutputTokens,
+      useMaxCompletionTokens: serverConfig.inference.useMaxCompletionTokens,
+      outputSchema: serverConfig.inference.outputSchema,
+    });
+  }
+
   async inferFromText(
     prompt: string,
     _opts: Partial<InferenceOptions>,
@@ -99,10 +126,10 @@ class OpenAIInferenceClient implements InferenceClient {
     const chatCompletion = await this.openAI.chat.completions.create(
       {
         messages: [{ role: "user", content: prompt }],
-        model: serverConfig.inference.textModel,
-        ...(serverConfig.inference.useMaxCompletionTokens
-          ? { max_completion_tokens: serverConfig.inference.maxOutputTokens }
-          : { max_tokens: serverConfig.inference.maxOutputTokens }),
+        model: this.config.textModel,
+        ...(this.config.useMaxCompletionTokens
+          ? { max_completion_tokens: this.config.maxOutputTokens }
+          : { max_tokens: this.config.maxOutputTokens }),
         response_format: mapInferenceOutputSchema(
           {
             structured: optsWithDefaults.schema
@@ -111,7 +138,7 @@ class OpenAIInferenceClient implements InferenceClient {
             json: { type: "json_object" },
             plain: undefined,
           },
-          serverConfig.inference.outputSchema,
+          this.config.outputSchema,
         ),
       },
       {
@@ -138,10 +165,10 @@ class OpenAIInferenceClient implements InferenceClient {
     };
     const chatCompletion = await this.openAI.chat.completions.create(
       {
-        model: serverConfig.inference.imageModel,
-        ...(serverConfig.inference.useMaxCompletionTokens
-          ? { max_completion_tokens: serverConfig.inference.maxOutputTokens }
-          : { max_tokens: serverConfig.inference.maxOutputTokens }),
+        model: this.config.imageModel,
+        ...(this.config.useMaxCompletionTokens
+          ? { max_completion_tokens: this.config.maxOutputTokens }
+          : { max_tokens: this.config.maxOutputTokens }),
         response_format: mapInferenceOutputSchema(
           {
             structured: optsWithDefaults.schema
@@ -150,7 +177,7 @@ class OpenAIInferenceClient implements InferenceClient {
             json: { type: "json_object" },
             plain: undefined,
           },
-          serverConfig.inference.outputSchema,
+          this.config.outputSchema,
         ),
         messages: [
           {
@@ -195,16 +222,40 @@ class OpenAIInferenceClient implements InferenceClient {
   }
 }
 
+export interface OllamaInferenceConfig {
+  baseUrl: string;
+  textModel: string;
+  imageModel: string;
+  contextLength: number;
+  maxOutputTokens: number;
+  keepAlive?: string;
+  outputSchema: "structured" | "json" | "plain";
+}
+
 class OllamaInferenceClient implements InferenceClient {
   ollama: Ollama;
+  private config: OllamaInferenceConfig;
 
-  constructor() {
+  constructor(config: OllamaInferenceConfig) {
+    this.config = config;
     this.ollama = new Ollama({
-      host: serverConfig.inference.ollamaBaseUrl,
+      host: config.baseUrl,
       fetch: customFetch, // Use the custom fetch with configurable timeout
     });
   }
 
+  static fromConfig(): OllamaInferenceClient {
+    return new OllamaInferenceClient({
+      baseUrl: serverConfig.inference.ollamaBaseUrl!,
+      textModel: serverConfig.inference.textModel,
+      imageModel: serverConfig.inference.imageModel,
+      contextLength: serverConfig.inference.contextLength,
+      maxOutputTokens: serverConfig.inference.maxOutputTokens,
+      keepAlive: serverConfig.inference.ollamaKeepAlive,
+      outputSchema: serverConfig.inference.outputSchema,
+    });
+  }
+
   async runModel(
     model: string,
     prompt: string,
@@ -233,13 +284,13 @@ class OllamaInferenceClient implements InferenceClient {
           json: "json",
           plain: undefined,
         },
-        serverConfig.inference.outputSchema,
+        this.config.outputSchema,
       ),
       stream: true,
-      keep_alive: serverConfig.inference.ollamaKeepAlive,
+      keep_alive: this.config.keepAlive,
       options: {
-        num_ctx: serverConfig.inference.contextLength,
-        num_predict: serverConfig.inference.maxOutputTokens,
+        num_ctx: this.config.contextLength,
+        num_predict: this.config.maxOutputTokens,
       },
       messages: [
         { role: "user", content: prompt, images: image ? [image] : undefined },
@@ -287,7 +338,7 @@ class OllamaInferenceClient implements InferenceClient {
       ..._opts,
     };
     return await this.runModel(
-      serverConfig.inference.textModel,
+      this.config.textModel,
       prompt,
       optsWithDefaults,
       undefined,
@@ -305,7 +356,7 @@ class OllamaInferenceClient implements InferenceClient {
       ..._opts,
     };
     return await this.runModel(
-      serverConfig.inference.imageModel,
+      this.config.imageModel,
       prompt,
       optsWithDefaults,
       image,

tools/compare-models/.env.example

diff --git a/tools/compare-models/.env.example b/tools/compare-models/.env.example
index aec095f0..f2f4c10c 100644
--- a/tools/compare-models/.env.example
+++ b/tools/compare-models/.env.example
@@ -2,7 +2,15 @@
 KARAKEEP_API_KEY=your_karakeep_api_key
 KARAKEEP_SERVER_ADDR=https://your-karakeep-instance.com
 
+# Comparison mode:
+#   - "model-vs-model": Compare two models (requires MODEL1_NAME and MODEL2_NAME)
+#   - "model-vs-existing": Compare a model against existing AI tags (requires only MODEL1_NAME)
+# Default: model-vs-model
+COMPARISON_MODE=model-vs-model
+
 # Models to compare
+# MODEL1_NAME: The new model to test (always required)
+# MODEL2_NAME: The second model to compare against (required only for model-vs-model mode)
 MODEL1_NAME=gpt-4o-mini
 MODEL2_NAME=claude-3-5-sonnet
 
@@ -12,3 +20,12 @@ OPENAI_BASE_URL=https://openrouter.ai/api/v1
 
 # Optional: Number of bookmarks to test (default: 10)
 COMPARE_LIMIT=10
+
+# Optional: Context length for inference (default: 8000)
+INFERENCE_CONTEXT_LENGTH=8000
+
+# Optional: Max output tokens (default: 2048)
+INFERENCE_MAX_OUTPUT_TOKENS=2048
+
+# Optional: Use max_completion_tokens parameter (default: false)
+INFERENCE_USE_MAX_COMPLETION_TOKENS=false

tools/compare-models/README.md

diff --git a/tools/compare-models/README.md b/tools/compare-models/README.md
index b8ef5138..85c7c6ec 100644
--- a/tools/compare-models/README.md
+++ b/tools/compare-models/README.md
@@ -1,12 +1,15 @@
 # Model Comparison Tool
 
-A standalone CLI tool to compare the tagging performance of two AI models using your existing Karakeep bookmarks.
+A standalone CLI tool to compare the tagging performance of AI models using your existing Karakeep bookmarks.
 
 ## Features
 
+- **Two comparison modes:**
+  - **Model vs Model**: Compare two AI models against each other
+  - **Model vs Existing**: Compare a new model against existing AI-generated tags on your bookmarks
 - Fetches existing bookmarks from your Karakeep instance
-- Runs tagging inference on each bookmark with two different models
-- **Random shuffling**: Models are randomly assigned to "Model A" or "Model B" for each bookmark to eliminate bias
+- Runs tagging inference with AI models
+- **Random shuffling**: Models/tags are randomly assigned to "Model A" or "Model B" for each bookmark to eliminate bias
 - Blind comparison: Model names are hidden during voting (only shown as "Model A" and "Model B")
 - Interactive voting interface
 - Shows final results with winner
@@ -22,7 +25,14 @@ Required environment variables:
 KARAKEEP_API_KEY=your_api_key_here
 KARAKEEP_SERVER_ADDR=https://your-karakeep-instance.com
 
+# Comparison mode (default: model-vs-model)
+# - "model-vs-model": Compare two models against each other
+# - "model-vs-existing": Compare a model against existing AI tags
+COMPARISON_MODE=model-vs-model
+
 # Models to compare
+# MODEL1_NAME: The new model to test (always required)
+# MODEL2_NAME: The second model to compare against (required only for model-vs-model mode)
 MODEL1_NAME=gpt-4o-mini
 MODEL2_NAME=claude-3-5-sonnet
 
@@ -92,11 +102,43 @@ export OPENAI_API_KEY=your_openai_key
 node dist/index.js
 ```
 
+## Comparison Modes
+
+### Model vs Model Mode
+
+Compare two different AI models against each other:
+
+```bash
+COMPARISON_MODE=model-vs-model
+MODEL1_NAME=gpt-4o-mini
+MODEL2_NAME=claude-3-5-sonnet
+```
+
+This mode runs inference with both models on each bookmark and lets you choose which tags are better.
+
+### Model vs Existing Mode
+
+Compare a new model against existing AI-generated tags on your bookmarks:
+
+```bash
+COMPARISON_MODE=model-vs-existing
+MODEL1_NAME=gpt-4o-mini
+# MODEL2_NAME is not required in this mode
+```
+
+This mode is useful for:
+- Testing if a new model produces better tags than your current model
+- Evaluating whether to switch from one model to another
+- Quality assurance on existing AI tags
+
+**Note:** This mode only compares bookmarks that already have AI-generated tags (tags with `attachedBy: "ai"`). Bookmarks without AI tags are automatically filtered out.
+
 ## Usage Flow
 
 1. The tool fetches your latest link bookmarks from Karakeep
-2. For each bookmark, it randomly assigns your two models to "Model A" or "Model B" and runs tagging with both
-3. You'll see a side-by-side comparison (models are randomly shuffled each time):
+   - In **model-vs-existing** mode, only bookmarks with existing AI tags are included
+2. For each bookmark, it randomly assigns the options to "Model A" or "Model B" and runs tagging
+3. You'll see a side-by-side comparison (randomly shuffled each time):
    ```
    === Bookmark 1/10 ===
    How to Build Better AI Systems
@@ -150,13 +192,15 @@ The tool currently tests only:
 - **Link-type bookmarks** (not text notes or assets)
 - **Non-archived** bookmarks
 - **Latest N bookmarks** (where N is COMPARE_LIMIT)
+- **In model-vs-existing mode**: Only bookmarks with existing AI tags (tags with `attachedBy: "ai"`)
 
-## SDK Usage
+## Architecture
 
-This tool uses the Karakeep SDK for all API interactions:
-- Type-safe requests using `@karakeep/sdk`
-- Proper authentication handling via Bearer token
-- Pagination support for fetching multiple bookmarks
+This tool leverages Karakeep's shared infrastructure:
+- **API Client**: Uses `@karakeep/sdk` for type-safe API interactions with proper authentication
+- **Inference**: Reuses `@karakeep/shared/inference` for OpenAI client with structured output support
+- **Prompts**: Uses `@karakeep/shared/prompts` for consistent tagging prompt generation with token management
+- No code duplication - all core functionality is shared with the main Karakeep application
 
 
 ## Error Handling

tools/compare-models/package.json

diff --git a/tools/compare-models/package.json b/tools/compare-models/package.json
index 94307292..5a493bd2 100644
--- a/tools/compare-models/package.json
+++ b/tools/compare-models/package.json
@@ -12,14 +12,11 @@
   },
   "dependencies": {
     "@karakeep/sdk": "workspace:^",
+    "@karakeep/shared": "workspace:^",
     "chalk": "^5.3.0",
-    "openai": "^4.86.1",
-    "zod": "^3.24.2",
-    "zod-to-json-schema": "^3.24.3"
+    "zod": "^3.24.2"
   },
   "devDependencies": {
-    "@karakeep/shared": "workspace:^",
-    "@karakeep/trpc": "workspace:^",
     "@types/node": "^24",
     "tsx": "^4.8.1",
     "typescript": "^5.9"

tools/compare-models/src/apiClient.ts

diff --git a/tools/compare-models/src/apiClient.ts b/tools/compare-models/src/apiClient.ts
index f3a960cb..1d9f799d 100644
--- a/tools/compare-models/src/apiClient.ts
+++ b/tools/compare-models/src/apiClient.ts
@@ -53,7 +53,13 @@ export class KarakeepAPIClient {
 
       const batchBookmarks = (data?.bookmarks || [])
         .filter((b) => b.content?.type === "link")
-        .map((b) => b as Bookmark);
+        .map((b) => ({
+          ...b,
+          tags: (b.tags || []).map((tag) => ({
+            name: tag.name,
+            attachedBy: tag.attachedBy,
+          })),
+        })) as Bookmark[];
 
       bookmarks.push(...batchBookmarks);
       cursor = data?.nextCursor || null;

tools/compare-models/src/bookmarkProcessor.ts

diff --git a/tools/compare-models/src/bookmarkProcessor.ts b/tools/compare-models/src/bookmarkProcessor.ts
index 910957fe..21280b97 100644
--- a/tools/compare-models/src/bookmarkProcessor.ts
+++ b/tools/compare-models/src/bookmarkProcessor.ts
@@ -1,4 +1,7 @@
-import type { InferenceClient } from "./inferenceClient";
+import type { InferenceClient } from "@karakeep/shared/inference";
+import { buildTextPrompt } from "@karakeep/shared/prompts";
+
+import { inferTags } from "./inferenceClient";
 import type { Bookmark } from "./types";
 
 export async function extractBookmarkContent(
@@ -35,9 +38,9 @@ export async function extractBookmarkContent(
 
 export async function runTaggingForModel(
   bookmark: Bookmark,
-  model: string,
   inferenceClient: InferenceClient,
   lang: string = "english",
+  contextLength: number = 8000,
 ): Promise<string[]> {
   const content = await extractBookmarkContent(bookmark);
 
@@ -46,11 +49,20 @@ export async function runTaggingForModel(
   }
 
   try {
-    const tags = await inferenceClient.inferTags(content, model, lang, []);
+    // Use the shared prompt builder with empty custom prompts and default tag style
+    const prompt = await buildTextPrompt(
+      lang,
+      [], // No custom prompts for comparison tool
+      content,
+      contextLength,
+      "as-generated", // Use tags as generated by the model
+    );
+
+    const tags = await inferTags(inferenceClient, prompt);
     return tags;
   } catch (error) {
     throw new Error(
-      `Failed to generate tags with ${model}: ${error instanceof Error ? error.message : String(error)}`,
+      `Failed to generate tags: ${error instanceof Error ? error.message : String(error)}`,
     );
   }
 }

tools/compare-models/src/config.ts

diff --git a/tools/compare-models/src/config.ts b/tools/compare-models/src/config.ts
index 9c32610d..0b5d217f 100644
--- a/tools/compare-models/src/config.ts
+++ b/tools/compare-models/src/config.ts
@@ -1,16 +1,33 @@
 import { z } from "zod";
 
+// Local config schema for compare-models tool
 const envSchema = z.object({
   KARAKEEP_API_KEY: z.string().min(1),
   KARAKEEP_SERVER_ADDR: z.string().url(),
   MODEL1_NAME: z.string().min(1),
-  MODEL2_NAME: z.string().min(1),
+  MODEL2_NAME: z.string().min(1).optional(),
   OPENAI_API_KEY: z.string().min(1),
   OPENAI_BASE_URL: z.string().url().optional(),
+  COMPARISON_MODE: z
+    .enum(["model-vs-model", "model-vs-existing"])
+    .default("model-vs-model"),
   COMPARE_LIMIT: z
     .string()
     .optional()
     .transform((val) => (val ? parseInt(val, 10) : 10)),
+  INFERENCE_CONTEXT_LENGTH: z
+    .string()
+    .optional()
+    .transform((val) => (val ? parseInt(val, 10) : 8000)),
+  INFERENCE_MAX_OUTPUT_TOKENS: z
+    .string()
+    .optional()
+    .transform((val) => (val ? parseInt(val, 10) : 2048)),
+  INFERENCE_USE_MAX_COMPLETION_TOKENS: z
+    .string()
+    .optional()
+    .transform((val) => val === "true")
+    .default("false"),
 });
 
 export const config = envSchema.parse(process.env);

tools/compare-models/src/index.ts

diff --git a/tools/compare-models/src/index.ts b/tools/compare-models/src/index.ts
index c1a80ab5..88fc9249 100644
--- a/tools/compare-models/src/index.ts
+++ b/tools/compare-models/src/index.ts
@@ -4,7 +4,7 @@ import type { ComparisonResult } from "./types";
 import { KarakeepAPIClient } from "./apiClient";
 import { runTaggingForModel } from "./bookmarkProcessor";
 import { config } from "./config";
-import { InferenceClient } from "./inferenceClient";
+import { createInferenceClient } from "./inferenceClient";
 import {
   askQuestion,
   clearProgress,
@@ -32,18 +32,58 @@ interface ShuffleResult {
 async function main() {
   console.log(chalk.cyan("\nšŸš€ Karakeep Model Comparison Tool\n"));
 
-  const inferenceClient = new InferenceClient(
-    config.OPENAI_API_KEY,
-    config.OPENAI_BASE_URL,
-  );
+  const isExistingMode = config.COMPARISON_MODE === "model-vs-existing";
+
+  if (isExistingMode) {
+    console.log(
+      chalk.yellow(
+        `Mode: Comparing ${config.MODEL1_NAME} against existing AI tags\n`,
+      ),
+    );
+  } else {
+    if (!config.MODEL2_NAME) {
+      console.log(
+        chalk.red(
+          "\nāœ— Error: MODEL2_NAME is required for model-vs-model comparison mode\n",
+        ),
+      );
+      process.exit(1);
+    }
+    console.log(
+      chalk.yellow(
+        `Mode: Comparing ${config.MODEL1_NAME} vs ${config.MODEL2_NAME}\n`,
+      ),
+    );
+  }
 
   const apiClient = new KarakeepAPIClient();
 
   displayProgress("Fetching bookmarks from Karakeep...");
-  const bookmarks = await apiClient.fetchBookmarks(config.COMPARE_LIMIT);
+  let bookmarks = await apiClient.fetchBookmarks(config.COMPARE_LIMIT);
   clearProgress();
 
-  console.log(chalk.green(`āœ“ Fetched ${bookmarks.length} link bookmarks\n`));
+  // Filter bookmarks with AI tags if in existing mode
+  if (isExistingMode) {
+    bookmarks = bookmarks.filter(
+      (b) => b.tags.some((t) => t.attachedBy === "ai"),
+    );
+    console.log(
+      chalk.green(
+        `āœ“ Fetched ${bookmarks.length} link bookmarks with existing AI tags\n`,
+      ),
+    );
+  } else {
+    console.log(chalk.green(`āœ“ Fetched ${bookmarks.length} link bookmarks\n`));
+  }
+
+  if (bookmarks.length === 0) {
+    console.log(
+      chalk.yellow(
+        "\n⚠ No bookmarks found with AI tags. Please add some bookmarks with AI tags first.\n",
+      ),
+    );
+    return;
+  }
 
   const counters: VoteCounters = {
     model1Votes: 0,
@@ -59,17 +99,20 @@ async function main() {
     const bookmark = bookmarks[i];
 
     displayProgress(
-      `[${i + 1}/${bookmarks.length}] Running inference on: ${bookmark.title || "Untitled"}`,
+      `[${i + 1}/${bookmarks.length}] Running inference on: ${bookmark.title || bookmark.content.title || "Untitled"}`,
     );
 
     let model1Tags: string[] = [];
     let model2Tags: string[] = [];
 
+    // Get tags for model 1 (new model)
     try {
+      const model1Client = createInferenceClient(config.MODEL1_NAME);
       model1Tags = await runTaggingForModel(
         bookmark,
-        config.MODEL1_NAME,
-        inferenceClient,
+        model1Client,
+        "english",
+        config.INFERENCE_CONTEXT_LENGTH,
       );
     } catch (error) {
       clearProgress();
@@ -80,31 +123,46 @@ async function main() {
       continue;
     }
 
-    try {
-      model2Tags = await runTaggingForModel(
-        bookmark,
-        config.MODEL2_NAME,
-        inferenceClient,
-      );
-    } catch (error) {
-      clearProgress();
-      displayError(
-        `${config.MODEL2_NAME} failed: ${error instanceof Error ? error.message : String(error)}`,
-      );
-      counters.errors++;
-      continue;
+    // Get tags for model 2 or existing AI tags
+    if (isExistingMode) {
+      // Use existing AI tags from the bookmark
+      model2Tags = bookmark.tags
+        .filter((t) => t.attachedBy === "ai")
+        .map((t) => t.name);
+    } else {
+      // Run inference with model 2
+      try {
+        const model2Client = createInferenceClient(config.MODEL2_NAME!);
+        model2Tags = await runTaggingForModel(
+          bookmark,
+          model2Client,
+          "english",
+          config.INFERENCE_CONTEXT_LENGTH,
+        );
+      } catch (error) {
+        clearProgress();
+        displayError(
+          `${config.MODEL2_NAME} failed: ${error instanceof Error ? error.message : String(error)}`,
+        );
+        counters.errors++;
+        continue;
+      }
     }
 
     clearProgress();
 
+    const model2Label = isExistingMode
+      ? "Existing AI Tags"
+      : config.MODEL2_NAME!;
+
     const shuffleResult: ShuffleResult = {
       modelA: config.MODEL1_NAME,
-      modelB: config.MODEL2_NAME,
+      modelB: model2Label,
       modelAIsModel1: Math.random() < 0.5,
     };
 
     if (!shuffleResult.modelAIsModel1) {
-      shuffleResult.modelA = config.MODEL2_NAME;
+      shuffleResult.modelA = model2Label;
       shuffleResult.modelB = config.MODEL1_NAME;
     }
 
@@ -156,7 +214,7 @@ async function main() {
 
   displayFinalResults({
     model1Name: config.MODEL1_NAME,
-    model2Name: config.MODEL2_NAME,
+    model2Name: isExistingMode ? "Existing AI Tags" : config.MODEL2_NAME!,
     model1Votes: counters.model1Votes,
     model2Votes: counters.model2Votes,
     skipped: counters.skipped,

tools/compare-models/src/inferenceClient.ts

diff --git a/tools/compare-models/src/inferenceClient.ts b/tools/compare-models/src/inferenceClient.ts
index 33617318..8649f715 100644
--- a/tools/compare-models/src/inferenceClient.ts
+++ b/tools/compare-models/src/inferenceClient.ts
@@ -1,128 +1,45 @@
-import OpenAI from "openai";
-import { zodResponseFormat } from "openai/helpers/zod";
+import type { InferenceClient } from "@karakeep/shared/inference";
+import {
+  OpenAIInferenceClient,
+  type OpenAIInferenceConfig,
+} from "@karakeep/shared/inference";
 import { z } from "zod";
 
-export interface InferenceOptions {
-  schema: z.ZodSchema<any> | null;
+import { config } from "./config";
+
+export function createInferenceClient(modelName: string): InferenceClient {
+  const inferenceConfig: OpenAIInferenceConfig = {
+    apiKey: config.OPENAI_API_KEY,
+    baseURL: config.OPENAI_BASE_URL,
+    textModel: modelName,
+    imageModel: modelName, // Use same model for images if needed
+    contextLength: config.INFERENCE_CONTEXT_LENGTH,
+    maxOutputTokens: config.INFERENCE_MAX_OUTPUT_TOKENS,
+    useMaxCompletionTokens: config.INFERENCE_USE_MAX_COMPLETION_TOKENS,
+    outputSchema: "structured",
+  };
+
+  return new OpenAIInferenceClient(inferenceConfig);
 }
 
-export interface InferenceResponse {
-  response: string;
-  totalTokens: number | undefined;
-}
-
-export class InferenceClient {
-  private client: OpenAI;
-
-  constructor(apiKey: string, baseUrl?: string) {
-    this.client = new OpenAI({
-      apiKey,
-      baseURL: baseUrl,
-      defaultHeaders: {
-        "X-Title": "Karakeep Model Comparison",
-      },
-    });
-  }
-
-  async inferTags(
-    content: string,
-    model: string,
-    lang: string = "english",
-    customPrompts: string[] = [],
-  ): Promise<string[]> {
-    const tagsSchema = z.object({
-      tags: z.array(z.string()),
-    });
-
-    const response = await this.inferFromText(
-      this.buildPrompt(content, lang, customPrompts),
-      model,
-      { schema: tagsSchema },
+export async function inferTags(
+  inferenceClient: InferenceClient,
+  prompt: string,
+): Promise<string[]> {
+  const tagsSchema = z.object({
+    tags: z.array(z.string()),
+  });
+
+  const response = await inferenceClient.inferFromText(prompt, {
+    schema: tagsSchema,
+  });
+
+  const parsed = tagsSchema.safeParse(JSON.parse(response.response));
+  if (!parsed.success) {
+    throw new Error(
+      `Failed to parse model response: ${parsed.error.message}`,
     );
-
-    const parsed = tagsSchema.safeParse(
-      this.parseJsonFromResponse(response.response),
-    );
-    if (!parsed.success) {
-      throw new Error(
-        `Failed to parse model response: ${parsed.error.message}`,
-      );
-    }
-
-    return parsed.data.tags;
-  }
-
-  private async inferFromText(
-    prompt: string,
-    model: string,
-    opts: InferenceOptions,
-  ): Promise<InferenceResponse> {
-    const chatCompletion = await this.client.chat.completions.create({
-      messages: [{ role: "user", content: prompt }],
-      model: model,
-      response_format: opts.schema
-        ? zodResponseFormat(opts.schema, "schema")
-        : { type: "json_object" },
-    });
-
-    const response = chatCompletion.choices[0].message.content;
-    if (!response) {
-      throw new Error("Got no message content from model");
-    }
-
-    return {
-      response,
-      totalTokens: chatCompletion.usage?.total_tokens,
-    };
-  }
-
-  private buildPrompt(
-    content: string,
-    lang: string,
-    customPrompts: string[],
-  ): string {
-    return `
-You are an expert whose responsibility is to help with automatic tagging for a read-it-later app.
-Please analyze the TEXT_CONTENT below and suggest relevant tags that describe its key themes, topics, and main ideas. The rules are:
-- Aim for a variety of tags, including broad categories, specific keywords, and potential sub-genres.
-- The tags must be in ${lang}.
-- If tag is not generic enough, don't include it.
-- The content can include text for cookie consent and privacy policy, ignore those while tagging.
-- Aim for 3-5 tags.
-- If there are no good tags, leave the array empty.
-${customPrompts.map((p) => `- ${p}`).join("\n")}
-
-<TEXT_CONTENT>
-${content}
-</TEXT_CONTENT>
-You must respond in JSON with key "tags" and the value is an array of string tags.`;
   }
 
-  private parseJsonFromResponse(response: string): unknown {
-    const trimmedResponse = response.trim();
-
-    try {
-      return JSON.parse(trimmedResponse);
-    } catch {
-      const jsonBlockRegex = /```(?:json)?\s*(\{[\s\S]*?\})\s*```/i;
-      const match = trimmedResponse.match(jsonBlockRegex);
-
-      if (match) {
-        try {
-          return JSON.parse(match[1]);
-        } catch {}
-      }
-
-      const jsonObjectRegex = /\{[\s\S]*\}/;
-      const objectMatch = trimmedResponse.match(jsonObjectRegex);
-
-      if (objectMatch) {
-        try {
-          return JSON.parse(objectMatch[0]);
-        } catch {}
-      }
-
-      return JSON.parse(trimmedResponse);
-    }
-  }
+  return parsed.data.tags;
 }

tools/compare-models/src/types.ts

diff --git a/tools/compare-models/src/types.ts b/tools/compare-models/src/types.ts
index b8bdc024..35a677ae 100644
--- a/tools/compare-models/src/types.ts
+++ b/tools/compare-models/src/types.ts
@@ -3,12 +3,13 @@ export interface Bookmark {
   title: string | null;
   content: {
     type: string;
+    title: string;
     url?: string;
     text?: string;
     htmlContent?: string;
     description?: string;
   };
-  tags: Array<{ name: string }>;
+  tags: Array<{ name: string; attachedBy?: "ai" | "human" }>;
 }
 
 export interface ModelConfig {