diff options
Diffstat (limited to 'tools/compare-models')
| -rw-r--r-- | tools/compare-models/.env.example | 31 | ||||
| -rw-r--r-- | tools/compare-models/.gitignore | 3 | ||||
| -rw-r--r-- | tools/compare-models/README.md | 230 | ||||
| -rw-r--r-- | tools/compare-models/package.json | 24 | ||||
| -rw-r--r-- | tools/compare-models/src/apiClient.ts | 71 | ||||
| -rw-r--r-- | tools/compare-models/src/bookmarkProcessor.ts | 68 | ||||
| -rw-r--r-- | tools/compare-models/src/config.ts | 34 | ||||
| -rw-r--r-- | tools/compare-models/src/index.ts | 229 | ||||
| -rw-r--r-- | tools/compare-models/src/inferenceClient.ts | 46 | ||||
| -rw-r--r-- | tools/compare-models/src/interactive.ts | 128 | ||||
| -rw-r--r-- | tools/compare-models/src/types.ts | 38 | ||||
| -rw-r--r-- | tools/compare-models/tsconfig.json | 24 |
12 files changed, 926 insertions, 0 deletions
diff --git a/tools/compare-models/.env.example b/tools/compare-models/.env.example new file mode 100644 index 00000000..f2f4c10c --- /dev/null +++ b/tools/compare-models/.env.example @@ -0,0 +1,31 @@ +# Karakeep API configuration +KARAKEEP_API_KEY=your_karakeep_api_key +KARAKEEP_SERVER_ADDR=https://your-karakeep-instance.com + +# Comparison mode: +# - "model-vs-model": Compare two models (requires MODEL1_NAME and MODEL2_NAME) +# - "model-vs-existing": Compare a model against existing AI tags (requires only MODEL1_NAME) +# Default: model-vs-model +COMPARISON_MODE=model-vs-model + +# Models to compare +# MODEL1_NAME: The new model to test (always required) +# MODEL2_NAME: The second model to compare against (required only for model-vs-model mode) +MODEL1_NAME=gpt-4o-mini +MODEL2_NAME=claude-3-5-sonnet + +# OpenAI/OpenRouter API configuration +OPENAI_API_KEY=your_openai_or_openrouter_key +OPENAI_BASE_URL=https://openrouter.ai/api/v1 + +# Optional: Number of bookmarks to test (default: 10) +COMPARE_LIMIT=10 + +# Optional: Context length for inference (default: 8000) +INFERENCE_CONTEXT_LENGTH=8000 + +# Optional: Max output tokens (default: 2048) +INFERENCE_MAX_OUTPUT_TOKENS=2048 + +# Optional: Use max_completion_tokens parameter (default: false) +INFERENCE_USE_MAX_COMPLETION_TOKENS=false diff --git a/tools/compare-models/.gitignore b/tools/compare-models/.gitignore new file mode 100644 index 00000000..9c97bbd4 --- /dev/null +++ b/tools/compare-models/.gitignore @@ -0,0 +1,3 @@ +node_modules +dist +.env diff --git a/tools/compare-models/README.md b/tools/compare-models/README.md new file mode 100644 index 00000000..85c7c6ec --- /dev/null +++ b/tools/compare-models/README.md @@ -0,0 +1,230 @@ +# Model Comparison Tool + +A standalone CLI tool to compare the tagging performance of AI models using your existing Karakeep bookmarks. 
+ +## Features + +- **Two comparison modes:** + - **Model vs Model**: Compare two AI models against each other + - **Model vs Existing**: Compare a new model against existing AI-generated tags on your bookmarks +- Fetches existing bookmarks from your Karakeep instance +- Runs tagging inference with AI models +- **Random shuffling**: Models/tags are randomly assigned to "Model A" or "Model B" for each bookmark to eliminate bias +- Blind comparison: Model names are hidden during voting (only shown as "Model A" and "Model B") +- Interactive voting interface +- Shows final results with winner + +## Setup + +### Environment Variables + +Required environment variables: + +```bash +# Karakeep API configuration +KARAKEEP_API_KEY=your_api_key_here +KARAKEEP_SERVER_ADDR=https://your-karakeep-instance.com + +# Comparison mode (default: model-vs-model) +# - "model-vs-model": Compare two models against each other +# - "model-vs-existing": Compare a model against existing AI tags +COMPARISON_MODE=model-vs-model + +# Models to compare +# MODEL1_NAME: The new model to test (always required) +# MODEL2_NAME: The second model to compare against (required only for model-vs-model mode) +MODEL1_NAME=gpt-4o-mini +MODEL2_NAME=claude-3-5-sonnet + +# OpenAI/OpenRouter API configuration (for running inference) +OPENAI_API_KEY=your_openai_or_openrouter_key +OPENAI_BASE_URL=https://openrouter.ai/api/v1 # Optional, defaults to OpenAI + +# Optional: Number of bookmarks to test (default: 10) +COMPARE_LIMIT=10 +``` + +### Using OpenRouter + +For OpenRouter, set: +```bash +OPENAI_BASE_URL=https://openrouter.ai/api/v1 +OPENAI_API_KEY=your_openrouter_key +``` + +### Using OpenAI Directly + +For OpenAI directly: +```bash +OPENAI_API_KEY=your_openai_key +# OPENAI_BASE_URL can be omitted for direct OpenAI +``` + +## Usage + +### Run with pnpm (Recommended) + +```bash +cd tools/compare-models +pnpm install +pnpm run run +``` + +### Run with environment file + +Create a `.env` file: + +```env 
+KARAKEEP_API_KEY=your_api_key +KARAKEEP_SERVER_ADDR=https://your-karakeep-instance.com +MODEL1_NAME=gpt-4o-mini +MODEL2_NAME=claude-3-5-sonnet +OPENAI_API_KEY=your_openai_key +COMPARE_LIMIT=10 +``` + +Then run: +```bash +pnpm run run +``` + +### Using directly with node + +If you prefer to run the compiled JavaScript directly: + +```bash +pnpm build +export KARAKEEP_API_KEY=your_api_key +export KARAKEEP_SERVER_ADDR=https://your-karakeep-instance.com +export MODEL1_NAME=gpt-4o-mini +export MODEL2_NAME=claude-3-5-sonnet +export OPENAI_API_KEY=your_openai_key +node dist/index.js +``` + +## Comparison Modes + +### Model vs Model Mode + +Compare two different AI models against each other: + +```bash +COMPARISON_MODE=model-vs-model +MODEL1_NAME=gpt-4o-mini +MODEL2_NAME=claude-3-5-sonnet +``` + +This mode runs inference with both models on each bookmark and lets you choose which tags are better. + +### Model vs Existing Mode + +Compare a new model against existing AI-generated tags on your bookmarks: + +```bash +COMPARISON_MODE=model-vs-existing +MODEL1_NAME=gpt-4o-mini +# MODEL2_NAME is not required in this mode +``` + +This mode is useful for: +- Testing if a new model produces better tags than your current model +- Evaluating whether to switch from one model to another +- Quality assurance on existing AI tags + +**Note:** This mode only compares bookmarks that already have AI-generated tags (tags with `attachedBy: "ai"`). Bookmarks without AI tags are automatically filtered out. + +## Usage Flow + +1. The tool fetches your latest link bookmarks from Karakeep + - In **model-vs-existing** mode, only bookmarks with existing AI tags are included +2. For each bookmark, it randomly assigns the options to "Model A" or "Model B" and runs tagging +3. You'll see a side-by-side comparison (randomly shuffled each time): + ``` + === Bookmark 1/10 === + How to Build Better AI Systems + https://example.com/article + This article explores modern approaches to... 
+ + ───────────────────────────────────── + + Model A (blind): + • ai + • machine-learning + • engineering + + Model B (blind): + • artificial-intelligence + • ML + • software-development + + ───────────────────────────────────── + + Which tags do you prefer? [1=Model A, 2=Model B, s=skip, q=quit] > + ``` + +4. Choose your preference: + - `1` - Vote for Model A + - `2` - Vote for Model B + - `s` or `skip` - Skip this comparison + - `q` or `quit` - Exit early and show current results + +5. After completing all comparisons (or quitting early), results are displayed: + ``` + ─────────────────────────────────────── + === FINAL RESULTS === + ─────────────────────────────────────── + gpt-4o-mini: 6 votes + claude-3-5-sonnet: 3 votes + Skipped: 1 + Errors: 0 + ─────────────────────────────────────── + Total bookmarks tested: 10 + + 🏆 WINNER: gpt-4o-mini + ─────────────────────────────────────── + ``` + +6. The actual model names are only shown in the final results - during voting you see only "Model A" and "Model B" + +## Bookmark Filtering + +The tool currently tests only: +- **Link-type bookmarks** (not text notes or assets) +- **Non-archived** bookmarks +- **Latest N bookmarks** (where N is COMPARE_LIMIT) +- **In model-vs-existing mode**: Only bookmarks with existing AI tags (tags with `attachedBy: "ai"`) + +## Architecture + +This tool leverages Karakeep's shared infrastructure: +- **API Client**: Uses `@karakeep/sdk` for type-safe API interactions with proper authentication +- **Inference**: Reuses `@karakeep/shared/inference` for OpenAI client with structured output support +- **Prompts**: Uses `@karakeep/shared/prompts` for consistent tagging prompt generation with token management +- No code duplication - all core functionality is shared with the main Karakeep application + + +## Error Handling + +- If a model fails to generate tags for a bookmark, an error is shown and comparison continues +- Errors are counted separately in final results +- Missing required 
environment variables will cause the tool to exit with a clear error message + +## Build + +To build a standalone binary: + +```bash +pnpm build +``` + +The built binary will be in `dist/index.js`. + +## Notes + +- The tool is designed for manual, human-in-the-loop evaluation +- No results are persisted - they're only displayed in console +- Content is fetched with `includeContent=true` from Karakeep API +- Uses Karakeep SDK (`@karakeep/sdk`) for type-safe API interactions +- Inference runs sequentially to keep state management simple +- Recommended to use `pnpm run` for the best experience (uses tsx for development) +- **Random shuffling**: For each bookmark, models are randomly assigned to "Model A" or "Model B" to eliminate position bias. The actual model names are only revealed in the final results. diff --git a/tools/compare-models/package.json b/tools/compare-models/package.json new file mode 100644 index 00000000..5a493bd2 --- /dev/null +++ b/tools/compare-models/package.json @@ -0,0 +1,24 @@ +{ + "name": "@karakeep/compare-models", + "version": "0.1.0", + "description": "Standalone tool to compare tagging performance between AI models", + "bin": { + "compare-models": "dist/index.js" + }, + "scripts": { + "build": "tsc && chmod +x dist/index.js", + "run": "tsx --env-file=./.env src/index.ts", + "typecheck": "tsc --noEmit" + }, + "dependencies": { + "@karakeep/sdk": "workspace:^", + "@karakeep/shared": "workspace:^", + "chalk": "^5.3.0", + "zod": "^3.24.2" + }, + "devDependencies": { + "@types/node": "^24", + "tsx": "^4.8.1", + "typescript": "^5.9" + } +} diff --git a/tools/compare-models/src/apiClient.ts b/tools/compare-models/src/apiClient.ts new file mode 100644 index 00000000..1d9f799d --- /dev/null +++ b/tools/compare-models/src/apiClient.ts @@ -0,0 +1,71 @@ +import { createKarakeepClient } from "@karakeep/sdk"; + +import type { Bookmark } from "./types"; +import { config } from "./config"; + +export class KarakeepAPIClient { + private readonly client: 
ReturnType<typeof createKarakeepClient>; + + constructor() { + this.client = createKarakeepClient({ + baseUrl: `${config.KARAKEEP_SERVER_ADDR}/api/v1/`, + headers: { + "Content-Type": "application/json", + authorization: `Bearer ${config.KARAKEEP_API_KEY}`, + }, + }); + } + + async fetchBookmarks(limit: number): Promise<Bookmark[]> { + const bookmarks: Bookmark[] = []; + let cursor: string | null = null; + let hasMore = true; + + while (hasMore && bookmarks.length < limit) { + const params: { + limit: number; + includeContent: true; + archived?: boolean; + cursor?: string; + } = { + limit: Math.min(limit - bookmarks.length, 50), + includeContent: true, + archived: false, + }; + + if (cursor) { + params.cursor = cursor; + } + + const { data, response, error } = await this.client.GET("/bookmarks", { + params: { + query: params, + }, + }); + + if (error) { + throw new Error(`Failed to fetch bookmarks: ${String(error)}`); + } + + if (!response.ok) { + throw new Error(`Failed to fetch bookmarks: ${response.status}`); + } + + const batchBookmarks = (data?.bookmarks || []) + .filter((b) => b.content?.type === "link") + .map((b) => ({ + ...b, + tags: (b.tags || []).map((tag) => ({ + name: tag.name, + attachedBy: tag.attachedBy, + })), + })) as Bookmark[]; + + bookmarks.push(...batchBookmarks); + cursor = data?.nextCursor || null; + hasMore = !!cursor; + } + + return bookmarks.slice(0, limit); + } +} diff --git a/tools/compare-models/src/bookmarkProcessor.ts b/tools/compare-models/src/bookmarkProcessor.ts new file mode 100644 index 00000000..4a1bbf0a --- /dev/null +++ b/tools/compare-models/src/bookmarkProcessor.ts @@ -0,0 +1,68 @@ +import type { InferenceClient } from "@karakeep/shared/inference"; +import { buildTextPrompt } from "@karakeep/shared/prompts.server"; + +import { inferTags } from "./inferenceClient"; +import type { Bookmark } from "./types"; + +export async function extractBookmarkContent( + bookmark: Bookmark, +): Promise<string> { + if (bookmark.content.type 
=== "link") { + const parts = []; + + if (bookmark.content.url) { + parts.push(`URL: ${bookmark.content.url}`); + } + + if (bookmark.title) { + parts.push(`Title: ${bookmark.title}`); + } + + if (bookmark.content.description) { + parts.push(`Description: ${bookmark.content.description}`); + } + + if (bookmark.content.htmlContent) { + parts.push(`Content: ${bookmark.content.htmlContent}`); + } + + return parts.join("\n"); + } + + if (bookmark.content.type === "text" && bookmark.content.text) { + return bookmark.content.text; + } + + return ""; +} + +export async function runTaggingForModel( + bookmark: Bookmark, + inferenceClient: InferenceClient, + lang: string = "english", + contextLength: number = 8000, +): Promise<string[]> { + const content = await extractBookmarkContent(bookmark); + + if (!content) { + return []; + } + + try { + // Use the shared prompt builder with empty custom prompts and default tag style + const prompt = await buildTextPrompt( + lang, + [], // No custom prompts for comparison tool + content, + contextLength, + "as-generated", // Use tags as generated by the model + ); + + const tags = await inferTags(inferenceClient, prompt); + return tags; + } catch (error) { + throw new Error( + `Failed to generate tags: ${error instanceof Error ? 
error.message : String(error)}`, + ); + } +} diff --git a/tools/compare-models/src/config.ts b/tools/compare-models/src/config.ts new file mode 100644 index 00000000..3a2d0d41 --- /dev/null +++ b/tools/compare-models/src/config.ts @@ -0,0 +1,34 @@ +import { z } from "zod"; + +// Local config schema for compare-models tool +const envSchema = z.object({ + KARAKEEP_API_KEY: z.string().min(1), + KARAKEEP_SERVER_ADDR: z.string().url(), + MODEL1_NAME: z.string().min(1), + MODEL2_NAME: z.string().min(1).optional(), + OPENAI_API_KEY: z.string().min(1), + OPENAI_BASE_URL: z.string().url().optional(), + OPENAI_SERVICE_TIER: z.enum(["auto", "default", "flex"]).optional(), + COMPARISON_MODE: z + .enum(["model-vs-model", "model-vs-existing"]) + .default("model-vs-model"), + COMPARE_LIMIT: z + .string() + .optional() + .transform((val) => (val ? parseInt(val, 10) : 10)), + INFERENCE_CONTEXT_LENGTH: z + .string() + .optional() + .transform((val) => (val ? parseInt(val, 10) : 8000)), + INFERENCE_MAX_OUTPUT_TOKENS: z + .string() + .optional() + .transform((val) => (val ? 
parseInt(val, 10) : 2048)), + INFERENCE_USE_MAX_COMPLETION_TOKENS: z + .string() + .optional() + .transform((val) => val === "true") + .default("false"), +}); + +export const config = envSchema.parse(process.env); diff --git a/tools/compare-models/src/index.ts b/tools/compare-models/src/index.ts new file mode 100644 index 00000000..88fc9249 --- /dev/null +++ b/tools/compare-models/src/index.ts @@ -0,0 +1,229 @@ +import chalk from "chalk"; + +import type { ComparisonResult } from "./types"; +import { KarakeepAPIClient } from "./apiClient"; +import { runTaggingForModel } from "./bookmarkProcessor"; +import { config } from "./config"; +import { createInferenceClient } from "./inferenceClient"; +import { + askQuestion, + clearProgress, + close, + displayComparison, + displayError, + displayFinalResults, + displayProgress, +} from "./interactive"; + +interface VoteCounters { + model1Votes: number; + model2Votes: number; + skipped: number; + errors: number; + total: number; +} + +interface ShuffleResult { + modelA: string; + modelB: string; + modelAIsModel1: boolean; +} + +async function main() { + console.log(chalk.cyan("\n🚀 Karakeep Model Comparison Tool\n")); + + const isExistingMode = config.COMPARISON_MODE === "model-vs-existing"; + + if (isExistingMode) { + console.log( + chalk.yellow( + `Mode: Comparing ${config.MODEL1_NAME} against existing AI tags\n`, + ), + ); + } else { + if (!config.MODEL2_NAME) { + console.log( + chalk.red( + "\n✗ Error: MODEL2_NAME is required for model-vs-model comparison mode\n", + ), + ); + process.exit(1); + } + console.log( + chalk.yellow( + `Mode: Comparing ${config.MODEL1_NAME} vs ${config.MODEL2_NAME}\n`, + ), + ); + } + + const apiClient = new KarakeepAPIClient(); + + displayProgress("Fetching bookmarks from Karakeep..."); + let bookmarks = await apiClient.fetchBookmarks(config.COMPARE_LIMIT); + clearProgress(); + + // Filter bookmarks with AI tags if in existing mode + if (isExistingMode) { + bookmarks = bookmarks.filter( + (b) => 
b.tags.some((t) => t.attachedBy === "ai"), + ); + console.log( + chalk.green( + `✓ Fetched ${bookmarks.length} link bookmarks with existing AI tags\n`, + ), + ); + } else { + console.log(chalk.green(`✓ Fetched ${bookmarks.length} link bookmarks\n`)); + } + + if (bookmarks.length === 0) { + console.log( + chalk.yellow( + "\n⚠ No bookmarks found with AI tags. Please add some bookmarks with AI tags first.\n", + ), + ); + return; + } + + const counters: VoteCounters = { + model1Votes: 0, + model2Votes: 0, + skipped: 0, + errors: 0, + total: bookmarks.length, + }; + + const detailedResults: ComparisonResult[] = []; + + for (let i = 0; i < bookmarks.length; i++) { + const bookmark = bookmarks[i]; + + displayProgress( + `[${i + 1}/${bookmarks.length}] Running inference on: ${bookmark.title || bookmark.content.title || "Untitled"}`, + ); + + let model1Tags: string[] = []; + let model2Tags: string[] = []; + + // Get tags for model 1 (new model) + try { + const model1Client = createInferenceClient(config.MODEL1_NAME); + model1Tags = await runTaggingForModel( + bookmark, + model1Client, + "english", + config.INFERENCE_CONTEXT_LENGTH, + ); + } catch (error) { + clearProgress(); + displayError( + `${config.MODEL1_NAME} failed: ${error instanceof Error ? error.message : String(error)}`, + ); + counters.errors++; + continue; + } + + // Get tags for model 2 or existing AI tags + if (isExistingMode) { + // Use existing AI tags from the bookmark + model2Tags = bookmark.tags + .filter((t) => t.attachedBy === "ai") + .map((t) => t.name); + } else { + // Run inference with model 2 + try { + const model2Client = createInferenceClient(config.MODEL2_NAME!); + model2Tags = await runTaggingForModel( + bookmark, + model2Client, + "english", + config.INFERENCE_CONTEXT_LENGTH, + ); + } catch (error) { + clearProgress(); + displayError( + `${config.MODEL2_NAME} failed: ${error instanceof Error ? 
error.message : String(error)}`, + ); + counters.errors++; + continue; + } + } + + clearProgress(); + + const model2Label = isExistingMode + ? "Existing AI Tags" + : config.MODEL2_NAME!; + + const shuffleResult: ShuffleResult = { + modelA: config.MODEL1_NAME, + modelB: model2Label, + modelAIsModel1: Math.random() < 0.5, + }; + + if (!shuffleResult.modelAIsModel1) { + shuffleResult.modelA = model2Label; + shuffleResult.modelB = config.MODEL1_NAME; + } + + const comparison: ComparisonResult = { + bookmark, + modelA: shuffleResult.modelA, + modelATags: shuffleResult.modelAIsModel1 ? model1Tags : model2Tags, + modelB: shuffleResult.modelB, + modelBTags: shuffleResult.modelAIsModel1 ? model2Tags : model1Tags, + }; + + displayComparison(i + 1, bookmarks.length, comparison, true); + + const answer = await askQuestion( + "Which tags do you prefer? [1=Model A, 2=Model B, s=skip, q=quit] > ", + ); + + const normalizedAnswer = answer.toLowerCase(); + + if (normalizedAnswer === "q" || normalizedAnswer === "quit") { + console.log(chalk.yellow("\n⏸ Quitting early...\n")); + break; + } + + if (normalizedAnswer === "1") { + comparison.winner = "modelA"; + if (shuffleResult.modelAIsModel1) { + counters.model1Votes++; + } else { + counters.model2Votes++; + } + detailedResults.push(comparison); + } else if (normalizedAnswer === "2") { + comparison.winner = "modelB"; + if (shuffleResult.modelAIsModel1) { + counters.model2Votes++; + } else { + counters.model1Votes++; + } + detailedResults.push(comparison); + } else { + comparison.winner = "skip"; + counters.skipped++; + detailedResults.push(comparison); + } + } + + close(); + + displayFinalResults({ + model1Name: config.MODEL1_NAME, + model2Name: isExistingMode ? 
"Existing AI Tags" : config.MODEL2_NAME!, + model1Votes: counters.model1Votes, + model2Votes: counters.model2Votes, + skipped: counters.skipped, + errors: counters.errors, + total: counters.total, + }); +} + +main().catch((error) => { + console.error(chalk.red(`\n✗ Fatal error: ${error}\n`)); + process.exit(1); +}); diff --git a/tools/compare-models/src/inferenceClient.ts b/tools/compare-models/src/inferenceClient.ts new file mode 100644 index 00000000..0a5ed8b5 --- /dev/null +++ b/tools/compare-models/src/inferenceClient.ts @@ -0,0 +1,46 @@ +import type { InferenceClient } from "@karakeep/shared/inference"; +import { + OpenAIInferenceClient, + type OpenAIInferenceConfig, +} from "@karakeep/shared/inference"; +import { z } from "zod"; + +import { config } from "./config"; + +export function createInferenceClient(modelName: string): InferenceClient { + const inferenceConfig: OpenAIInferenceConfig = { + apiKey: config.OPENAI_API_KEY, + baseURL: config.OPENAI_BASE_URL, + serviceTier: config.OPENAI_SERVICE_TIER, + textModel: modelName, + imageModel: modelName, // Use same model for images if needed + contextLength: config.INFERENCE_CONTEXT_LENGTH, + maxOutputTokens: config.INFERENCE_MAX_OUTPUT_TOKENS, + useMaxCompletionTokens: config.INFERENCE_USE_MAX_COMPLETION_TOKENS, + outputSchema: "structured", + }; + + return new OpenAIInferenceClient(inferenceConfig); +} + +export async function inferTags( + inferenceClient: InferenceClient, + prompt: string, +): Promise<string[]> { + const tagsSchema = z.object({ + tags: z.array(z.string()), + }); + + const response = await inferenceClient.inferFromText(prompt, { + schema: tagsSchema, + }); + + const parsed = tagsSchema.safeParse(JSON.parse(response.response)); + if (!parsed.success) { + throw new Error( + `Failed to parse model response: ${parsed.error.message}`, + ); + } + + return parsed.data.tags; +} diff --git a/tools/compare-models/src/interactive.ts b/tools/compare-models/src/interactive.ts new file mode 100644 index 
00000000..b93fc1d7 --- /dev/null +++ b/tools/compare-models/src/interactive.ts @@ -0,0 +1,128 @@ +import * as readline from "node:readline"; +import chalk from "chalk"; + +import type { ComparisonResult } from "./types"; + +const rl = readline.createInterface({ + input: process.stdin, + output: process.stdout, +}); + +export async function askQuestion(question: string): Promise<string> { + return new Promise((resolve) => { + rl.question(question, (answer) => { + resolve(answer.trim()); + }); + }); +} + +export function displayComparison( + index: number, + total: number, + result: ComparisonResult, + blind: boolean = true, +): void { + const divider = chalk.gray("─".repeat(80)); + const header = chalk.bold.cyan(`\n=== Bookmark ${index}/${total} ===`); + const title = chalk.bold.white(result.bookmark.title || "Untitled"); + const url = result.bookmark.content.url + ? chalk.gray(result.bookmark.content.url) + : ""; + const content = chalk.gray( + result.bookmark.content.description + ? result.bookmark.content.description.substring(0, 200) + "..." + : "", + ); + + const modelAName = blind ? "Model A" : result.modelA; + const modelBName = blind ? 
"Model B" : result.modelB; + + const modelATags = result.modelATags + .map((tag) => chalk.green(` • ${tag}`)) + .join("\n"); + const modelBTags = result.modelBTags + .map((tag) => chalk.yellow(` • ${tag}`)) + .join("\n"); + + console.log(header); + console.log(title); + if (url) console.log(url); + if (content) console.log(content); + console.log(divider); + console.log(); + console.log(chalk.green(`${modelAName}:`)); + if (modelATags) { + console.log(modelATags); + } else { + console.log(chalk.gray(" (no tags)")); + } + console.log(); + console.log(chalk.yellow(`${modelBName}:`)); + if (modelBTags) { + console.log(modelBTags); + } else { + console.log(chalk.gray(" (no tags)")); + } + console.log(); +} + +export function displayError(message: string): void { + console.log(chalk.red(`\n✗ Error: ${message}\n`)); +} + +export function displayProgress(message: string): void { + process.stdout.write(chalk.gray(message)); +} + +export function clearProgress(): void { + process.stdout.write("\r\x1b[K"); +} + +export function close(): void { + rl.close(); +} + +export function displayFinalResults(results: { + model1Name: string; + model2Name: string; + model1Votes: number; + model2Votes: number; + skipped: number; + errors: number; + total: number; +}): void { + const winner = + results.model1Votes > results.model2Votes + ? results.model1Name + : results.model2Votes > results.model1Votes + ? results.model2Name + : "TIE"; + + const divider = chalk.gray("─".repeat(80)); + const header = chalk.bold.cyan("\n=== FINAL RESULTS ==="); + const model1Line = chalk.green( + `${results.model1Name}: ${results.model1Votes} votes`, + ); + const model2Line = chalk.yellow( + `${results.model2Name}: ${results.model2Votes} votes`, + ); + const skippedLine = chalk.gray(`Skipped: ${results.skipped}`); + const errorsLine = chalk.red(`Errors: ${results.errors}`); + const totalLine = chalk.bold(`Total bookmarks tested: ${results.total}`); + const winnerLine = + winner === "TIE" + ? 
chalk.bold.cyan(`\n🏁 RESULT: TIE`) + : chalk.bold.green(`\n🏆 WINNER: ${winner}`); + + console.log(divider); + console.log(header); + console.log(divider); + console.log(model1Line); + console.log(model2Line); + console.log(skippedLine); + console.log(errorsLine); + console.log(divider); + console.log(totalLine); + console.log(winnerLine); + console.log(divider); +} diff --git a/tools/compare-models/src/types.ts b/tools/compare-models/src/types.ts new file mode 100644 index 00000000..35a677ae --- /dev/null +++ b/tools/compare-models/src/types.ts @@ -0,0 +1,38 @@ +export interface Bookmark { + id: string; + title: string | null; + content: { + type: string; + title: string; + url?: string; + text?: string; + htmlContent?: string; + description?: string; + }; + tags: Array<{ name: string; attachedBy?: "ai" | "human" }>; +} + +export interface ModelConfig { + name: string; + apiKey: string; + baseUrl?: string; +} + +export interface ComparisonResult { + bookmark: Bookmark; + modelA: string; + modelATags: string[]; + modelB: string; + modelBTags: string[]; + winner?: "modelA" | "modelB" | "skip"; +} + +export interface FinalResults { + model1Name: string; + model2Name: string; + model1Votes: number; + model2Votes: number; + skipped: number; + errors: number; + total: number; +} diff --git a/tools/compare-models/tsconfig.json b/tools/compare-models/tsconfig.json new file mode 100644 index 00000000..edeec809 --- /dev/null +++ b/tools/compare-models/tsconfig.json @@ -0,0 +1,24 @@ +{ + "$schema": "https://json.schemastore.org/tsconfig", + "extends": "@tsconfig/node22/tsconfig.json", + "compilerOptions": { + "target": "ES2022", + "module": "CommonJS", + "lib": ["ES2022"], + "outDir": "dist", + "rootDir": "src", + "moduleResolution": "node", + "esModuleInterop": true, + "allowSyntheticDefaultImports": true, + "strict": true, + "skipLibCheck": true, + "allowJs": true, + "resolveJsonModule": true, + "isolatedModules": true, + "noEmit": false, + "declaration": false, + 
"sourceMap": false + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist"] +} |
