From 0621cd920a6461b46778fc5dfc1b02014c494517 Mon Sep 17 00:00:00 2001
From: Gavin Mogan
Date: Sun, 16 Feb 2025 09:10:52 -0800
Subject: fix: custom fetch wrapper for ollama inference. Fixes #656 (#1032)

* Add configurable fetch timeout for Ollama client

* Worker service needs access to the .env file

* repair typescript types

* Update customFetch.ts

* update the config docs

---------

Co-authored-by: sbarbett
Co-authored-by: Mohamed Bassem
---
 packages/shared/config.ts      |  2 ++
 packages/shared/customFetch.ts | 13 +++++++++++++
 packages/shared/inference.ts   |  2 ++
 3 files changed, 17 insertions(+)
 create mode 100644 packages/shared/customFetch.ts

(limited to 'packages/shared')

diff --git a/packages/shared/config.ts b/packages/shared/config.ts
index adc5749e..6e5a4404 100644
--- a/packages/shared/config.ts
+++ b/packages/shared/config.ts
@@ -22,6 +22,7 @@ const allEnv = z.object({
   OLLAMA_BASE_URL: z.string().url().optional(),
   OLLAMA_KEEP_ALIVE: z.string().optional(),
   INFERENCE_JOB_TIMEOUT_SEC: z.coerce.number().default(30),
+  INFERENCE_FETCH_TIMEOUT_SEC: z.coerce.number().default(300),
   INFERENCE_TEXT_MODEL: z.string().default("gpt-4o-mini"),
   INFERENCE_IMAGE_MODEL: z.string().default("gpt-4o-mini"),
   EMBEDDING_TEXT_MODEL: z.string().default("text-embedding-3-small"),
@@ -84,6 +85,7 @@ const serverConfigSchema = allEnv.transform((val) => {
     },
     inference: {
       jobTimeoutSec: val.INFERENCE_JOB_TIMEOUT_SEC,
+      fetchTimeoutSec: val.INFERENCE_FETCH_TIMEOUT_SEC,
       openAIApiKey: val.OPENAI_API_KEY,
       openAIBaseUrl: val.OPENAI_BASE_URL,
       ollamaBaseUrl: val.OLLAMA_BASE_URL,
diff --git a/packages/shared/customFetch.ts b/packages/shared/customFetch.ts
new file mode 100644
index 00000000..e9ac8a89
--- /dev/null
+++ b/packages/shared/customFetch.ts
@@ -0,0 +1,13 @@
+import serverConfig from "./config";
+
+// Custom fetch function with configurable timeout
+export function customFetch(
+  input: Parameters<typeof fetch>[0],
+  init?: Parameters<typeof fetch>[1],
+): ReturnType<typeof fetch> {
+  const timeout = serverConfig.inference.fetchTimeoutSec * 1000; // Convert to milliseconds
+  return fetch(input, {
+    signal: AbortSignal.timeout(timeout),
+    ...init,
+  });
+}
diff --git a/packages/shared/inference.ts b/packages/shared/inference.ts
index e5ddf5ca..92d9dd94 100644
--- a/packages/shared/inference.ts
+++ b/packages/shared/inference.ts
@@ -2,6 +2,7 @@ import { Ollama } from "ollama";
 import OpenAI from "openai";
 
 import serverConfig from "./config";
+import { customFetch } from "./customFetch";
 import logger from "./logger";
 
 export interface InferenceResponse {
@@ -153,6 +154,7 @@ class OllamaInferenceClient implements InferenceClient {
   constructor() {
     this.ollama = new Ollama({
       host: serverConfig.inference.ollamaBaseUrl,
+      fetch: customFetch, // Use the custom fetch with configurable timeout
     });
   }
 
-- 
cgit v1.2.3-70-g09d2