From a698aeaaebbaca14202670dd1efbbf666e360b8a Mon Sep 17 00:00:00 2001
From: Mohamed Bassem
Date: Sat, 1 Feb 2025 18:53:43 +0000
Subject: fix: Fix missing handling for AbortSignal in inference client

---
 packages/shared/inference.ts | 81 +++++++++++++++++++++++++++++---------------
 1 file changed, 53 insertions(+), 28 deletions(-)

diff --git a/packages/shared/inference.ts b/packages/shared/inference.ts
index f1bea9ff..e5ddf5ca 100644
--- a/packages/shared/inference.ts
+++ b/packages/shared/inference.ts
@@ -67,13 +67,18 @@ class OpenAIInferenceClient implements InferenceClient {
       ...defaultInferenceOptions,
       ..._opts,
     };
-    const chatCompletion = await this.openAI.chat.completions.create({
-      messages: [{ role: "user", content: prompt }],
-      model: serverConfig.inference.textModel,
-      response_format: optsWithDefaults.json
-        ? { type: "json_object" }
-        : undefined,
-    });
+    const chatCompletion = await this.openAI.chat.completions.create(
+      {
+        messages: [{ role: "user", content: prompt }],
+        model: serverConfig.inference.textModel,
+        response_format: optsWithDefaults.json
+          ? { type: "json_object" }
+          : undefined,
+      },
+      {
+        signal: optsWithDefaults.abortSignal,
+      },
+    );

     const response = chatCompletion.choices[0].message.content;
     if (!response) {
@@ -92,28 +97,33 @@ class OpenAIInferenceClient implements InferenceClient {
       ...defaultInferenceOptions,
       ..._opts,
     };
-    const chatCompletion = await this.openAI.chat.completions.create({
-      model: serverConfig.inference.imageModel,
-      response_format: optsWithDefaults.json
-        ? { type: "json_object" }
-        : undefined,
-      messages: [
-        {
-          role: "user",
-          content: [
-            { type: "text", text: prompt },
-            {
-              type: "image_url",
-              image_url: {
-                url: `data:${contentType};base64,${image}`,
-                detail: "low",
+    const chatCompletion = await this.openAI.chat.completions.create(
+      {
+        model: serverConfig.inference.imageModel,
+        response_format: optsWithDefaults.json
+          ? { type: "json_object" }
+          : undefined,
+        messages: [
+          {
+            role: "user",
+            content: [
+              { type: "text", text: prompt },
+              {
+                type: "image_url",
+                image_url: {
+                  url: `data:${contentType};base64,${image}`,
+                  detail: "low",
+                },
               },
-            },
-          ],
-        },
-      ],
-      max_tokens: 2000,
-    });
+            ],
+          },
+        ],
+        max_tokens: 2000,
+      },
+      {
+        signal: optsWithDefaults.abortSignal,
+      },
+    );

     const response = chatCompletion.choices[0].message.content;
     if (!response) {
@@ -156,6 +166,14 @@ class OllamaInferenceClient implements InferenceClient {
       ...defaultInferenceOptions,
       ..._opts,
     };
+
+    let newAbortSignal = undefined;
+    if (optsWithDefaults.abortSignal) {
+      newAbortSignal = AbortSignal.any([optsWithDefaults.abortSignal]);
+      newAbortSignal.onabort = () => {
+        this.ollama.abort();
+      };
+    }
     const chatCompletion = await this.ollama.chat({
       model: model,
       format: optsWithDefaults.json ? "json" : undefined,
@@ -182,6 +200,9 @@ class OllamaInferenceClient implements InferenceClient {
         }
       }
     } catch (e) {
+      if (e instanceof Error && e.name === "AbortError") {
+        throw e;
+      }
       // There seem to be some bug in ollama where you can get some successful response, but still throw an error.
       // Using stream + accumulating the response so far is a workaround.
       // https://github.com/ollama/ollama-js/issues/72
@@ -189,6 +210,10 @@ class OllamaInferenceClient implements InferenceClient {
       logger.warn(
         `Got an exception from ollama, will still attempt to deserialize the response we got so far: ${e}`,
       );
+    } finally {
+      if (newAbortSignal) {
+        newAbortSignal.onabort = null;
+      }
     }

     return { response, totalTokens };
--
cgit v1.2.3-70-g09d2
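
For context, the Ollama-side change forwards the caller's AbortSignal to the client-wide abort() that this code path uses: it derives a fresh signal with AbortSignal.any(), points its onabort handler at this.ollama.abort(), rethrows AbortError instead of swallowing it in the error-tolerant catch block, and detaches the handler in finally. Below is a minimal standalone sketch of that forwarding pattern; AbortableClient and chatWithAbort are hypothetical names for illustration only, not part of the patch or of ollama-js.

// Hypothetical stand-in for a client (like ollama-js here) whose abort()
// cancels in-flight work for the whole client rather than per request.
interface AbortableClient {
  chat(req: { model: string; prompt: string }): Promise<string>;
  abort(): void;
}

// Bridge an optional caller-supplied AbortSignal to client.abort().
// AbortSignal.any() requires Node 20.3+ or a modern browser.
async function chatWithAbort(
  client: AbortableClient,
  req: { model: string; prompt: string },
  abortSignal?: AbortSignal,
): Promise<string> {
  // Derive a dedicated signal so the handler can be detached without
  // touching listeners the caller attached to the original signal.
  let bridgedSignal: AbortSignal | undefined = undefined;
  if (abortSignal) {
    bridgedSignal = AbortSignal.any([abortSignal]);
    bridgedSignal.onabort = () => client.abort();
  }
  try {
    return await client.chat(req);
  } finally {
    // Detach so a later abort on the caller's signal cannot cancel an
    // unrelated request issued through the same shared client.
    if (bridgedSignal) {
      bridgedSignal.onabort = null;
    }
  }
}

// Usage: abort from the caller's side via an AbortController.
const controller = new AbortController();
setTimeout(() => controller.abort(), 10_000); // give up after 10s
// chatWithAbort(someClient, { model: "llama3", prompt: "hi" }, controller.signal)
//   .catch((e) => console.error(e instanceof Error ? e.name : e));

The cleanup in finally mirrors the patch: because abort() is client-wide, leaving a stale onabort handler attached would let an abort that fires after this request settles cancel whatever request the shared client is serving next.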