author     Erik Tews <erik@datenzone.de>  2026-01-01 09:05:51 +0100
committer  GitHub <noreply@github.com>    2026-01-01 08:05:51 +0000
commit     e8c79f2944c021b86c068ea77ff937650cd291d7 (patch)
tree       263d6fdc10ccbff3c6de09c66a31198df0af5401 /packages
parent     3d652eee04d13ce992fbcce9a0fce53d52e99a07 (diff)
download   karakeep-e8c79f2944c021b86c068ea77ff937650cd291d7.tar.zst
fix: use the Ollama generate endpoint instead of chat (#2324)
* Use the Ollama generate endpoint instead of chat

  Ollama has two API endpoints for text generation: a chat endpoint for
  interactive, chat-like generation, and a generate endpoint meant for
  one-shot prompts such as summarization tasks. Karakeep used the chat
  endpoint, which resulted in odd summaries. This commit makes Karakeep
  use the generate endpoint instead, which produces better and more
  compact summaries.

* format
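For context, a minimal sketch of the two call shapes in the ollama JavaScript client; the host and model name are illustrative assumptions, not part of this commit:

import { Ollama } from "ollama";

const ollama = new Ollama({ host: "http://localhost:11434" });

// Chat endpoint: message-based, built for multi-turn exchanges; the reply
// arrives under message.content.
const chat = await ollama.chat({
  model: "llama3.2", // illustrative model name
  messages: [{ role: "user", content: "Summarize this article: ..." }],
});
console.log(chat.message.content);

// Generate endpoint: a single one-shot prompt; the reply arrives under
// response, which suits summarization-style tasks.
const gen = await ollama.generate({
  model: "llama3.2",
  prompt: "Summarize this article: ...",
});
console.log(gen.response);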
Diffstat (limited to 'packages')
-rw-r--r--  packages/shared/inference.ts  9
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/packages/shared/inference.ts b/packages/shared/inference.ts
index d6a9aa10..fe71778e 100644
--- a/packages/shared/inference.ts
+++ b/packages/shared/inference.ts
@@ -274,7 +274,7 @@ class OllamaInferenceClient implements InferenceClient {
this.ollama.abort();
};
}
- const chatCompletion = await this.ollama.chat({
+ const chatCompletion = await this.ollama.generate({
model: model,
format: mapInferenceOutputSchema(
{
@@ -292,16 +292,15 @@ class OllamaInferenceClient implements InferenceClient {
num_ctx: this.config.contextLength,
num_predict: this.config.maxOutputTokens,
},
- messages: [
- { role: "user", content: prompt, images: image ? [image] : undefined },
- ],
+ prompt: prompt,
+ images: image ? [image] : undefined,
});
let totalTokens = 0;
let response = "";
try {
for await (const part of chatCompletion) {
- response += part.message.content;
+ response += part.response;
if (!isNaN(part.eval_count)) {
totalTokens += part.eval_count;
}