aboutsummaryrefslogtreecommitdiffstats
path: root/apps/workers/openaiWorker.ts
diff options
context:
space:
mode:
author kamtschatka <sschatka@gmail.com> 2024-05-18 14:36:10 +0200
committer GitHub <noreply@github.com> 2024-05-18 13:36:10 +0100
commit fdc27d38a22dfdb1ec8b91b0d80d654dd8b866e0 (patch)
tree fa2e8c346fe53ec70a6ed9e94d8f1e7eee8dbf1c /apps/workers/openaiWorker.ts
parent eb218cefbf82f460e3830d66d7f0d0ca63d1ac6d (diff)
download karakeep-fdc27d38a22dfdb1ec8b91b0d80d654dd8b866e0.tar.zst
feature(inference): Improve ollama tagging (#162)
* Inference Failed with Ollama #20: Changed the prompt to be split in two, so that Ollama does not forget the instructions that previously came before the content.
* Update apps/workers/openaiWorker.ts
Co-authored-by: Mohamed Bassem <me@mbassem.com>
---------
Co-authored-by: kamtschatka <simon.schatka@gmx.at>
Co-authored-by: Mohamed Bassem <me@mbassem.com>
Diffstat (limited to 'apps/workers/openaiWorker.ts')
-rw-r--r-- apps/workers/openaiWorker.ts | 17
1 files changed, 12 insertions, 5 deletions
diff --git a/apps/workers/openaiWorker.ts b/apps/workers/openaiWorker.ts
index f0314884..697c9c53 100644
--- a/apps/workers/openaiWorker.ts
+++ b/apps/workers/openaiWorker.ts
@@ -97,12 +97,17 @@ with the key "tags" and the value is list of tags. Don't wrap the response in a
const TEXT_PROMPT_BASE = `
I'm building a read-it-later app and I need your help with automatic tagging.
-Please analyze the text after the sentence "CONTENT START HERE:" and suggest relevant tags that describe its key themes, topics, and main ideas.
+Please analyze the text between the sentences "CONTENT START HERE" and "CONTENT END HERE" and suggest relevant tags that describe its key themes, topics, and main ideas.
Aim for a variety of tags, including broad categories, specific keywords, and potential sub-genres. The tags language must be ${serverConfig.inference.inferredTagLang}. If it's a famous website
-you may also include a tag for the website. If the tag is not generic enough, don't include it. Aim for 3-5 tags. If there are no good tags, don't emit any.
+you may also include a tag for the website. If the tag is not generic enough, don't include it.
The content can include text for cookie consent and privacy policy, ignore those while tagging.
-You must respond in JSON with the key "tags" and the value is list of tags.
-CONTENT START HERE:
+CONTENT START HERE
+`;
+
+const TEXT_PROMPT_INSTRUCTIONS = `
+CONTENT END HERE
+You must respond in JSON with the key "tags" and the value is an array of string tags.
+Aim for 3-5 tags. If there are no good tags, leave the array empty.
`;
function buildPrompt(
@@ -125,7 +130,7 @@ URL: ${bookmark.link.url}
Title: ${bookmark.link.title ?? ""}
Description: ${bookmark.link.description ?? ""}
Content: ${content ?? ""}
- `;
+${TEXT_PROMPT_INSTRUCTIONS}`;
}
if (bookmark.text) {
@@ -134,6 +139,7 @@ Content: ${content ?? ""}
return `
${TEXT_PROMPT_BASE}
${content}
+${TEXT_PROMPT_INSTRUCTIONS}
`;
}
@@ -205,6 +211,7 @@ async function inferTagsFromPDF(
const prompt = `${TEXT_PROMPT_BASE}
Content: ${truncateContent(pdfParse.text)}
+${TEXT_PROMPT_INSTRUCTIONS}
`;
return inferenceClient.inferFromText(prompt);
}