about | summary | refs | log | tree | commit | diff | stats
path: root/apps
diff options
context:
space:
mode:
author: Mohamed Bassem <me@mbassem.com> 2025-07-04 08:31:40 +0000
committer: Mohamed Bassem <me@mbassem.com> 2025-07-04 16:27:29 +0000
commit: 53b6b3c24d9669ba240c1f9c5fb58672b6cf8666 (patch)
tree: 4dc1a9634655fdd7260baee7f6650e3f6816ec9b /apps
parent: f144f1bcc21e20f29381aa5d69ed3f822dbaec9a (diff)
download: karakeep-53b6b3c24d9669ba240c1f9c5fb58672b6cf8666.tar.zst
fix(workers): A more lenient JSON parsing for LLM responses. Fixes #1267
Diffstat (limited to 'apps')
-rw-r--r-- apps/workers/workers/inference/tagging.ts 40
1 files changed, 39 insertions, 1 deletions
diff --git a/apps/workers/workers/inference/tagging.ts b/apps/workers/workers/inference/tagging.ts
index 5a01e2df..d666b3d9 100644
--- a/apps/workers/workers/inference/tagging.ts
+++ b/apps/workers/workers/inference/tagging.ts
@@ -26,6 +26,42 @@ const openAIResponseSchema = z.object({
tags: z.array(z.string()),
});
+function parseJsonFromLLMResponse(response: string): unknown { // Lenient JSON extraction: returns the first successful JSON.parse result among the attempts below; throws if none succeeds
+ const trimmedResponse = response.trim(); // surrounding whitespace is dropped before any attempt
+
+ // Attempt 1: parse the whole trimmed response as-is
+ try {
+ return JSON.parse(trimmedResponse);
+ } catch {
+ // Attempt 2: extract a JSON object from a markdown code fence (``` or ```json, case-insensitive)
+ const jsonBlockRegex = /```(?:json)?\s*(\{[\s\S]*?\})\s*```/i; // lazy: stops at the first "}" followed by a closing fence; only {...} objects are captured
+ const match = trimmedResponse.match(jsonBlockRegex);
+
+ if (match) {
+ try {
+ return JSON.parse(match[1]);
+ } catch {
+ // Fenced content was not valid JSON; fall through to the next attempt
+ }
+ }
+
+ // Attempt 3: take the span from the first "{" to the last "}" anywhere in the text
+ const jsonObjectRegex = /\{[\s\S]*\}/; // greedy match
+ const objectMatch = trimmedResponse.match(jsonObjectRegex);
+
+ if (objectMatch) {
+ try {
+ return JSON.parse(objectMatch[0]);
+ } catch {
+ // Span was not valid JSON; fall through to the final re-parse
+ }
+ }
+
+ // Last resort: re-parse the trimmed response, which already failed above, so the caller receives that original parse error
+ return JSON.parse(trimmedResponse);
+ }
+}
+
function tagNormalizer(col: Column) {
function normalizeTag(tag: string) {
return tag.toLowerCase().replace(/[ \-_]/g, "");
@@ -225,7 +261,9 @@ async function inferTags(
}
try {
- let tags = openAIResponseSchema.parse(JSON.parse(response.response)).tags;
+ let tags = openAIResponseSchema.parse(
+ parseJsonFromLLMResponse(response.response),
+ ).tags;
logger.info(
`[inference][${jobId}] Inferring tag for bookmark "${bookmark.id}" used ${response.totalTokens} tokens and inferred: ${tags}`,
);