aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--apps/workers/workers/inference/tagging.ts40
1 files changed, 39 insertions, 1 deletions
diff --git a/apps/workers/workers/inference/tagging.ts b/apps/workers/workers/inference/tagging.ts
index 5a01e2df..d666b3d9 100644
--- a/apps/workers/workers/inference/tagging.ts
+++ b/apps/workers/workers/inference/tagging.ts
@@ -26,6 +26,42 @@ const openAIResponseSchema = z.object({
tags: z.array(z.string()),
});
+function parseJsonFromLLMResponse(response: string): unknown {
+ const trimmedResponse = response.trim();
+
+ // Try parsing the response as-is first
+ try {
+ return JSON.parse(trimmedResponse);
+ } catch {
+ // If that fails, try to extract JSON from markdown code blocks
+ const jsonBlockRegex = /```(?:json)?\s*(\{[\s\S]*?\})\s*```/i;
+ const match = trimmedResponse.match(jsonBlockRegex);
+
+ if (match) {
+ try {
+ return JSON.parse(match[1]);
+ } catch {
+ // Fall through to other extraction methods
+ }
+ }
+
+ // Try to find JSON object boundaries in the text
+ const jsonObjectRegex = /\{[\s\S]*\}/;
+ const objectMatch = trimmedResponse.match(jsonObjectRegex);
+
+ if (objectMatch) {
+ try {
+ return JSON.parse(objectMatch[0]);
+ } catch {
+ // Fall through to final attempt
+ }
+ }
+
+ // Last resort: try to parse the original response again to get the original error
+ return JSON.parse(trimmedResponse);
+ }
+}
+
function tagNormalizer(col: Column) {
function normalizeTag(tag: string) {
return tag.toLowerCase().replace(/[ \-_]/g, "");
@@ -225,7 +261,9 @@ async function inferTags(
}
try {
- let tags = openAIResponseSchema.parse(JSON.parse(response.response)).tags;
+ let tags = openAIResponseSchema.parse(
+ parseJsonFromLLMResponse(response.response),
+ ).tags;
logger.info(
`[inference][${jobId}] Inferring tag for bookmark "${bookmark.id}" used ${response.totalTokens} tokens and inferred: ${tags}`,
);