diff options
Diffstat (limited to 'apps')
| -rw-r--r-- | apps/workers/workers/inference/tagging.ts | 40 |
1 files changed, 39 insertions, 1 deletions
diff --git a/apps/workers/workers/inference/tagging.ts b/apps/workers/workers/inference/tagging.ts index 5a01e2df..d666b3d9 100644 --- a/apps/workers/workers/inference/tagging.ts +++ b/apps/workers/workers/inference/tagging.ts @@ -26,6 +26,42 @@ const openAIResponseSchema = z.object({ tags: z.array(z.string()), }); +function parseJsonFromLLMResponse(response: string): unknown { + const trimmedResponse = response.trim(); + + // Try parsing the response as-is first + try { + return JSON.parse(trimmedResponse); + } catch { + // If that fails, try to extract JSON from markdown code blocks + const jsonBlockRegex = /```(?:json)?\s*(\{[\s\S]*?\})\s*```/i; + const match = trimmedResponse.match(jsonBlockRegex); + + if (match) { + try { + return JSON.parse(match[1]); + } catch { + // Fall through to other extraction methods + } + } + + // Try to find JSON object boundaries in the text + const jsonObjectRegex = /\{[\s\S]*\}/; + const objectMatch = trimmedResponse.match(jsonObjectRegex); + + if (objectMatch) { + try { + return JSON.parse(objectMatch[0]); + } catch { + // Fall through to final attempt + } + } + + // Last resort: try to parse the original response again to get the original error + return JSON.parse(trimmedResponse); + } +} + function tagNormalizer(col: Column) { function normalizeTag(tag: string) { return tag.toLowerCase().replace(/[ \-_]/g, ""); @@ -225,7 +261,9 @@ async function inferTags( } try { - let tags = openAIResponseSchema.parse(JSON.parse(response.response)).tags; + let tags = openAIResponseSchema.parse( + parseJsonFromLLMResponse(response.response), + ).tags; logger.info( `[inference][${jobId}] Inferring tag for bookmark "${bookmark.id}" used ${response.totalTokens} tokens and inferred: ${tags}`, ); |
