diff options
Diffstat (limited to 'apps/workers/openaiWorker.ts')
| -rw-r--r-- | apps/workers/openaiWorker.ts | 50 |
1 files changed, 2 insertions, 48 deletions
diff --git a/apps/workers/openaiWorker.ts b/apps/workers/openaiWorker.ts index 0e3193b6..bad06bb3 100644 --- a/apps/workers/openaiWorker.ts +++ b/apps/workers/openaiWorker.ts @@ -7,7 +7,6 @@ import type { InferenceClient } from "@hoarder/shared/inference"; import type { ZOpenAIRequest } from "@hoarder/shared/queues"; import { db } from "@hoarder/db"; import { - bookmarkAssets, bookmarks, bookmarkTags, customPrompts, @@ -24,8 +23,6 @@ import { zOpenAIRequestSchema, } from "@hoarder/shared/queues"; -import { readImageText, readPDFText } from "./utils"; - const openAIResponseSchema = z.object({ tags: z.array(z.string()), }); @@ -156,25 +153,6 @@ async function inferTagsFromImage( ); } - let imageText = null; - try { - imageText = await readImageText(asset); - } catch (e) { - logger.error(`[inference][${jobId}] Failed to read image text: ${e}`); - } - - if (imageText) { - logger.info( - `[inference][${jobId}] Extracted ${imageText.length} characters from image.`, - ); - await db - .update(bookmarkAssets) - .set({ - content: imageText, - }) - .where(eq(bookmarkAssets.id, bookmark.id)); - } - const base64 = asset.toString("base64"); return inferenceClient.inferFromImage( buildImagePrompt( @@ -245,38 +223,14 @@ function containsTagsPlaceholder(prompts: { text: string }[]): boolean { } async function inferTagsFromPDF( - jobId: string, + _jobId: string, bookmark: NonNullable<Awaited<ReturnType<typeof fetchBookmark>>>, inferenceClient: InferenceClient, ) { - const { asset } = await readAsset({ - userId: bookmark.userId, - assetId: bookmark.asset.assetId, - }); - if (!asset) { - throw new Error( - `[inference][${jobId}] AssetId ${bookmark.asset.assetId} for bookmark ${bookmark.id} not found`, - ); - } - const pdfParse = await readPDFText(asset); - if (!pdfParse?.text) { - throw new Error( - `[inference][${jobId}] PDF text is empty. Please make sure that the PDF includes text and not just images.`, - ); - } - - await db - .update(bookmarkAssets) - .set({ - content: pdfParse.text, - metadata: pdfParse.metadata ? JSON.stringify(pdfParse.metadata) : null, - }) - .where(eq(bookmarkAssets.id, bookmark.id)); - const prompt = buildTextPrompt( serverConfig.inference.inferredTagLang, await fetchCustomPrompts(bookmark.userId, "text"), - `Content: ${pdfParse.text}`, + `Content: ${bookmark.asset.content}`, serverConfig.inference.contextLength, ); return inferenceClient.inferFromText(prompt, { json: true }); |
