diff options
| author | Ahmad Mujahid <55625580+AhmadMuj@users.noreply.github.com> | 2024-04-11 15:29:51 +0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-04-11 14:29:51 +0300 |
| commit | be622e5594ecb21c82bb6066a82c86e0917bcc35 (patch) | |
| tree | e77973630b30bb5d51abc2ade6993c523a8413b9 /apps/workers/openaiWorker.ts | |
| parent | 2806701318dff77b10a5574d4b26ef6032f6b9bc (diff) | |
| download | karakeep-be622e5594ecb21c82bb6066a82c86e0917bcc35.tar.zst | |
feature: Add PDF support (#88)
* feature: Add PDF support
* fix: PDF feature enhancements
* fix: Freeze expo-share-intent version to prevent breaking changes
* fix: set endOfLine to auto for cross-platform development
* fix: Upgrading eslint/parser and eslint-plugin to 7.6.0 to solve the linting issues
* fix: enhancing PDF feature
* fix: Allowing null in fiename for backward compatibility
* fix: update pnpm file with pnpm 9.0.0-alpha-8
* fix:(web): PDF Preview for web
Diffstat (limited to 'apps/workers/openaiWorker.ts')
| -rw-r--r-- | apps/workers/openaiWorker.ts | 69 |
1 files changed, 57 insertions, 12 deletions
diff --git a/apps/workers/openaiWorker.ts b/apps/workers/openaiWorker.ts index c7b519e2..b07e02fe 100644 --- a/apps/workers/openaiWorker.ts +++ b/apps/workers/openaiWorker.ts @@ -5,7 +5,12 @@ import { z } from "zod"; import type { ZOpenAIRequest } from "@hoarder/shared/queues"; import { db } from "@hoarder/db"; -import { bookmarks, bookmarkTags, tagsOnBookmarks } from "@hoarder/db/schema"; +import { + bookmarkAssets, + bookmarks, + bookmarkTags, + tagsOnBookmarks, +} from "@hoarder/db/schema"; import { readAsset } from "@hoarder/shared/assetdb"; import serverConfig from "@hoarder/shared/config"; import logger from "@hoarder/shared/logger"; @@ -18,6 +23,7 @@ import { import type { InferenceClient } from "./inference"; import { InferenceClientFactory } from "./inference"; +import { readPDFText, truncateContent } from "./utils"; const openAIResponseSchema = z.object({ tags: z.array(z.string()), @@ -91,14 +97,6 @@ CONTENT START HERE: function buildPrompt( bookmark: NonNullable<Awaited<ReturnType<typeof fetchBookmark>>>, ) { - const truncateContent = (content: string) => { - let words = content.split(" "); - if (words.length > 1500) { - words = words.slice(1500); - content = words.join(" "); - } - return content; - }; if (bookmark.link) { if (!bookmark.link.description && !bookmark.link.content) { throw new Error( @@ -158,14 +156,48 @@ async function inferTagsFromImage( ); } const base64 = asset.toString("base64"); - - return await inferenceClient.inferFromImage( + return inferenceClient.inferFromImage( IMAGE_PROMPT_BASE, metadata.contentType, base64, ); } +async function inferTagsFromPDF( + jobId: string, + bookmark: NonNullable<Awaited<ReturnType<typeof fetchBookmark>>>, + inferenceClient: InferenceClient, +) { + const { asset } = await readAsset({ + userId: bookmark.userId, + assetId: bookmark.asset.assetId, + }); + if (!asset) { + throw new Error( + `[inference][${jobId}] AssetId ${bookmark.asset.assetId} for bookmark ${bookmark.id} not found`, + ); + } + const pdfParse = await readPDFText(asset); + if (!pdfParse?.text) { + throw new Error( + `[inference][${jobId}] PDF text is empty. Please make sure that the PDF includes text and not just images.`, + ); + } + + await db + .update(bookmarkAssets) + .set({ + content: pdfParse.text, + metadata: pdfParse.metadata ? JSON.stringify(pdfParse.metadata) : null, + }) + .where(eq(bookmarkAssets.id, bookmark.id)); + + const prompt = `${TEXT_PROMPT_BASE} +Content: ${truncateContent(pdfParse.text)} +`; + return inferenceClient.inferFromText(prompt); +} + async function inferTagsFromText( bookmark: NonNullable<Awaited<ReturnType<typeof fetchBookmark>>>, inferenceClient: InferenceClient, @@ -182,11 +214,24 @@ async function inferTags( if (bookmark.link || bookmark.text) { response = await inferTagsFromText(bookmark, inferenceClient); } else if (bookmark.asset) { - response = await inferTagsFromImage(jobId, bookmark, inferenceClient); + switch (bookmark.asset.assetType) { + case "image": + response = await inferTagsFromImage(jobId, bookmark, inferenceClient); + break; + case "pdf": + response = await inferTagsFromPDF(jobId, bookmark, inferenceClient); + break; + default: + throw new Error(`[inference][${jobId}] Unsupported bookmark type`); + } } else { throw new Error(`[inference][${jobId}] Unsupported bookmark type`); } + if (!response) { + throw new Error(`[inference][${jobId}] Inference response is empty`); + } + try { let tags = openAIResponseSchema.parse(JSON.parse(response.response)).tags; logger.info( |
