diff options
| author | Ahmad Mujahid <55625580+AhmadMuj@users.noreply.github.com> | 2024-04-11 15:29:51 +0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-04-11 14:29:51 +0300 |
| commit | be622e5594ecb21c82bb6066a82c86e0917bcc35 (patch) | |
| tree | e77973630b30bb5d51abc2ade6993c523a8413b9 /apps/workers/utils.ts | |
| parent | 2806701318dff77b10a5574d4b26ef6032f6b9bc (diff) | |
| download | karakeep-be622e5594ecb21c82bb6066a82c86e0917bcc35.tar.zst | |
feature: Add PDF support (#88)
* feature: Add PDF support
* fix: PDF feature enhancements
* fix: Freeze expo-share-intent version to prevent breaking changes
* fix: set endOfLine to auto for cross-platform development
* fix: Upgrading eslint/parser and eslint-plugin to 7.6.0 to solve the linting issues
* fix: enhancing PDF feature
* fix: Allowing null in filename for backward compatibility
* fix: update pnpm file with pnpm 9.0.0-alpha-8
* fix:(web): PDF Preview for web
Diffstat (limited to 'apps/workers/utils.ts')
| -rw-r--r-- | apps/workers/utils.ts | 32 |
1 files changed, 32 insertions, 0 deletions
// Code added to apps/workers/utils.ts by this commit (index 2f56d3f0..f8c48408),
// reconstructed from the flattened diff. The same diff adds
// `import PDFParser from "pdf2json";` at the top of the file. The pre-existing
// `withTimeout` helper appears only as partial, unchanged context and is not
// reproduced here.

/**
 * Extracts the raw text and metadata from a PDF held in memory.
 *
 * @param buffer - The PDF file contents.
 * @returns A promise that resolves with the parser's raw text content and the
 *   `Meta` object of the parsed document once `pdfParser_dataReady` fires.
 *   The promise rejects with whatever the parser emits on
 *   `pdfParser_dataError`.
 */
export async function readPDFText(buffer: Buffer): Promise<{
  text: string;
  metadata: Record<string, string>;
}> {
  return new Promise((resolve, reject) => {
    // The second constructor argument is the raw-text flag and has to be
    // passed as the number 1, reference:
    // https://github.com/modesty/pdf2json/issues/76#issuecomment-236569265
    const pdfParser = new PDFParser(null, 1);
    pdfParser.on("pdfParser_dataError", reject);
    pdfParser.on("pdfParser_dataReady", (pdfData) => {
      // eslint-disable-next-line
      resolve({
        // The type isn't set correctly, reference : https://github.com/modesty/pdf2json/issues/327
        // eslint-disable-next-line
        text: (pdfParser as any).getRawTextContent(),
        metadata: pdfData.Meta,
      });
    });
    pdfParser.parseBuffer(buffer);
  });
}

/**
 * Limits `content` to its first `length` space-separated words.
 *
 * @param content - The text to truncate.
 * @param length - Maximum number of words to keep (defaults to 1500).
 * @returns `content` unchanged when it has at most `length` words; otherwise
 *   the first `length` words joined back together with single spaces.
 */
export function truncateContent(content: string, length = 1500): string {
  const words = content.split(" ");
  if (words.length <= length) {
    return content;
  }
  // Keep the head of the text: `slice(length)` would discard the first
  // `length` words and return the tail instead of truncating.
  return words.slice(0, length).join(" ");
}
