/*
 * Extracted from commit be622e5594ecb21c82bb6066a82c86e0917bcc35
 * Author:  Ahmad Mujahid <55625580+AhmadMuj@users.noreply.github.com>
 * Date:    Thu, 11 Apr 2024 15:29:51 +0400
 * Subject: feature: Add PDF support (#88)
 *
 *   * feature: Add PDF support
 *   * fix: PDF feature enhancements
 *   * fix: Freeze expo-share-intent version to prevent breaking changes
 *   * fix: set endOfLine to auto for cross-platform development
 *   * fix: Upgrading eslint/parser and eslint-plugin to 7.6.0 to solve the linting issues
 *   * fix: enhancing PDF feature
 *   * fix: Allowing null in filename for backward compatibility
 *   * fix: update pnpm file with pnpm 9.0.0-alpha-8
 *   * fix:(web): PDF Preview for web
 *
 * File: apps/workers/utils.ts (32 insertions).
 *
 * Only the definitions added by this commit are reproduced below. The
 * pre-existing `withTimeout` helper is visible in the patch only as partial
 * diff context, so it is left out. `PDFParser` is the default export of
 * "pdf2json", which the commit imports at the top of the module.
 */

/**
 * Extracts the raw text and the metadata of a PDF held in memory.
 *
 * Wraps the event-based `pdf2json` parser in a promise.
 *
 * @param buffer - The PDF file contents.
 * @returns A promise resolving to `text` (the result of the parser's
 *   `getRawTextContent()`) and `metadata` (the `Meta` field of the parsed data).
 *   The promise rejects with whatever the parser emits on
 *   `pdfParser_dataError`, or with any exception thrown while reading the
 *   parsed result.
 */
export async function readPDFText(buffer: Buffer): Promise<{
  text: string;
  // NOTE(review): the type arguments of `Record` were lost in the extracted
  // patch; `<string, string>` is assumed — confirm against the repository.
  metadata: Record<string, string>;
}> {
  return new Promise((resolve, reject) => {
    // The second constructor argument enables raw-text mode and has to be
    // passed as the number 1, reference:
    // https://github.com/modesty/pdf2json/issues/76#issuecomment-236569265
    const pdfParser = new PDFParser(null, 1);
    pdfParser.on("pdfParser_dataError", reject);
    pdfParser.on("pdfParser_dataReady", (pdfData) => {
      // An exception thrown inside an event listener would not settle the
      // promise, so route it to `reject` explicitly.
      try {
        // eslint-disable-next-line
        resolve({
          // The type isn't set correctly, reference: https://github.com/modesty/pdf2json/issues/327
          // eslint-disable-next-line
          text: (pdfParser as any).getRawTextContent(),
          metadata: pdfData.Meta,
        });
      } catch (err) {
        reject(err);
      }
    });
    pdfParser.parseBuffer(buffer);
  });
}

/**
 * Limits `content` to at most `length` space-separated words.
 *
 * Words are obtained by splitting on a single space character. When the
 * content has `length` words or fewer it is returned unchanged; otherwise the
 * first `length` words are kept and re-joined with single spaces.
 *
 * @param content - The text to truncate.
 * @param length - Maximum number of words to keep (defaults to 1500).
 * @returns The possibly shortened text.
 */
export function truncateContent(content: string, length = 1500): string {
  const words = content.split(" ");
  if (words.length <= length) {
    return content;
  }
  // Keep the leading words; `slice(length)` would instead drop them and
  // return only the tail of the text.
  return words.slice(0, length).join(" ");
}