aboutsummaryrefslogtreecommitdiffstats
path: root/apps/workers/utils.ts
diff options
context:
space:
mode:
authorAhmad Mujahid <55625580+AhmadMuj@users.noreply.github.com>2024-04-11 15:29:51 +0400
committerGitHub <noreply@github.com>2024-04-11 14:29:51 +0300
commitbe622e5594ecb21c82bb6066a82c86e0917bcc35 (patch)
treee77973630b30bb5d51abc2ade6993c523a8413b9 /apps/workers/utils.ts
parent2806701318dff77b10a5574d4b26ef6032f6b9bc (diff)
downloadkarakeep-be622e5594ecb21c82bb6066a82c86e0917bcc35.tar.zst
feature: Add PDF support (#88)
* feature: Add PDF support * fix: PDF feature enhancements * fix: Freeze expo-share-intent version to prevent breaking changes * fix: set endOfLine to auto for cross-platform development * fix: Upgrading eslint/parser and eslint-plugin to 7.6.0 to solve the linting issues * fix: enhancing PDF feature * fix: Allowing null in filename for backward compatibility * fix: update pnpm file with pnpm 9.0.0-alpha-8 * fix:(web): PDF Preview for web
Diffstat (limited to 'apps/workers/utils.ts')
-rw-r--r--apps/workers/utils.ts32
1 files changed, 32 insertions, 0 deletions
diff --git a/apps/workers/utils.ts b/apps/workers/utils.ts
index 2f56d3f0..f8c48408 100644
--- a/apps/workers/utils.ts
+++ b/apps/workers/utils.ts
@@ -1,3 +1,5 @@
+import PDFParser from "pdf2json";
+
export function withTimeout<T, Ret>(
func: (param: T) => Promise<Ret>,
timeoutSec: number,
@@ -14,3 +16,33 @@ export function withTimeout<T, Ret>(
]);
};
}
+
/**
 * Parses an in-memory PDF with pdf2json and returns its raw text and metadata.
 *
 * @param buffer - The PDF file contents handed to `parseBuffer`.
 * @returns A promise that resolves, once the parser emits
 *   `pdfParser_dataReady`, with the parser's raw text content as `text` and
 *   the `Meta` field of the parsed data as `metadata`.
 *   The promise rejects with whatever payload the parser emits on
 *   `pdfParser_dataError`.
 */
export async function readPDFText(buffer: Buffer): Promise<{
  text: string;
  metadata: Record<string, string>;
}> {
  // The second constructor argument is the "need raw text" flag and has to be
  // passed as the number 1, see:
  // https://github.com/modesty/pdf2json/issues/76#issuecomment-236569265
  const parser = new PDFParser(null, 1);

  return new Promise((resolve, reject) => {
    parser.on("pdfParser_dataError", reject);
    parser.on("pdfParser_dataReady", (pdfData) => {
      // getRawTextContent is not declared correctly in the library typings,
      // hence the cast, see: https://github.com/modesty/pdf2json/issues/327
      // eslint-disable-next-line
      const rawText = (parser as any).getRawTextContent();
      // eslint-disable-next-line
      resolve({ text: rawText, metadata: pdfData.Meta });
    });
    parser.parseBuffer(buffer);
  });
}
+
/**
 * Truncates `content` to at most `length` space-separated words.
 *
 * Words are delimited by a single space character (`" "`); other whitespace
 * such as newlines or tabs does not split words.
 *
 * @param content - The text to truncate.
 * @param length - Maximum number of words to keep. Defaults to 1500.
 * @returns `content` unchanged when it has `length` words or fewer, otherwise
 *   the first `length` words joined back together with single spaces.
 */
export function truncateContent(content: string, length = 1500): string {
  const words = content.split(" ");
  if (words.length <= length) {
    return content;
  }
  // Keep the leading words. `slice(length)` would instead drop them and
  // return only the tail of the text.
  return words.slice(0, length).join(" ");
}