aboutsummaryrefslogtreecommitdiffstats
path: root/apps/workers/utils.ts
diff options
context:
space:
mode:
Diffstat (limited to 'apps/workers/utils.ts')
-rw-r--r--apps/workers/utils.ts44
1 files changed, 0 insertions, 44 deletions
diff --git a/apps/workers/utils.ts b/apps/workers/utils.ts
index 15634902..2f56d3f0 100644
--- a/apps/workers/utils.ts
+++ b/apps/workers/utils.ts
@@ -1,9 +1,3 @@
-import os from "os";
-import PDFParser from "pdf2json";
-import { createWorker } from "tesseract.js";
-
-import serverConfig from "@hoarder/shared/config";
-
export function withTimeout<T, Ret>(
func: (param: T) => Promise<Ret>,
timeoutSec: number,
@@ -20,41 +14,3 @@ export function withTimeout<T, Ret>(
]);
};
}
-
-export async function readImageText(buffer: Buffer) {
- if (serverConfig.ocr.langs.length == 1 && serverConfig.ocr.langs[0] == "") {
- return null;
- }
- const worker = await createWorker(serverConfig.ocr.langs, undefined, {
- cachePath: serverConfig.ocr.cacheDir ?? os.tmpdir(),
- });
- try {
- const ret = await worker.recognize(buffer);
- if (ret.data.confidence <= serverConfig.ocr.confidenceThreshold) {
- return null;
- }
- return ret.data.text;
- } finally {
- await worker.terminate();
- }
-}
-
-export async function readPDFText(buffer: Buffer): Promise<{
- text: string;
- metadata: Record<string, string>;
-}> {
- return new Promise((resolve, reject) => {
- // Need raw text flag represents as number (1), reference : https://github.com/modesty/pdf2json/issues/76#issuecomment-236569265
- const pdfParser = new PDFParser(null, 1);
- pdfParser.on("pdfParser_dataError", reject);
- pdfParser.on("pdfParser_dataReady", (pdfData) => {
- resolve({
- // The type isn't set correctly, reference : https://github.com/modesty/pdf2json/issues/327
- // eslint-disable-next-line
- text: (pdfParser as any).getRawTextContent(),
- metadata: pdfData.Meta,
- });
- });
- pdfParser.parseBuffer(buffer);
- });
-}