about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: MohamedBassem <me@mbassem.com> 2024-04-23 21:22:53 +0100
committer: MohamedBassem <me@mbassem.com> 2024-04-23 21:22:53 +0100
commit: 5dac180f486cbc6bb202debd5dde996a9c8204b4 (patch)
tree: e94ae0a1904c774a0fd1114010fd4db7f67878ac
parent: ab92969b7ec356c6c8978a596d6ed2c29dd3a907 (diff)
download: karakeep-5dac180f486cbc6bb202debd5dde996a9c8204b4.tar.zst
fix(crawler): Better extraction for amazon images
-rw-r--r-- apps/workers/crawlerWorker.ts | 2
-rw-r--r-- apps/workers/package.json | 1
-rw-r--r-- pnpm-lock.yaml | 17
3 files changed, 20 insertions, 0 deletions
diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts
index 2d1c7564..b583864d 100644
--- a/apps/workers/crawlerWorker.ts
+++ b/apps/workers/crawlerWorker.ts
@@ -10,6 +10,7 @@ import { eq } from "drizzle-orm";
import { isShuttingDown } from "exit";
import { JSDOM } from "jsdom";
import metascraper from "metascraper";
+import metascraperAmazon from "metascraper-amazon";
import metascraperDescription from "metascraper-description";
import metascraperImage from "metascraper-image";
import metascraperLogo from "metascraper-logo-favicon";
@@ -37,6 +38,7 @@ import {
} from "@hoarder/shared/queues";
const metascraperParser = metascraper([
+ metascraperAmazon(),
metascraperReadability(),
metascraperTitle(),
metascraperDescription(),
diff --git a/apps/workers/package.json b/apps/workers/package.json
index e14c576b..7975cc84 100644
--- a/apps/workers/package.json
+++ b/apps/workers/package.json
@@ -16,6 +16,7 @@
"drizzle-orm": "^0.29.4",
"jsdom": "^24.0.0",
"metascraper": "^5.43.4",
+ "metascraper-amazon": "^5.45.0",
"metascraper-description": "^5.43.4",
"metascraper-image": "^5.43.4",
"metascraper-logo": "^5.43.4",
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index f7b88839..754fe314 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -654,6 +654,9 @@ importers:
metascraper:
specifier: ^5.43.4
version: 5.45.0
+ metascraper-amazon:
+ specifier: ^5.45.0
+ version: 5.45.0
metascraper-description:
specifier: ^5.43.4
version: 5.45.0
@@ -8454,6 +8457,10 @@ packages:
resolution: {integrity: sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==}
engines: {node: '>= 8'}
+ metascraper-amazon@5.45.0:
+ resolution: {integrity: sha512-+T7keyfyPLtLqiXfbm+k8zWLl07Q2iYQklDChZk2nD3sauF4EP024X5ooDP+oJxYqcKFjJq35nDsFotd3dlugA==}
+ engines: {node: '>= 16'}
+
metascraper-description@5.45.0:
resolution: {integrity: sha512-LtAcHkHH9cHwPjuJ+0MBxLMJdZCzljM1QDnJ6oEzrv/8YgXCl+XNSCyip52p4j9ZbpQHSZ7+eYpwZ7IvV3cDKg==}
engines: {node: '>= 16'}
@@ -23915,6 +23922,16 @@ snapshots:
merge2@1.4.1: {}
+ metascraper-amazon@5.45.0:
+ dependencies:
+ '@metascraper/helpers': 5.45.0
+ transitivePeerDependencies:
+ - bufferutil
+ - canvas
+ - supports-color
+ - utf-8-validate
+ dev: false
+
metascraper-description@5.45.0:
dependencies:
'@metascraper/helpers': 5.45.0