From 7cc4b08aab654818933d655ee3cbd2db217090a2 Mon Sep 17 00:00:00 2001 From: David Woods Date: Sun, 22 Jun 2025 16:14:43 -0400 Subject: feat(workers): adding a local metascraper plugin for Reddit posts (#1302) * chore: metascraper 5.x comes with its own types, including @types/metascraper is now redundant; also updating to latest versions of metascraper libraries * feat (workers): creating a local metascraper plugin for Reddit posts In the past, the preview images for bookmarks from Reddit links were poorly chosen. Reddit does not use opengraph tags, so metascraper-image simply looked for all images on the page and returned the first. This tended to be the profile picture for the poster for the Reddit link. This new plugin, using the existing metascraper framework, provides a better selection of image for the bookmark when the URL domain is 'reddit'. In addition, recent changes (I believe this was a side effect of adding the metascraper-author and/or the metascraper-publisher plugins, but it could also be related to the metascraper-readability plugin) broke what used to be a good choice of bookmark title. Previously, titles looked like 'Tinyauth just reached 1000 stars! : r/selfhosted' with both thread title and subreddit mentioned. After this update, all Reddit posts now have the same title: 'The heart of the internet'. To return to the better format, this new metascraper-reddit plugin now attempts to retrieve the better title from reddit URLs. Note that in order to gain precedence in title selection, the 'metascraperReddit()' inclusion in the crawlerWorkers.ts metascraper instantiation list had to be moved above metascraperReadability(). 
* chore: updated Hoarder in text to Karakeep * chore: update metascraper versions fix for metascraper types has been merged; the expect-error comment can be removed * chore: merge with master --------- Co-authored-by: Mohamed Bassem --- apps/workers/package.json | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) (limited to 'apps/workers/package.json') diff --git a/apps/workers/package.json b/apps/workers/package.json index 2ed6f9df..595a6e00 100644 --- a/apps/workers/package.json +++ b/apps/workers/package.json @@ -18,19 +18,19 @@ "execa": "9.3.1", "jsdom": "^24.0.0", "liteque": "^0.3.2", - "metascraper": "^5.46.5", - "metascraper-amazon": "^5.45.22", - "metascraper-author": "5.46.5", - "metascraper-date": "^5.46.5", - "metascraper-description": "^5.45.22", - "metascraper-image": "^5.45.22", - "metascraper-logo": "^5.45.22", - "metascraper-logo-favicon": "^5.45.22", - "metascraper-publisher": "^5.46.5", - "metascraper-readability": "^5.45.22", - "metascraper-title": "^5.45.22", + "metascraper": "^5.46.18", + "metascraper-amazon": "^5.46.18", + "metascraper-author": "5.46.18", + "metascraper-date": "^5.46.18", + "metascraper-description": "^5.46.18", + "metascraper-image": "^5.46.18", + "metascraper-logo": "^5.46.18", + "metascraper-logo-favicon": "^5.46.18", + "metascraper-publisher": "^5.46.18", + "metascraper-readability": "^5.46.18", + "metascraper-title": "^5.46.18", "metascraper-twitter": "^5.45.6", - "metascraper-url": "^5.45.22", + "metascraper-url": "^5.46.18", "node-cron": "^3.0.3", "node-fetch": "^3.3.2", "pdf2json": "^3.1.5", @@ -48,7 +48,6 @@ "devDependencies": { "@karakeep/prettier-config": "workspace:^0.1.0", "@types/jsdom": "^21.1.6", - "@types/metascraper": "^5.14.3", "@types/node-cron": "^3.0.11" }, "scripts": { -- cgit v1.2.3-70-g09d2