about | summary | refs | log | tree | commit | diff | stats
path: root/apps
diff options
context:
space:
mode:
author erik-nilcoast <138068205+erik-nilcoast@users.noreply.github.com> 2025-03-22 17:38:50 -0500
committer GitHub <noreply@github.com> 2025-03-22 22:38:50 +0000
commit b3417d87a0565b3536029e3b1e347611b5c6679b (patch)
tree f68d2faba080c41bb6c14d9e44ef68f6735f832e /apps
parent 13ba417f55ffd77bb5f4d4f4b53f349bd6d2dc4d (diff)
download karakeep-b3417d87a0565b3536029e3b1e347611b5c6679b.tar.zst
feat(workers): Adds publisher and author og:meta tags to Bookmark (#1141)
Diffstat (limited to 'apps')
-rw-r--r-- apps/workers/crawlerWorker.ts 24
-rw-r--r-- apps/workers/package.json 5
-rw-r--r-- apps/workers/searchWorker.ts 4
3 files changed, 32 insertions, 1 deletions
diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts
index 17dba443..5798b98c 100644
--- a/apps/workers/crawlerWorker.ts
+++ b/apps/workers/crawlerWorker.ts
@@ -14,9 +14,12 @@ import { JSDOM } from "jsdom";
import { DequeuedJob, Runner } from "liteque";
import metascraper from "metascraper";
import metascraperAmazon from "metascraper-amazon";
+import metascraperAuthor from "metascraper-author";
+import metascraperDate from "metascraper-date";
import metascraperDescription from "metascraper-description";
import metascraperImage from "metascraper-image";
import metascraperLogo from "metascraper-logo-favicon";
+import metascraperPublisher from "metascraper-publisher";
import metascraperReadability from "metascraper-readability";
import metascraperTitle from "metascraper-title";
import metascraperTwitter from "metascraper-twitter";
@@ -61,8 +64,14 @@ import {
import { BookmarkTypes } from "@hoarder/shared/types/bookmarks";
const metascraperParser = metascraper([
+ metascraperDate({
+ dateModified: true,
+ datePublished: true,
+ }),
metascraperAmazon(),
metascraperReadability(),
+ metascraperAuthor(),
+ metascraperPublisher(),
metascraperTitle(),
metascraperDescription(),
metascraperTwitter(),
@@ -663,6 +672,17 @@ async function crawlAndParseUrl(
}
abortSignal.throwIfAborted();
/**
 * Converts a date string extracted from page metadata into a `Date`.
 *
 * @param date - The raw date string, or `undefined` when none was found.
 * @returns The parsed `Date`, or `null` when the input is missing, empty,
 *   or cannot be parsed into a valid date.
 */
const parseDate = (date: string | undefined): Date | null => {
  if (!date) {
    return null;
  }
  // `new Date(str)` never throws on unparseable input; it yields an
  // "Invalid Date" whose time value is NaN. A try/catch cannot detect that,
  // so validity is checked explicitly before the value is handed back.
  const parsed = new Date(date);
  return Number.isNaN(parsed.getTime()) ? null : parsed;
};
+
// TODO(important): Restrict the size of content to store
await db.transaction(async (txn) => {
await txn
@@ -677,6 +697,10 @@ async function crawlAndParseUrl(
htmlContent: readableContent?.content,
crawledAt: new Date(),
crawlStatusCode: statusCode,
+ author: meta.author,
+ publisher: meta.publisher,
+ datePublished: parseDate(meta.datePublished),
+ dateModified: parseDate(meta.dateModified),
})
.where(eq(bookmarkLinks.id, bookmarkId));
diff --git a/apps/workers/package.json b/apps/workers/package.json
index be700b36..86d781aa 100644
--- a/apps/workers/package.json
+++ b/apps/workers/package.json
@@ -18,12 +18,15 @@
"execa": "9.3.1",
"jsdom": "^24.0.0",
"liteque": "^0.3.2",
- "metascraper": "^5.45.24",
+ "metascraper": "^5.46.5",
"metascraper-amazon": "^5.45.22",
+ "metascraper-author": "5.46.5",
+ "metascraper-date": "^5.46.5",
"metascraper-description": "^5.45.22",
"metascraper-image": "^5.45.22",
"metascraper-logo": "^5.45.22",
"metascraper-logo-favicon": "^5.45.22",
+ "metascraper-publisher": "^5.46.5",
"metascraper-readability": "^5.45.22",
"metascraper-title": "^5.45.22",
"metascraper-twitter": "^5.45.6",
diff --git a/apps/workers/searchWorker.ts b/apps/workers/searchWorker.ts
index 1fbdbe73..7ad8b430 100644
--- a/apps/workers/searchWorker.ts
+++ b/apps/workers/searchWorker.ts
@@ -85,6 +85,10 @@ async function runIndex(
linkTitle: bookmark.link.title,
description: bookmark.link.description,
content: bookmark.link.content,
+ publisher: bookmark.link.publisher,
+ author: bookmark.link.author,
+ datePublished: bookmark.link.datePublished,
+ dateModified: bookmark.link.dateModified,
}
: undefined),
...(bookmark.asset