aboutsummaryrefslogtreecommitdiffstats
path: root/workers
diff options
context:
space:
mode:
Diffstat (limited to 'workers')
-rw-r--r--workers/crawler.ts72
-rw-r--r--workers/index.ts32
-rw-r--r--workers/package.json19
3 files changed, 123 insertions, 0 deletions
diff --git a/workers/crawler.ts b/workers/crawler.ts
new file mode 100644
index 00000000..c0f433af
--- /dev/null
+++ b/workers/crawler.ts
@@ -0,0 +1,72 @@
+import logger from "@remember/shared/logger";
+import {
+ ZCrawlLinkRequest,
+ zCrawlLinkRequestSchema,
+} from "@remember/shared/queues";
+import { Job } from "bullmq";
+
+import prisma from "@remember/db";
+
+import metascraper from "metascraper";
+
// Metascraper pipeline used to pull page metadata (title, description,
// preview image, favicon) out of crawled HTML.
// NOTE(review): plugins are loaded via require() while the rest of the file
// uses ESM imports — presumably the bundler/ts config allows this mix; confirm.
const metascraperParser = metascraper([
  require("metascraper-description")(),
  require("metascraper-image")(),
  require("metascraper-logo-favicon")(),
  require("metascraper-title")(),
  require("metascraper-url")(),
]);
+
+export default async function runCrawler(job: Job<ZCrawlLinkRequest, void>) {
+ const jobId = job.id || "unknown";
+
+ const request = zCrawlLinkRequestSchema.safeParse(job.data);
+ if (!request.success) {
+ logger.error(
+ `[Crawler][${jobId}] Got malformed job request: ${request.error.toString()}`,
+ );
+ return;
+ }
+
+ const { url, linkId } = request.data;
+
+ logger.info(
+ `[Crawler][${jobId}] Will crawl "${url}" for link with id "${linkId}"`,
+ );
+ // TODO(IMPORTANT): Run security validations on the input URL (e.g. deny localhost, etc)
+
+ const resp = await fetch(url);
+ const respBody = await resp.text();
+
+ const meta = await metascraperParser({
+ url,
+ html: respBody,
+ });
+
+ await prisma.bookmarkedLink.update({
+ where: {
+ id: linkId,
+ },
+ data: {
+ details: {
+ upsert: {
+ create: {
+ title: meta.title,
+ description: meta.description,
+ imageUrl: meta.image,
+ favicon: meta.logo,
+ },
+ update: {
+ title: meta.title,
+ description: meta.description,
+ imageUrl: meta.image,
+ favicon: meta.logo,
+ },
+ },
+ },
+ },
+ include: {
+ details: true,
+ },
+ });
+}
diff --git a/workers/index.ts b/workers/index.ts
new file mode 100644
index 00000000..76c6f03f
--- /dev/null
+++ b/workers/index.ts
@@ -0,0 +1,32 @@
+import { Worker } from "bullmq";
+
+import {
+ LinkCrawlerQueue,
+ ZCrawlLinkRequest,
+ queueConnectionDetails,
+} from "@remember/shared/queues";
+import logger from "@remember/shared/logger";
+import runCrawler from "./crawler";
+
+logger.info("Starting crawler worker ...");
+
+const crawlerWorker = new Worker<ZCrawlLinkRequest, void>(
+ LinkCrawlerQueue.name,
+ runCrawler,
+ {
+ connection: queueConnectionDetails,
+ autorun: false,
+ },
+);
+
+crawlerWorker.on("completed", (job) => {
+ const jobId = job?.id || "unknown";
+ logger.info(`[Crawler][${jobId}] Completed successfully`);
+});
+
+crawlerWorker.on("failed", (job, error) => {
+ const jobId = job?.id || "unknown";
+ logger.error(`[Crawler][${jobId}] Crawling job failed: ${error}`);
+});
+
+await Promise.all([crawlerWorker.run()]);
diff --git a/workers/package.json b/workers/package.json
new file mode 100644
index 00000000..950233ab
--- /dev/null
+++ b/workers/package.json
@@ -0,0 +1,19 @@
+{
+ "$schema": "https://json.schemastore.org/package.json",
+ "name": "@remember/workers",
+ "version": "0.1.0",
+ "private": true,
+ "dependencies": {
    "@remember/shared": "workspace:*",
    "metascraper": "^5.43.4",
    "metascraper-description": "^5.43.4",
    "metascraper-image": "^5.43.4",
    "metascraper-logo": "^5.43.4",
    "metascraper-logo-favicon": "^5.43.4",
    "metascraper-title": "^5.43.4",
    "metascraper-url": "^5.43.4"
+ },
+ "devDependencies": {
+ "@types/metascraper": "^5.14.3"
+ }
+}