author    MohamedBassem <me@mbassem.com>  2024-02-07 18:29:52 +0000
committer MohamedBassem <me@mbassem.com>  2024-02-07 18:37:20 +0000
commit    3ec45e8bbb8285b17c703907d4c161b633663096 (patch)
tree      ee52a753740a4a45e0ffe34840fc878ac383a5e2 /crawler
parent    b12b964e0617f410b5c7b0989754cf94d01177cf (diff)
download  karakeep-3ec45e8bbb8285b17c703907d4c161b633663096.tar.zst
[refactor] Rename the crawlers package to workers
Diffstat (limited to 'crawler')
-rw-r--r--  crawler/crawler.ts    72
-rw-r--r--  crawler/index.ts      32
-rw-r--r--  crawler/package.json  19
3 files changed, 0 insertions(+), 123 deletions(-)
diff --git a/crawler/crawler.ts b/crawler/crawler.ts
deleted file mode 100644
index c0f433af..00000000
--- a/crawler/crawler.ts
+++ /dev/null
@@ -1,72 +0,0 @@
-import logger from "@remember/shared/logger";
-import {
- ZCrawlLinkRequest,
- zCrawlLinkRequestSchema,
-} from "@remember/shared/queues";
-import { Job } from "bullmq";
-
-import prisma from "@remember/db";
-
-import metascraper from "metascraper";
-
-const metascraperParser = metascraper([
- require("metascraper-description")(),
- require("metascraper-image")(),
- require("metascraper-logo-favicon")(),
- require("metascraper-title")(),
- require("metascraper-url")(),
-]);
-
-export default async function runCrawler(job: Job<ZCrawlLinkRequest, void>) {
- const jobId = job.id || "unknown";
-
- const request = zCrawlLinkRequestSchema.safeParse(job.data);
- if (!request.success) {
- logger.error(
- `[Crawler][${jobId}] Got malformed job request: ${request.error.toString()}`,
- );
- return;
- }
-
- const { url, linkId } = request.data;
-
- logger.info(
- `[Crawler][${jobId}] Will crawl "${url}" for link with id "${linkId}"`,
- );
- // TODO(IMPORTANT): Run security validations on the input URL (e.g. deny localhost, etc)
-
- const resp = await fetch(url);
- const respBody = await resp.text();
-
- const meta = await metascraperParser({
- url,
- html: respBody,
- });
-
- await prisma.bookmarkedLink.update({
- where: {
- id: linkId,
- },
- data: {
- details: {
- upsert: {
- create: {
- title: meta.title,
- description: meta.description,
- imageUrl: meta.image,
- favicon: meta.logo,
- },
- update: {
- title: meta.title,
- description: meta.description,
- imageUrl: meta.image,
- favicon: meta.logo,
- },
- },
- },
- },
- include: {
- details: true,
- },
- });
-}
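The deleted crawler fetches the raw URL with no validation, as the TODO above the fetch call notes. A minimal sketch of the kind of guard that TODO calls for might look like the following (assertSafeCrawlUrl is a hypothetical helper, not part of this commit):

import { isIP } from "node:net";

// Hypothetical guard for the TODO above (not part of this commit):
// reject URLs that could reach internal services before fetching.
function assertSafeCrawlUrl(rawUrl: string): URL {
  const url = new URL(rawUrl);
  if (url.protocol !== "http:" && url.protocol !== "https:") {
    throw new Error(`refusing non-http(s) URL: ${rawUrl}`);
  }
  // WHATWG URL keeps brackets around IPv6 literals; strip them for isIP().
  const host = url.hostname.replace(/^\[|\]$/g, "");
  const isPrivateV4 =
    isIP(host) === 4 &&
    /^(127\.|10\.|0\.|169\.254\.|192\.168\.|172\.(1[6-9]|2\d|3[01])\.)/.test(host);
  if (
    host === "localhost" ||
    host.endsWith(".localhost") ||
    host === "::1" ||
    isPrivateV4
  ) {
    throw new Error(`refusing to crawl internal host: ${host}`);
  }
  return url;
}

A production guard would also resolve the hostname and re-check the resolved addresses; this sketch only covers literal IPs and obvious loopback names.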
diff --git a/crawler/index.ts b/crawler/index.ts
deleted file mode 100644
index 76c6f03f..00000000
--- a/crawler/index.ts
+++ /dev/null
@@ -1,32 +0,0 @@
-import { Worker } from "bullmq";
-
-import {
- LinkCrawlerQueue,
- ZCrawlLinkRequest,
- queueConnectionDetails,
-} from "@remember/shared/queues";
-import logger from "@remember/shared/logger";
-import runCrawler from "./crawler";
-
-logger.info("Starting crawler worker ...");
-
-const crawlerWorker = new Worker<ZCrawlLinkRequest, void>(
- LinkCrawlerQueue.name,
- runCrawler,
- {
- connection: queueConnectionDetails,
- autorun: false,
- },
-);
-
-crawlerWorker.on("completed", (job) => {
- const jobId = job?.id || "unknown";
- logger.info(`[Crawler][${jobId}] Completed successfully`);
-});
-
-crawlerWorker.on("failed", (job, error) => {
- const jobId = job?.id || "unknown";
- logger.error(`[Crawler][${jobId}] Crawling job failed: ${error}`);
-});
-
-await Promise.all([crawlerWorker.run()]);
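For orientation, the producer side implied by these imports would enqueue work roughly like the sketch below (assuming LinkCrawlerQueue is a BullMQ Queue instance, which its use of .name above suggests; the payload shape follows zCrawlLinkRequestSchema):

import { LinkCrawlerQueue } from "@remember/shared/queues";

// Hypothetical producer sketch, not part of this commit: enqueue a
// crawl request whose payload matches zCrawlLinkRequestSchema.
await LinkCrawlerQueue.add("crawl", {
  url: "https://example.com/article",
  linkId: "some-bookmark-link-id",
});

Note also that `await Promise.all([crawlerWorker.run()])` is equivalent to awaiting `crawlerWorker.run()` directly; the array form only pays off once additional workers join it, which the rename to a general workers package suggests was the plan.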
diff --git a/crawler/package.json b/crawler/package.json
deleted file mode 100644
index 9b590eb8..00000000
--- a/crawler/package.json
+++ /dev/null
@@ -1,19 +0,0 @@
-{
- "$schema": "https://json.schemastore.org/package.json",
- "name": "@remember/crawler",
- "version": "0.1.0",
- "private": true,
- "dependencies": {
- "@remember/shared": "workspace:*",
- "metascraper": "^5.43.4",
- "metascraper-description": "^5.43.4",
- "metascraper-image": "^5.43.4",
- "metascraper-logo": "^5.43.4",
- "metascraper-title": "^5.43.4",
- "metascraper-url": "^5.43.4",
- "metascraper-logo-favicon": "^5.43.4"
- },
- "devDependencies": {
- "@types/metascraper": "^5.14.3"
- }
-}
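One detail visible in the deleted manifest: it pins both metascraper-logo and metascraper-logo-favicon, but crawler.ts above only wires up metascraper-logo-favicon, so the plain metascraper-logo dependency appears to have been unused at the time of deletion.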