/*
 * packages/workers/crawler.ts
 *
 * Provenance: file created by commit 80bb8a108f29331cdb2f2695f6801beee104dc89
 * (MohamedBassem, Thu, 8 Feb 2024 15:14:23 +0000),
 * "[refactor] Move the different packages to the package subdir".
 */
import logger from "@remember/shared/logger";
import {
  OpenAIQueue,
  ZCrawlLinkRequest,
  zCrawlLinkRequestSchema,
} from "@remember/shared/queues";
import { Job } from "bullmq";

import prisma from "@remember/db";

import metascraper from "metascraper";

// Metadata extractor assembled from the description, image, favicon/logo,
// title and url metascraper plugins.
const metascraperParser = metascraper([
  require("metascraper-description")(),
  require("metascraper-image")(),
  require("metascraper-logo-favicon")(),
  require("metascraper-title")(),
  require("metascraper-url")(),
]);

/**
 * Runs basic safety checks on a URL before it is fetched.
 *
 * Only the URL text itself is inspected: the scheme must be http(s) and the
 * host must not be a loopback name or loopback IP literal.
 *
 * @param rawUrl - The URL taken from the crawl request.
 * @returns A human-readable reason when the URL must not be crawled, or
 *   `null` when it passes these checks.
 */
function getCrawlDenialReason(rawUrl: string): string | null {
  let parsed: URL;
  try {
    parsed = new URL(rawUrl);
  } catch {
    return "not a valid absolute URL";
  }

  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
    return `unsupported protocol "${parsed.protocol}"`;
  }

  // The URL parser already normalises IPv4 spellings such as "0x7f.1" to
  // dotted-decimal form; only a trailing root dot has to be stripped here.
  const host = parsed.hostname.toLowerCase().replace(/\.$/, "");
  const isLoopback =
    host === "localhost" ||
    host.endsWith(".localhost") ||
    host === "0.0.0.0" ||
    host === "[::1]" ||
    /^127\.\d{1,3}\.\d{1,3}\.\d{1,3}$/.test(host) ||
    // IPv4-mapped IPv6 form of 127.0.0.0/8, e.g. "[::ffff:7f00:1]".
    /^\[::ffff:7f[0-9a-f]{2}:[0-9a-f]{1,4}\]$/.test(host);
  if (isLoopback) {
    return `loopback host "${host}"`;
  }

  // TODO(IMPORTANT): This does not cover private/link-local ranges, hostnames
  // that resolve to internal addresses, or redirects to such targets.
  return null;
}

/**
 * Crawls the link described by a queue job and stores its page metadata.
 *
 * Steps: validate the job payload, fetch the page, extract title /
 * description / image / favicon with metascraper, upsert them as the
 * `details` of the bookmarked link, then enqueue an "openai" job for the
 * same link.
 *
 * A malformed payload or a URL that fails the safety checks is logged and the
 * function returns without throwing. Fetch, database and enqueue failures are
 * thrown to the caller (presumably the queue worker, which can then mark the
 * job as failed — confirm against the worker setup).
 *
 * @param job - Queue job whose `data` should match `zCrawlLinkRequestSchema`.
 * @throws Error when the page responds with a non-2xx status.
 */
export default async function runCrawler(job: Job): Promise<void> {
  const jobId = job.id || "unknown";

  const request = zCrawlLinkRequestSchema.safeParse(job.data);
  if (!request.success) {
    logger.error(
      `[Crawler][${jobId}] Got malformed job request: ${request.error.toString()}`,
    );
    return;
  }

  const { url, linkId } = request.data;

  const denialReason = getCrawlDenialReason(url);
  if (denialReason !== null) {
    logger.error(
      `[Crawler][${jobId}] Refusing to crawl "${url}": ${denialReason}`,
    );
    return;
  }

  logger.info(
    `[Crawler][${jobId}] Will crawl "${url}" for link with id "${linkId}"`,
  );

  const resp = await fetch(url);
  if (!resp.ok) {
    // Do not scrape error pages: their HTML would otherwise be saved as the
    // bookmark's title/description. Release the unread body before failing.
    await resp.body?.cancel();
    throw new Error(
      `[Crawler][${jobId}] Failed to fetch "${url}": HTTP ${resp.status} ${resp.statusText}`,
    );
  }
  const respBody = await resp.text();

  const meta = await metascraperParser({
    url,
    html: respBody,
  });

  // The same field values are written whether the details row is created or
  // updated, so build the payload once.
  const details = {
    title: meta.title,
    description: meta.description,
    imageUrl: meta.image,
    favicon: meta.logo,
  };

  await prisma.bookmarkedLink.update({
    where: {
      id: linkId,
    },
    data: {
      details: {
        upsert: {
          create: details,
          update: details,
        },
      },
    },
  });

  // Enqueue openai job. Awaited so that an enqueue failure surfaces as a
  // failure of this job instead of an unhandled promise rejection.
  await OpenAIQueue.add("openai", {
    linkId,
  });
}