diff options
| author | MohamedBassem <me@mbassem.com> | 2024-02-08 15:14:23 +0000 |
|---|---|---|
| committer | MohamedBassem <me@mbassem.com> | 2024-02-08 15:15:21 +0000 |
| commit | 80bb8a108f29331cdb2f2695f6801beee104dc89 (patch) | |
| tree | b1ae2a512963a9c916c4bfed71f7633f508de131 /workers/crawler.ts | |
| parent | 333429adbaaa592cc96b480a5228f0e3f1de4cc2 (diff) | |
| download | karakeep-80bb8a108f29331cdb2f2695f6801beee104dc89.tar.zst | |
[refactor] Move the different packages to the package subdir
Diffstat (limited to 'workers/crawler.ts')
| -rw-r--r-- | workers/crawler.ts | 78 |
1 files changed, 0 insertions, 78 deletions
```diff
diff --git a/workers/crawler.ts b/workers/crawler.ts
deleted file mode 100644
index 817bba45..00000000
--- a/workers/crawler.ts
+++ /dev/null
@@ -1,78 +0,0 @@
-import logger from "@remember/shared/logger";
-import {
-  OpenAIQueue,
-  ZCrawlLinkRequest,
-  zCrawlLinkRequestSchema,
-} from "@remember/shared/queues";
-import { Job } from "bullmq";
-
-import prisma from "@remember/db";
-
-import metascraper from "metascraper";
-
-const metascraperParser = metascraper([
-  require("metascraper-description")(),
-  require("metascraper-image")(),
-  require("metascraper-logo-favicon")(),
-  require("metascraper-title")(),
-  require("metascraper-url")(),
-]);
-
-export default async function runCrawler(job: Job<ZCrawlLinkRequest, void>) {
-  const jobId = job.id || "unknown";
-
-  const request = zCrawlLinkRequestSchema.safeParse(job.data);
-  if (!request.success) {
-    logger.error(
-      `[Crawler][${jobId}] Got malformed job request: ${request.error.toString()}`,
-    );
-    return;
-  }
-
-  const { url, linkId } = request.data;
-
-  logger.info(
-    `[Crawler][${jobId}] Will crawl "${url}" for link with id "${linkId}"`,
-  );
-  // TODO(IMPORTANT): Run security validations on the input URL (e.g. deny localhost, etc)
-
-  const resp = await fetch(url);
-  const respBody = await resp.text();
-
-  const meta = await metascraperParser({
-    url,
-    html: respBody,
-  });
-
-  await prisma.bookmarkedLink.update({
-    where: {
-      id: linkId,
-    },
-    data: {
-      details: {
-        upsert: {
-          create: {
-            title: meta.title,
-            description: meta.description,
-            imageUrl: meta.image,
-            favicon: meta.logo,
-          },
-          update: {
-            title: meta.title,
-            description: meta.description,
-            imageUrl: meta.image,
-            favicon: meta.logo,
-          },
-        },
-      },
-    },
-    include: {
-      details: true,
-    },
-  });
-
-  // Enqueue openai job
-  OpenAIQueue.add("openai", {
-    linkId,
-  });
-}
```
