1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
|
import logger from "@remember/shared/logger";
import {
OpenAIQueue,
ZCrawlLinkRequest,
zCrawlLinkRequestSchema,
} from "@remember/shared/queues";
import { Job } from "bullmq";
import prisma from "@remember/db";
import metascraper from "metascraper";
// Rule bundles handed to metascraper, one per metadata field the crawler
// stores or may use (description, image, favicon/logo, title, canonical URL).
// They are loaded with `require` and each module's export is invoked with no
// options to build the rule set.
const metascraperRules = [
  require("metascraper-description")(),
  require("metascraper-image")(),
  require("metascraper-logo-favicon")(),
  require("metascraper-title")(),
  require("metascraper-url")(),
];

// Shared parser instance, built once at module load and reused by every job.
const metascraperParser = metascraper(metascraperRules);
/**
 * Job handler that crawls a bookmarked link and stores the page metadata.
 *
 * Steps, in order:
 *  1. Validate `job.data` against `zCrawlLinkRequestSchema`.
 *  2. Fetch the page at `url` and read its body as text.
 *  3. Extract metadata from the HTML with `metascraperParser`.
 *  4. Upsert title/description/image/favicon into the link's `details`.
 *  5. Enqueue a follow-up job on `OpenAIQueue` for the same link.
 *
 * @param job - Queue job whose `data` is expected to carry `url` and `linkId`.
 * @returns A promise that resolves once the details are persisted and the
 *   follow-up job has been enqueued. A malformed payload is logged and the
 *   handler resolves without doing any work.
 * @throws Any error raised by `fetch`, the metadata parser, the database
 *   update, or the queue is propagated to the caller.
 */
export default async function runCrawler(
  job: Job<ZCrawlLinkRequest, void>,
): Promise<void> {
  const jobId = job.id || "unknown";

  const request = zCrawlLinkRequestSchema.safeParse(job.data);
  if (!request.success) {
    logger.error(
      `[Crawler][${jobId}] Got malformed job request: ${request.error.toString()}`,
    );
    return;
  }

  const { url, linkId } = request.data;

  logger.info(
    `[Crawler][${jobId}] Will crawl "${url}" for link with id "${linkId}"`,
  );

  // TODO(IMPORTANT): Run security validations on the input URL (e.g. deny localhost, etc)
  const resp = await fetch(url);
  const respBody = await resp.text();

  const meta = await metascraperParser({
    url,
    html: respBody,
  });

  // The same values are written whether the details row is created or updated.
  const details = {
    title: meta.title,
    description: meta.description,
    imageUrl: meta.image,
    favicon: meta.logo,
  };

  // The updated record is not used afterwards, so no relation is included.
  await prisma.bookmarkedLink.update({
    where: {
      id: linkId,
    },
    data: {
      details: {
        upsert: {
          create: details,
          update: details,
        },
      },
    },
  });

  // Enqueue openai job. Awaited so that an enqueue failure fails this job
  // instead of surfacing later as an unhandled promise rejection.
  await OpenAIQueue.add("openai", {
    linkId,
  });
}
|