aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMohamedBassem <me@mbassem.com>2024-02-11 16:53:17 +0000
committerMohamedBassem <me@mbassem.com>2024-02-11 17:57:46 +0000
commit230cafb6dfc8d3bad57d84ef13c3669f5bf5331a (patch)
treeb59f4b386201f9fedde3c7b7546f32c2ed3f61cb
parent2c2d05fd0a2c3c26d765f8a6beb88d907a097c1d (diff)
downloadkarakeep-230cafb6dfc8d3bad57d84ef13c3669f5bf5331a.tar.zst
fix: Fix build for workers package and add it to CI
-rw-r--r--.github/workflows/main.yml3
-rw-r--r--packages/db/index.ts2
-rw-r--r--packages/workers/crawler.ts50
-rw-r--r--packages/workers/index.ts67
-rw-r--r--packages/workers/openai.ts34
-rw-r--r--packages/workers/package.json5
-rw-r--r--packages/workers/tsconfig.json6
-rw-r--r--yarn.lock9
8 files changed, 106 insertions, 70 deletions
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index e0d26326..cc8a45d0 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -26,6 +26,9 @@ jobs:
- name: Prisma
working-directory: packages/db
run: yarn prisma generate
+ - name: Build the workers package
+ working-directory: packages/workers
+ run: yarn typecheck
- name: Build web app
working-directory: packages/web
run: yarn run build
diff --git a/packages/db/index.ts b/packages/db/index.ts
index e87b9515..31ebeec2 100644
--- a/packages/db/index.ts
+++ b/packages/db/index.ts
@@ -12,3 +12,5 @@ export const prisma =
? ["query", "error", "warn"]
: ["error"],
});
+
+export * from "@prisma/client";
diff --git a/packages/workers/crawler.ts b/packages/workers/crawler.ts
index 1cb82f31..45d2f530 100644
--- a/packages/workers/crawler.ts
+++ b/packages/workers/crawler.ts
@@ -1,12 +1,16 @@
import logger from "@remember/shared/logger";
import {
+ LinkCrawlerQueue,
OpenAIQueue,
ZCrawlLinkRequest,
+ queueConnectionDetails,
zCrawlLinkRequestSchema,
} from "@remember/shared/queues";
+
+import { Worker } from "bullmq";
import { Job } from "bullmq";
-import prisma from "@remember/db";
+import { prisma } from "@remember/db";
import { Browser } from "puppeteer";
import puppeteer from "puppeteer-extra";
@@ -32,14 +36,44 @@ const metascraperParser = metascraper([
metascraperUrl(),
]);
-let browser: Browser;
-(async () => {
- puppeteer.use(StealthPlugin());
- // TODO: Configure the browser mode via an env variable
- browser = await puppeteer.launch({ headless: true });
-})();
+let browser: Browser | undefined;
+
+export class CrawlerWorker {
+ static async build() {
+ if (!browser) {
+ puppeteer.use(StealthPlugin());
+ console.log("HERE");
+ browser = await puppeteer.launch({ headless: true });
+ }
+
+ logger.info("Starting crawler worker ...");
+ const worker = new Worker<ZCrawlLinkRequest, void>(
+ LinkCrawlerQueue.name,
+ runCrawler,
+ {
+ connection: queueConnectionDetails,
+ autorun: false,
+ },
+ );
+
+ worker.on("completed", (job) => {
+ const jobId = job?.id || "unknown";
+ logger.info(`[Crawler][${jobId}] Completed successfully`);
+ });
+
+ worker.on("failed", (job, error) => {
+ const jobId = job?.id || "unknown";
+ logger.error(`[Crawler][${jobId}] Crawling job failed: ${error}`);
+ });
+
+ return worker;
+ }
+}
async function crawlPage(url: string) {
+ if (!browser) {
+ throw new Error("The browser must have been initalized by this point.");
+ }
const context = await browser.createBrowserContext();
const page = await context.newPage();
@@ -53,7 +87,7 @@ async function crawlPage(url: string) {
return htmlContent;
}
-export default async function runCrawler(job: Job<ZCrawlLinkRequest, void>) {
+async function runCrawler(job: Job<ZCrawlLinkRequest, void>) {
const jobId = job.id || "unknown";
const request = zCrawlLinkRequestSchema.safeParse(job.data);
diff --git a/packages/workers/index.ts b/packages/workers/index.ts
index d16c42eb..a58b2edf 100644
--- a/packages/workers/index.ts
+++ b/packages/workers/index.ts
@@ -1,65 +1,16 @@
-import { Worker } from "bullmq";
-
import dotenv from "dotenv";
-
-import {
- LinkCrawlerQueue,
- OpenAIQueue,
- ZCrawlLinkRequest,
- ZOpenAIRequest,
- queueConnectionDetails,
-} from "@remember/shared/queues";
-import logger from "@remember/shared/logger";
-import runCrawler from "./crawler";
-import runOpenAI from "./openai";
-
-function crawlerWorker() {
- logger.info("Starting crawler worker ...");
- const worker = new Worker<ZCrawlLinkRequest, void>(
- LinkCrawlerQueue.name,
- runCrawler,
- {
- connection: queueConnectionDetails,
- autorun: false,
- },
- );
-
- worker.on("completed", (job) => {
- const jobId = job?.id || "unknown";
- logger.info(`[Crawler][${jobId}] Completed successfully`);
- });
-
- worker.on("failed", (job, error) => {
- const jobId = job?.id || "unknown";
- logger.error(`[Crawler][${jobId}] Crawling job failed: ${error}`);
- });
-
- return worker;
-}
-
-function openaiWorker() {
- logger.info("Starting openai worker ...");
- const worker = new Worker<ZOpenAIRequest, void>(OpenAIQueue.name, runOpenAI, {
- connection: queueConnectionDetails,
- autorun: false,
- });
-
- worker.on("completed", (job) => {
- const jobId = job?.id || "unknown";
- logger.info(`[openai][${jobId}] Completed successfully`);
- });
-
- worker.on("failed", (job, error) => {
- const jobId = job?.id || "unknown";
- logger.error(`[openai][${jobId}] openai job failed: ${error}`);
- });
-
- return worker;
-}
+import { CrawlerWorker } from "./crawler";
+import { OpenAiWorker } from "./openai";
async function main() {
dotenv.config();
- await Promise.all([crawlerWorker().run(), openaiWorker().run()]);
+
+ const [crawler, openai] = [
+ await CrawlerWorker.build(),
+ await OpenAiWorker.build(),
+ ];
+
+ await Promise.all([crawler.run(), openai.run()]);
}
main();
diff --git a/packages/workers/openai.ts b/packages/workers/openai.ts
index a2f90c8a..999f2827 100644
--- a/packages/workers/openai.ts
+++ b/packages/workers/openai.ts
@@ -1,13 +1,41 @@
-import prisma, { BookmarkedLink } from "@remember/db";
+import { prisma, BookmarkedLink } from "@remember/db";
import logger from "@remember/shared/logger";
-import { ZOpenAIRequest, zOpenAIRequestSchema } from "@remember/shared/queues";
+import { OpenAIQueue, ZOpenAIRequest, queueConnectionDetails, zOpenAIRequestSchema } from "@remember/shared/queues";
import { Job } from "bullmq";
import OpenAI from "openai";
import { z } from "zod";
+import { Worker } from "bullmq";
const openAIResponseSchema = z.object({
tags: z.array(z.string()),
});
+
+
+export class OpenAiWorker {
+ static async build() {
+ logger.info("Starting openai worker ...");
+ const worker = new Worker<ZOpenAIRequest, void>(
+ OpenAIQueue.name,
+ runOpenAI,
+ {
+ connection: queueConnectionDetails,
+ autorun: false,
+ },
+ );
+
+ worker.on("completed", (job) => {
+ const jobId = job?.id || "unknown";
+ logger.info(`[openai][${jobId}] Completed successfully`);
+ });
+
+ worker.on("failed", (job, error) => {
+ const jobId = job?.id || "unknown";
+ logger.error(`[openai][${jobId}] openai job failed: ${error}`);
+ });
+
+ return worker;
+ }
+}
function buildPrompt(url: string, description: string) {
return `
@@ -121,7 +149,7 @@ async function connectTags(bookmarkId: string, tagIds: string[]) {
);
}
-export default async function runOpenAI(job: Job<ZOpenAIRequest, void>) {
+async function runOpenAI(job: Job<ZOpenAIRequest, void>) {
const jobId = job.id || "unknown";
if (!process.env.OPENAI_API_KEY || !process.env.OPENAI_ENABLED) {
diff --git a/packages/workers/package.json b/packages/workers/package.json
index 65648f4e..4c012143 100644
--- a/packages/workers/package.json
+++ b/packages/workers/package.json
@@ -4,6 +4,7 @@
"version": "0.1.0",
"private": true,
"dependencies": {
+ "@remember/db": "0.1.0",
"@remember/shared": "0.1.0",
"dotenv": "^16.4.1",
"metascraper": "^5.43.4",
@@ -21,10 +22,12 @@
"puppeteer-extra-plugin-stealth": "^2.11.2"
},
"devDependencies": {
+ "@tsconfig/node21": "^21.0.1",
"@types/metascraper": "^5.14.3",
"ts-node": "^10.9.2"
},
"scripts": {
- "start": "ts-node index.ts"
+ "start": "ts-node index.ts",
+ "typecheck": "tsc --noEmit"
}
}
diff --git a/packages/workers/tsconfig.json b/packages/workers/tsconfig.json
new file mode 100644
index 00000000..5ab467a9
--- /dev/null
+++ b/packages/workers/tsconfig.json
@@ -0,0 +1,6 @@
+{
+ "$schema": "https://json.schemastore.org/tsconfig",
+ "extends": "@tsconfig/node21/tsconfig.json",
+ "include": ["**/*.ts"],
+ "exclude": ["node_modules"]
+}
diff --git a/yarn.lock b/yarn.lock
index 03654916..32152d0b 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -1199,7 +1199,9 @@ __metadata:
version: 0.0.0-use.local
resolution: "@remember/workers@workspace:packages/workers"
dependencies:
+ "@remember/db": "npm:0.1.0"
"@remember/shared": "npm:0.1.0"
+ "@tsconfig/node21": "npm:^21.0.1"
"@types/metascraper": "npm:^5.14.3"
dotenv: "npm:^16.4.1"
metascraper: "npm:^5.43.4"
@@ -1322,6 +1324,13 @@ __metadata:
languageName: node
linkType: hard
+"@tsconfig/node21@npm:^21.0.1":
+ version: 21.0.1
+ resolution: "@tsconfig/node21@npm:21.0.1"
+ checksum: 10c0/435335bea2e7c5ecf48e4a3c3f297632f4f21a22c4630b16b02a6869a03e60e952451bee9b82b5e6a3de44c5199fdb4cb1a6e437693074edd862deaa79d5e27d
+ languageName: node
+ linkType: hard
+
"@types/cacheable-request@npm:^6.0.1":
version: 6.0.3
resolution: "@types/cacheable-request@npm:6.0.3"