diff options
| -rw-r--r-- | .github/workflows/main.yml | 7 | ||||
| -rw-r--r-- | Makefile | 20 | ||||
| -rwxr-xr-x | bun.lockb | bin | 179480 -> 241424 bytes | |||
| -rw-r--r-- | crawler/crawler.ts | 70 | ||||
| -rw-r--r-- | crawler/index.ts | 32 | ||||
| -rw-r--r-- | crawler/main.ts | 17 | ||||
| -rw-r--r-- | crawler/package.json | 13 | ||||
| -rw-r--r-- | db/index.ts (renamed from web/lib/prisma.ts) | 0 | ||||
| -rw-r--r-- | db/package.json | 8 | ||||
| -rw-r--r-- | db/prisma/migrations/20240205153748_add_users/migration.sql (renamed from web/prisma/migrations/20240205153748_add_users/migration.sql) | 0 | ||||
| -rw-r--r-- | db/prisma/migrations/20240206000813_add_links/migration.sql (renamed from web/prisma/migrations/20240206000813_add_links/migration.sql) | 0 | ||||
| -rw-r--r-- | db/prisma/migrations/20240206192241_add_favicon/migration.sql | 16 | ||||
| -rw-r--r-- | db/prisma/migrations/migration_lock.toml (renamed from web/prisma/migrations/migration_lock.toml) | 0 | ||||
| -rw-r--r-- | db/prisma/schema.prisma (renamed from web/prisma/schema.prisma) | 7 | ||||
| -rw-r--r-- | package.json | 6 | ||||
| -rw-r--r-- | shared/index.ts | 2 | ||||
| -rw-r--r-- | shared/logger.ts | 16 | ||||
| -rw-r--r-- | shared/package.json | 2 | ||||
| -rw-r--r-- | shared/queues.ts | 16 | ||||
| -rw-r--r-- | web/app/api/v1/links/route.ts | 18 | ||||
| -rw-r--r-- | web/app/page.tsx | 12 | ||||
| -rw-r--r-- | web/lib/auth.ts | 2 | ||||
| -rw-r--r-- | web/lib/types/api/links.ts | 19 | ||||
| -rw-r--r-- | web/package.json | 2 |
24 files changed, 230 insertions, 55 deletions
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 13d49c33..4c441557 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -16,8 +16,9 @@ jobs: run: bunx eslint . - name: Format run: bunx prettier . --check + - name: Prisma + working-directory: db + run: bunx prisma generate - name: Build web app working-directory: web - run: | - bunx prisma generate - bun run build + run: bun run build diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..c37d7541 --- /dev/null +++ b/Makefile @@ -0,0 +1,20 @@ +MAKEFLAGS += --always-make + +format: + bunx prettier . --write && bunx eslint . + +prisma: + cd db; \ + bunx prisma migrate dev; \ + bunx prisma generate + +worker: + cd crawler; \ + bun --watch index.ts +web: + cd web; \ + bun run dev + +studio: + cd db; \ + bunx prisma studio Binary files differdiff --git a/crawler/crawler.ts b/crawler/crawler.ts index 58127331..c0f433af 100644 --- a/crawler/crawler.ts +++ b/crawler/crawler.ts @@ -1,6 +1,72 @@ import logger from "@remember/shared/logger"; +import { + ZCrawlLinkRequest, + zCrawlLinkRequestSchema, +} from "@remember/shared/queues"; import { Job } from "bullmq"; -export default async function runCrawler(job: Job) { - logger.info(`[Crawler] Got a new job: ${job.name}`); +import prisma from "@remember/db"; + +import metascraper from "metascraper"; + +const metascraperParser = metascraper([ + require("metascraper-description")(), + require("metascraper-image")(), + require("metascraper-logo-favicon")(), + require("metascraper-title")(), + require("metascraper-url")(), +]); + +export default async function runCrawler(job: Job<ZCrawlLinkRequest, void>) { + const jobId = job.id || "unknown"; + + const request = zCrawlLinkRequestSchema.safeParse(job.data); + if (!request.success) { + logger.error( + `[Crawler][${jobId}] Got malformed job request: ${request.error.toString()}`, + ); + return; + } + + const { url, linkId } = request.data; + + logger.info( + `[Crawler][${jobId}] Will crawl "${url}" for link with id "${linkId}"`, + ); + // TODO(IMPORTANT): Run security validations on the input URL (e.g. deny localhost, etc) + + const resp = await fetch(url); + const respBody = await resp.text(); + + const meta = await metascraperParser({ + url, + html: respBody, + }); + + await prisma.bookmarkedLink.update({ + where: { + id: linkId, + }, + data: { + details: { + upsert: { + create: { + title: meta.title, + description: meta.description, + imageUrl: meta.image, + favicon: meta.logo, + }, + update: { + title: meta.title, + description: meta.description, + imageUrl: meta.image, + favicon: meta.logo, + }, + }, + }, + }, + include: { + details: true, + }, + }); } diff --git a/crawler/index.ts b/crawler/index.ts new file mode 100644 index 00000000..76c6f03f --- /dev/null +++ b/crawler/index.ts @@ -0,0 +1,32 @@ +import { Worker } from "bullmq"; + +import { + LinkCrawlerQueue, + ZCrawlLinkRequest, + queueConnectionDetails, +} from "@remember/shared/queues"; +import logger from "@remember/shared/logger"; +import runCrawler from "./crawler"; + +logger.info("Starting crawler worker ..."); + +const crawlerWorker = new Worker<ZCrawlLinkRequest, void>( + LinkCrawlerQueue.name, + runCrawler, + { + connection: queueConnectionDetails, + autorun: false, + }, +); + +crawlerWorker.on("completed", (job) => { + const jobId = job?.id || "unknown"; + logger.info(`[Crawler][${jobId}] Completed successfully`); +}); + +crawlerWorker.on("failed", (job, error) => { + const jobId = job?.id || "unknown"; + logger.error(`[Crawler][${jobId}] Crawling job failed: ${error}`); +}); + +await Promise.all([crawlerWorker.run()]); diff --git a/crawler/main.ts b/crawler/main.ts deleted file mode 100644 index 7d1c0f11..00000000 --- a/crawler/main.ts +++ /dev/null @@ -1,17 +0,0 @@ -import { Worker } from "bullmq"; - -import { - LinkCrawlerQueue, - queueConnectionDetails, -} from "@remember/shared/queues"; -import logger from "@remember/shared/logger"; -import runCrawler from "./crawler"; - -logger.info("Starting crawler worker ..."); - -const crawlerWorker = new Worker(LinkCrawlerQueue.name, runCrawler, { - connection: queueConnectionDetails, - autorun: false, -}); - -await Promise.all([crawlerWorker]); diff --git a/crawler/package.json b/crawler/package.json index 67e38cff..9b590eb8 100644 --- a/crawler/package.json +++ b/crawler/package.json @@ -1,8 +1,19 @@ { + "$schema": "https://json.schemastore.org/package.json", "name": "@remember/crawler", "version": "0.1.0", "private": true, "dependencies": { - "@remember/shared": "workspace:*" + "@remember/shared": "workspace:*", + "metascraper": "^5.43.4", + "metascraper-description": "^5.43.4", + "metascraper-image": "^5.43.4", + "metascraper-logo": "^5.43.4", + "metascraper-title": "^5.43.4", + "metascraper-url": "^5.43.4", + "metascraper-logo-favicon": "^5.43.4" + }, + "devDependencies": { + "@types/metascraper": "^5.14.3" } } diff --git a/web/lib/prisma.ts b/db/index.ts index b5bf6ce8..b5bf6ce8 100644 --- a/web/lib/prisma.ts +++ b/db/index.ts diff --git a/db/package.json b/db/package.json new file mode 100644 index 00000000..a10a450b --- /dev/null +++ b/db/package.json @@ -0,0 +1,8 @@ +{ + "$schema": "https://json.schemastore.org/package.json", + "name": "@remember/db", + "version": "0.1.0", + "private": true, + "main": "index.ts", + "dependencies": {} +} diff --git a/web/prisma/migrations/20240205153748_add_users/migration.sql b/db/prisma/migrations/20240205153748_add_users/migration.sql index cbf47073..cbf47073 100644 --- a/web/prisma/migrations/20240205153748_add_users/migration.sql +++ b/db/prisma/migrations/20240205153748_add_users/migration.sql diff --git a/web/prisma/migrations/20240206000813_add_links/migration.sql b/db/prisma/migrations/20240206000813_add_links/migration.sql index 38c8d938..38c8d938 100644 --- a/web/prisma/migrations/20240206000813_add_links/migration.sql +++ b/db/prisma/migrations/20240206000813_add_links/migration.sql diff --git a/db/prisma/migrations/20240206192241_add_favicon/migration.sql b/db/prisma/migrations/20240206192241_add_favicon/migration.sql new file mode 100644 index 00000000..330575e9 --- /dev/null +++ b/db/prisma/migrations/20240206192241_add_favicon/migration.sql @@ -0,0 +1,16 @@ +-- RedefineTables +PRAGMA foreign_keys=OFF; +CREATE TABLE "new_BookmarkedLinkDetails" ( + "id" TEXT NOT NULL PRIMARY KEY, + "title" TEXT, + "description" TEXT, + "imageUrl" TEXT, + "favicon" TEXT, + "createdAt" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, + CONSTRAINT "BookmarkedLinkDetails_id_fkey" FOREIGN KEY ("id") REFERENCES "BookmarkedLink" ("id") ON DELETE CASCADE ON UPDATE CASCADE +); +INSERT INTO "new_BookmarkedLinkDetails" ("createdAt", "description", "id", "imageUrl", "title") SELECT "createdAt", "description", "id", "imageUrl", "title" FROM "BookmarkedLinkDetails"; +DROP TABLE "BookmarkedLinkDetails"; +ALTER TABLE "new_BookmarkedLinkDetails" RENAME TO "BookmarkedLinkDetails"; +PRAGMA foreign_key_check; +PRAGMA foreign_keys=ON; diff --git a/web/prisma/migrations/migration_lock.toml b/db/prisma/migrations/migration_lock.toml index e5e5c470..e5e5c470 100644 --- a/web/prisma/migrations/migration_lock.toml +++ b/db/prisma/migrations/migration_lock.toml diff --git a/web/prisma/schema.prisma b/db/prisma/schema.prisma index 54be3eae..f5b83b66 100644 --- a/web/prisma/schema.prisma +++ b/db/prisma/schema.prisma @@ -72,9 +72,10 @@ model BookmarkedLink { model BookmarkedLinkDetails { id String @id - title String - description String - imageUrl String + title String? + description String? + imageUrl String? + favicon String? createdAt DateTime @default(now()) link BookmarkedLink @relation(fields: [id], references: [id], onDelete: Cascade) diff --git a/package.json b/package.json index e4b183f0..8e6b98ac 100644 --- a/package.json +++ b/package.json @@ -1,11 +1,13 @@ { + "$schema": "https://json.schemastore.org/package.json", "name": "remember", "version": "0.1.0", "private": true, "workspaces": [ "web", "crawler", - "shared" + "shared", + "db" ], "dependencies": { "@next/eslint-plugin-next": "^14.1.0", @@ -16,7 +18,7 @@ "eslint-config-prettier": "^9.1.0", "eslint-plugin-react": "^7.33.2", "eslint-plugin-react-hooks": "^4.6.0", - "winston": "^3.11.0" + "prisma": "^5.9.1" }, "devDependencies": { "typescript": "^5", diff --git a/shared/index.ts b/shared/index.ts index 633b9287..8b93520f 100644 --- a/shared/index.ts +++ b/shared/index.ts @@ -1 +1 @@ -export * as Queues from './queues.ts'; +export * as Queues from "./queues.ts"; diff --git a/shared/logger.ts b/shared/logger.ts index 442304d7..8cd2f808 100644 --- a/shared/logger.ts +++ b/shared/logger.ts @@ -1,15 +1,15 @@ import winston from "winston"; const logger = winston.createLogger({ - level: process.env.LOG_LEVEL || "debug", - format: winston.format.combine( - winston.format.timestamp(), - winston.format.colorize(), - winston.format.printf( - (info) => `${info.timestamp} ${info.level}: ${info.message}`, - ), + level: process.env.LOG_LEVEL || "debug", + format: winston.format.combine( + winston.format.timestamp(), + winston.format.colorize(), + winston.format.printf( + (info) => `${info.timestamp} ${info.level}: ${info.message}`, ), - transports: [new winston.transports.Console()], + ), + transports: [new winston.transports.Console()], }); export default logger; diff --git a/shared/package.json b/shared/package.json index 9f5ee37b..b75b3ac3 100644 --- a/shared/package.json +++ b/shared/package.json @@ -1,8 +1,10 @@ { + "$schema": "https://json.schemastore.org/package.json", "name": "@remember/shared", "version": "0.1.0", "private": true, "dependencies": { + "winston": "^3.11.0" }, "main": "index.ts" } diff --git a/shared/queues.ts b/shared/queues.ts index 4303eaa2..ac5acc57 100644 --- a/shared/queues.ts +++ b/shared/queues.ts @@ -1,10 +1,18 @@ import { Queue } from "bullmq"; +import { z } from "zod"; export const queueConnectionDetails = { - host: process.env.REDIS_HOST || "localhost", - port: parseInt(process.env.REDIS_PORT || "6379"), + host: process.env.REDIS_HOST || "localhost", + port: parseInt(process.env.REDIS_PORT || "6379"), }; -export const LinkCrawlerQueue = new Queue("link_crawler_queue", { connection: queueConnectionDetails }); - +export const zCrawlLinkRequestSchema = z.object({ + linkId: z.string(), + url: z.string().url(), +}); +export type ZCrawlLinkRequest = z.infer<typeof zCrawlLinkRequestSchema>; +export const LinkCrawlerQueue = new Queue<ZCrawlLinkRequest, void>( + "link_crawler_queue", + { connection: queueConnectionDetails }, +); diff --git a/web/app/api/v1/links/route.ts b/web/app/api/v1/links/route.ts index 97bfa3de..990b6c02 100644 --- a/web/app/api/v1/links/route.ts +++ b/web/app/api/v1/links/route.ts @@ -1,7 +1,9 @@ import { authOptions } from "@/lib/auth"; -import prisma from "@/lib/prisma"; +import { LinkCrawlerQueue } from "@remember/shared/queues"; +import prisma from "@remember/db"; + import { - ZNewBookmarkedLinkRequest, + zNewBookmarkedLinkRequestSchema, ZGetLinksResponse, ZBookmarkedLink, } from "@/lib/types/api/links"; @@ -15,7 +17,9 @@ export async function POST(request: NextRequest) { return new Response(null, { status: 401 }); } - const linkRequest = ZNewBookmarkedLinkRequest.safeParse(await request.json()); + const linkRequest = zNewBookmarkedLinkRequestSchema.safeParse( + await request.json(), + ); if (!linkRequest.success) { return NextResponse.json( @@ -33,8 +37,13 @@ export async function POST(request: NextRequest) { }, }); - let response: ZBookmarkedLink = { ...link }; + // Enqueue crawling request + await LinkCrawlerQueue.add("crawl", { + linkId: link.id, + url: link.url, + }); + let response: ZBookmarkedLink = { ...link }; return NextResponse.json(response, { status: 201 }); } @@ -57,6 +66,7 @@ export async function GET() { title: true, description: true, imageUrl: true, + favicon: true, }, }, }, diff --git a/web/app/page.tsx b/web/app/page.tsx index 2df40508..b78fe389 100644 --- a/web/app/page.tsx +++ b/web/app/page.tsx @@ -1,7 +1,16 @@ +"use client"; + +import { useCallback } from "react"; import { LoginButton } from "../components/auth/login"; import { LogoutButton } from "../components/auth/logout"; export default function Home() { + const addUrl = useCallback(async () => { + await fetch("/api/v1/links", { + method: "POST", + body: JSON.stringify({ url: "https://news.ycombinator.com/news" }), + }); + }, []); return ( <main className="flex min-h-screen flex-col items-center justify-between p-24"> <div> @@ -9,6 +18,9 @@ export default function Home() { <br /> <br /> <LogoutButton /> + <br /> + <br /> + <button onClick={addUrl}>Add URL</button> </div> </main> ); diff --git a/web/lib/auth.ts b/web/lib/auth.ts index 8b6527ec..cd6404de 100644 --- a/web/lib/auth.ts +++ b/web/lib/auth.ts @@ -2,7 +2,7 @@ import NextAuth, { NextAuthOptions } from "next-auth"; import { PrismaAdapter } from "@next-auth/prisma-adapter"; import AuthentikProvider from "next-auth/providers/authentik"; import serverConfig from "@/lib/config"; -import prisma from "@/lib/prisma"; +import prisma from "@remember/db"; let providers = []; diff --git a/web/lib/types/api/links.ts b/web/lib/types/api/links.ts index 465fe133..48214f9a 100644 --- a/web/lib/types/api/links.ts +++ b/web/lib/types/api/links.ts @@ -1,6 +1,6 @@ import { z } from "zod"; -export const ZBookmarkedLink = z.object({ +export const zBookmarkedLinkSchema = z.object({ id: z.string(), url: z.string().url(), createdAt: z.coerce.date(), @@ -8,18 +8,21 @@ export const ZBookmarkedLink = z.object({ details: z .object({ title: z.string(), - description: z.string(), - imageUrl: z.string().url(), + description: z.string().optional(), + imageUrl: z.string().url().optional(), + favicon: z.string().url().optional(), }) .nullish(), }); -export type ZBookmarkedLink = z.infer<typeof ZBookmarkedLink>; +export type ZBookmarkedLink = z.infer<typeof zBookmarkedLinkSchema>; // POST /v1/links -export const ZNewBookmarkedLinkRequest = ZBookmarkedLink.pick({ url: true }); +export const zNewBookmarkedLinkRequestSchema = zBookmarkedLinkSchema.pick({ + url: true, +}); // GET /v1/links -export const ZGetLinksResponse = z.object({ - links: z.array(ZBookmarkedLink), +export const zGetLinksResponseSchema = z.object({ + links: z.array(zBookmarkedLinkSchema), }); -export type ZGetLinksResponse = z.infer<typeof ZGetLinksResponse>; +export type ZGetLinksResponse = z.infer<typeof zGetLinksResponseSchema>; diff --git a/web/package.json b/web/package.json index 6dcbff47..6a043a77 100644 --- a/web/package.json +++ b/web/package.json @@ -1,4 +1,5 @@ { + "$schema": "https://json.schemastore.org/package.json", "name": "@remember/web", "version": "0.1.0", "private": true, @@ -18,7 +19,6 @@ "next": "14.1.0", "next-auth": "^4.24.5", "prettier": "^3.2.5", - "prisma": "^5.9.1", "react": "^18", "react-dom": "^18", "tailwind-merge": "^2.2.1", |
