From c70d64d4cde2bf2acc5c4164eef79c40fd58aa42 Mon Sep 17 00:00:00 2001 From: Mael Date: Sun, 22 Jun 2025 19:08:21 +0200 Subject: feat(workers): migrate from puppeteer to playwright (#1296) * feat: convert to playwright Convert crawling to use Playwright instead of Chrome. - Update Dockerfile to include Playwright - Update crawler worker to use Playwright API - Update dependencies * feat: convert from Puppeteer to Playwright for crawling * feat: update docker-compose * use separate browser context for better isolation * skip chrome download in linux script * readd the stealth plugin --------- Co-authored-by: Mohamed Bassem --- apps/workers/package.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'apps/workers/package.json') diff --git a/apps/workers/package.json b/apps/workers/package.json index c888350e..2ed6f9df 100644 --- a/apps/workers/package.json +++ b/apps/workers/package.json @@ -4,7 +4,7 @@ "version": "0.1.0", "private": true, "dependencies": { - "@ghostery/adblocker-puppeteer": "^2.5.1", + "@ghostery/adblocker-playwright": "^2.5.1", "@karakeep/db": "workspace:^0.1.0", "@karakeep/shared": "workspace:^0.1.0", "@karakeep/trpc": "workspace:^0.1.0", @@ -36,8 +36,8 @@ "pdf2json": "^3.1.5", "pdf2pic": "^3.1.3", "pdfjs-dist": "^4.2.67", - "puppeteer": "^22.0.0", - "puppeteer-extra": "^3.3.6", + "playwright": "^1.42.1", + "playwright-extra": "^4.3.6", "puppeteer-extra-plugin-stealth": "^2.11.2", "rss-parser": "^3.13.0", "tesseract.js": "^5.1.1", -- cgit v1.2.3-70-g09d2