diff options
| author | Mohamed Bassem <me@mbassem.com> | 2025-09-28 15:21:13 +0000 |
|---|---|---|
| committer | Mohamed Bassem <me@mbassem.com> | 2025-09-28 15:21:13 +0000 |
| commit | 37845f994ab74d92d2099760e07806bfd03156b1 (patch) | |
| tree | 142713b305bb24eac5fd5b39719b54164963fc44 | |
| parent | 9eecda184018a0f50cd42b9b791f5c4efc6024fd (diff) | |
| download | karakeep-37845f994ab74d92d2099760e07806bfd03156b1.tar.zst | |
feat: Stop downloading video/audio in playwright
| -rw-r--r-- | apps/workers/workers/crawlerWorker.ts | 19 |
1 file changed, 19 insertions, 0 deletions
diff --git a/apps/workers/workers/crawlerWorker.ts b/apps/workers/workers/crawlerWorker.ts index 65130a01..e6ae201e 100644 --- a/apps/workers/workers/crawlerWorker.ts +++ b/apps/workers/workers/crawlerWorker.ts @@ -452,6 +452,25 @@ async function crawlPage( await globalBlocker.enableBlockingInPage(page); } + // Block audio/video resources + await page.route("**/*", (route) => { + const request = route.request(); + const resourceType = request.resourceType(); + + // Block audio/video resources + if ( + resourceType === "media" || + request.headers()["content-type"]?.includes("video/") || + request.headers()["content-type"]?.includes("audio/") + ) { + route.abort(); + return; + } + + // Continue with other requests + route.continue(); + }); + // Navigate to the target URL logger.info(`[Crawler][${jobId}] Navigating to "${url}"`); const response = await Promise.race([ |
