From 8dd84ef58b8da920f3e7718cfb5129a44437e53d Mon Sep 17 00:00:00 2001 From: Mohamed Bassem Date: Sun, 28 Sep 2025 11:30:35 +0000 Subject: fix: Cleanup temp assets on monolith timeout --- apps/workers/workers/crawlerWorker.ts | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'apps/workers') diff --git a/apps/workers/workers/crawlerWorker.ts b/apps/workers/workers/crawlerWorker.ts index baea1346..d8e75aba 100644 --- a/apps/workers/workers/crawlerWorker.ts +++ b/apps/workers/workers/crawlerWorker.ts @@ -671,11 +671,27 @@ async function archiveWebpage( const assetId = newAssetId(); const assetPath = `/tmp/${assetId}`; - await execa({ + let res = await execa({ input: html, cancelSignal: abortSignal, })("monolith", ["-", "-Ije", "-t", "5", "-b", url, "-o", assetPath]); + if (res.isCanceled) { + logger.error( + `[Crawler][${jobId}] Canceled archiving the page as we hit global timeout.`, + ); + await tryCatch(fs.unlink(assetPath)); + return null; + } + + if (res.exitCode !== 0) { + logger.error( + `[Crawler][${jobId}] Failed to archive the page as the command exited with code ${res.exitCode}`, + ); + await tryCatch(fs.unlink(assetPath)); + return null; + } + const contentType = "text/html"; // Get file size and check quota before saving -- cgit v1.2.3-70-g09d2