diff options
Diffstat (limited to 'apps')
| -rw-r--r-- | apps/workers/workers/crawlerWorker.ts | 18 |
1 files changed, 17 insertions, 1 deletions
diff --git a/apps/workers/workers/crawlerWorker.ts b/apps/workers/workers/crawlerWorker.ts
index baea1346..d8e75aba 100644
--- a/apps/workers/workers/crawlerWorker.ts
+++ b/apps/workers/workers/crawlerWorker.ts
@@ -671,11 +671,27 @@ async function archiveWebpage(
   const assetId = newAssetId();
   const assetPath = `/tmp/${assetId}`;
 
-  await execa({
+  let res = await execa({
     input: html,
     cancelSignal: abortSignal,
   })("monolith", ["-", "-Ije", "-t", "5", "-b", url, "-o", assetPath]);
 
+  if (res.isCanceled) {
+    logger.error(
+      `[Crawler][${jobId}] Canceled archiving the page as we hit global timeout.`,
+    );
+    await tryCatch(fs.unlink(assetPath));
+    return null;
+  }
+
+  if (res.exitCode !== 0) {
+    logger.error(
+      `[Crawler][${jobId}] Failed to archive the page as the command exited with code ${res.exitCode}`,
+    );
+    await tryCatch(fs.unlink(assetPath));
+    return null;
+  }
+
   const contentType = "text/html";
 
   // Get file size and check quota before saving
