about summary refs log tree commit diff stats
path: root/apps
diff options
context:
space:
mode:
Diffstat (limited to 'apps')
-rw-r--r-- apps/workers/workers/crawlerWorker.ts 18
1 file changed, 17 insertions, 1 deletion
diff --git a/apps/workers/workers/crawlerWorker.ts b/apps/workers/workers/crawlerWorker.ts
index baea1346..d8e75aba 100644
--- a/apps/workers/workers/crawlerWorker.ts
+++ b/apps/workers/workers/crawlerWorker.ts
@@ -671,11 +671,27 @@ async function archiveWebpage(
const assetId = newAssetId();
const assetPath = `/tmp/${assetId}`;
- await execa({
+ let res = await execa({
input: html,
cancelSignal: abortSignal,
})("monolith", ["-", "-Ije", "-t", "5", "-b", url, "-o", assetPath]);
+ if (res.isCanceled) {
+ logger.error(
+ `[Crawler][${jobId}] Canceled archiving the page as we hit global timeout.`,
+ );
+ await tryCatch(fs.unlink(assetPath));
+ return null;
+ }
+
+ if (res.exitCode !== 0) {
+ logger.error(
+ `[Crawler][${jobId}] Failed to archive the page as the command exited with code ${res.exitCode}`,
+ );
+ await tryCatch(fs.unlink(assetPath));
+ return null;
+ }
+
const contentType = "text/html";
// Get file size and check quota before saving