| author | Mohamed Bassem <me@mbassem.com> | 2025-09-28 11:30:35 +0000 |
|---|---|---|
| committer | Mohamed Bassem <me@mbassem.com> | 2025-09-28 11:32:50 +0000 |
| commit | 8dd84ef58b8da920f3e7718cfb5129a44437e53d (patch) | |
| tree | b1f5e9f2ba4310be00a58711f09a7340662def82 /apps | |
| parent | cdbedf6c2f5480a32f077c70fb614fdca6282db4 (diff) | |
| download | karakeep-8dd84ef58b8da920f3e7718cfb5129a44437e53d.tar.zst | |
fix: Cleanup temp assets on monolith timeout
Diffstat (limited to 'apps')
| -rw-r--r-- | apps/workers/workers/crawlerWorker.ts | 18 |
1 file changed, 17 insertions, 1 deletion
```diff
diff --git a/apps/workers/workers/crawlerWorker.ts b/apps/workers/workers/crawlerWorker.ts
index baea1346..d8e75aba 100644
--- a/apps/workers/workers/crawlerWorker.ts
+++ b/apps/workers/workers/crawlerWorker.ts
@@ -671,11 +671,27 @@ async function archiveWebpage(
   const assetId = newAssetId();
   const assetPath = `/tmp/${assetId}`;
 
-  await execa({
+  let res = await execa({
     input: html,
     cancelSignal: abortSignal,
   })("monolith", ["-", "-Ije", "-t", "5", "-b", url, "-o", assetPath]);
 
+  if (res.isCanceled) {
+    logger.error(
+      `[Crawler][${jobId}] Canceled archiving the page as we hit global timeout.`,
+    );
+    await tryCatch(fs.unlink(assetPath));
+    return null;
+  }
+
+  if (res.exitCode !== 0) {
+    logger.error(
+      `[Crawler][${jobId}] Failed to archive the page as the command exited with code ${res.exitCode}`,
+    );
+    await tryCatch(fs.unlink(assetPath));
+    return null;
+  }
+
   const contentType = "text/html";
 
   // Get file size and check quota before saving
```
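For context, the pattern this hunk introduces can be summarized in a small standalone sketch. This is not the repository's actual code: the `archiveToFile` wrapper and the `tryCatch` body are hypothetical, and the `reject: false` option is an assumption about how execa would be configured so that cancellation and non-zero exits surface on the result object instead of being thrown.

```typescript
// Hypothetical sketch of the cleanup-on-failure pattern from the commit above.
// Assumes an execa version supporting `cancelSignal` (as used in the diff) and
// `reject: false` so failures are reported on the result rather than thrown.
import fs from "fs/promises";
import { execa } from "execa";

// Minimal tryCatch helper: swallow errors from best-effort cleanup calls.
// (The helper used in the repo may differ; this body is for illustration only.)
async function tryCatch<T>(p: Promise<T>): Promise<T | undefined> {
  try {
    return await p;
  } catch {
    return undefined;
  }
}

async function archiveToFile(
  html: string,
  url: string,
  assetPath: string,
  abortSignal: AbortSignal,
): Promise<string | null> {
  const res = await execa({
    input: html,
    cancelSignal: abortSignal,
    reject: false, // assumption: report failures on `res` instead of throwing
  })("monolith", ["-", "-Ije", "-t", "5", "-b", url, "-o", assetPath]);

  if (res.isCanceled || res.exitCode !== 0) {
    // monolith may have written a partial file before being killed or failing;
    // remove it so /tmp does not accumulate orphaned assets.
    await tryCatch(fs.unlink(assetPath));
    return null;
  }
  return assetPath;
}
```

The point of the change, per the commit message, is that when the crawler's global timeout cancels monolith (or monolith exits non-zero), the temporary asset written to `/tmp` is deleted instead of being left behind.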
