| field | value | date |
|---|---|---|
| author | MohamedBassem <me@mbassem.com> | 2024-05-26 10:18:42 +0000 |
| committer | MohamedBassem <me@mbassem.com> | 2024-05-26 10:18:42 +0000 |
| commit | 9d89f987ba0748bbf978d17b815040d316b19620 (patch) | |
| tree | 076280bf43d8a4d96af54067f8e11533582f62e5 /apps | |
| parent | 9198c1b7e15c79a9b0452e8c2a6b702df6a37b60 (diff) | |
| download | karakeep-9d89f987ba0748bbf978d17b815040d316b19620.tar.zst | |
fix(crawler): Only update the database if full page archival is enabled
Diffstat (limited to 'apps')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | apps/workers/crawlerWorker.ts | 38 |

1 file changed, 19 insertions, 19 deletions
```diff
diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts
index 87632019..f2e0e4a8 100644
--- a/apps/workers/crawlerWorker.ts
+++ b/apps/workers/crawlerWorker.ts
@@ -388,9 +388,6 @@ async function archiveWebpage(
   userId: string,
   jobId: string,
 ) {
-  if (!serverConfig.crawler.fullPageArchive) {
-    return;
-  }
   logger.info(`[Crawler][${jobId}] Will attempt to archive page ...`);
   const urlParsed = new URL(url);
   const baseUrl = `${urlParsed.protocol}//${urlParsed.host}`;
@@ -499,22 +496,25 @@ async function runCrawler(job: Job<ZCrawlLinkRequest, void>) {
   });
 
   // Do the archival as a separate last step as it has the potential for failure
-  const fullPageArchiveAssetId = await archiveWebpage(
-    htmlContent,
-    browserUrl,
-    userId,
-    jobId,
-  );
-  await db
-    .update(bookmarkLinks)
-    .set({
-      fullPageArchiveAssetId,
-    })
-    .where(eq(bookmarkLinks.id, bookmarkId));
-
-  if (oldFullPageArchiveAssetId) {
-    deleteAsset({ userId, assetId: oldFullPageArchiveAssetId }).catch(
-      () => ({}),
+  if (serverConfig.crawler.fullPageArchive) {
+    const fullPageArchiveAssetId = await archiveWebpage(
+      htmlContent,
+      browserUrl,
+      userId,
+      jobId,
     );
+
+    await db
+      .update(bookmarkLinks)
+      .set({
+        fullPageArchiveAssetId,
+      })
+      .where(eq(bookmarkLinks.id, bookmarkId));
+
+    if (oldFullPageArchiveAssetId) {
+      deleteAsset({ userId, assetId: oldFullPageArchiveAssetId }).catch(
+        () => ({}),
+      );
+    }
   }
 }
```
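To make the intent of the change easier to follow outside the diff context, here is a minimal TypeScript sketch of the same guard pattern: the caller only archives, writes the new asset id to the database, and deletes the superseded asset when full-page archival is enabled. The names used here (`CrawlFinishDeps`, `finishCrawl`, `archivePage`, `saveArchiveAssetId`, `removeAsset`) are hypothetical stand-ins for illustration, not karakeep's actual APIs.

```ts
// A minimal sketch of the guard pattern applied in this commit; the helper
// names below are hypothetical stand-ins rather than karakeep's real APIs.

interface CrawlFinishDeps {
  fullPageArchiveEnabled: boolean;
  archivePage: (url: string) => Promise<string>; // returns the new asset id
  saveArchiveAssetId: (bookmarkId: string, assetId: string) => Promise<void>;
  removeAsset: (assetId: string) => Promise<void>;
}

async function finishCrawl(
  deps: CrawlFinishDeps,
  bookmarkId: string,
  url: string,
  oldAssetId?: string,
): Promise<void> {
  // Old behaviour (per the diff): the enabled-check sat inside the archival
  // helper, which returned early, yet the caller still overwrote the stored
  // asset id and deleted the previous archive. Hoisting the check into the
  // caller keeps the database row and the old asset untouched when archival
  // is disabled.
  if (!deps.fullPageArchiveEnabled) {
    return;
  }

  const newAssetId = await deps.archivePage(url);
  await deps.saveArchiveAssetId(bookmarkId, newAssetId);

  if (oldAssetId) {
    // Best-effort cleanup of the superseded archive, mirroring the diff's
    // `.catch(() => ({}))`.
    await deps.removeAsset(oldAssetId).catch(() => undefined);
  }
}
```

Moving the feature flag to the caller keeps the write path symmetric: either the archival, the database update, and the old-asset cleanup all happen, or none of them do.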
