From ccbff18a9763e458c07f46cb3a331062df14a9b9 Mon Sep 17 00:00:00 2001
From: kamtschatka
Date: Sun, 30 Jun 2024 00:34:43 +0200
Subject: refactor: remove redundant code from crawler worker and refactor handling of asset types (#253)

* refactoring asset types

Extracted out functions to silently delete assets and to update them after crawling
Generalized the mapping of assets to bookmark fields to make extending them easier

* revert silentDeleteAsset and hide better-sqlite3

---------

Co-authored-by: MohamedBassem
---
 apps/workers/crawlerWorker.ts | 81 ++++++++++++++++++++++++++-----------------
 1 file changed, 49 insertions(+), 32 deletions(-)

(limited to 'apps/workers')

diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts
index e297c404..c0e1bd1b 100644
--- a/apps/workers/crawlerWorker.ts
+++ b/apps/workers/crawlerWorker.ts
@@ -26,7 +26,7 @@ import StealthPlugin from "puppeteer-extra-plugin-stealth";
 import { withTimeout } from "utils";
 
 import type { ZCrawlLinkRequest } from "@hoarder/shared/queues";
-import { db } from "@hoarder/db";
+import { db, HoarderDBTransaction } from "@hoarder/db";
 import {
   assets,
   AssetTypes,
@@ -544,27 +544,20 @@ async function crawlAndParseUrl(
       })
       .where(eq(bookmarkLinks.id, bookmarkId));
 
-    if (screenshotAssetId) {
-      if (oldScreenshotAssetId) {
-        await txn.delete(assets).where(eq(assets.id, oldScreenshotAssetId));
-      }
-      await txn.insert(assets).values({
-        id: screenshotAssetId,
-        assetType: AssetTypes.LINK_SCREENSHOT,
-        bookmarkId,
-      });
-    }
-
-    if (imageAssetId) {
-      if (oldImageAssetId) {
-        await txn.delete(assets).where(eq(assets.id, oldImageAssetId));
-      }
-      await txn.insert(assets).values({
-        id: imageAssetId,
-        assetType: AssetTypes.LINK_BANNER_IMAGE,
-        bookmarkId,
-      });
-    }
+    await updateAsset(
+      screenshotAssetId,
+      oldScreenshotAssetId,
+      bookmarkId,
+      AssetTypes.LINK_SCREENSHOT,
+      txn,
+    );
+    await updateAsset(
+      imageAssetId,
+      oldImageAssetId,
+      bookmarkId,
+      AssetTypes.LINK_BANNER_IMAGE,
+      txn,
+    );
   });
 
   // Delete the old assets if any
@@ -587,19 +580,16 @@ async function crawlAndParseUrl(
   );
 
   await db.transaction(async (txn) => {
-    if (oldFullPageArchiveAssetId) {
-      await txn
-        .delete(assets)
-        .where(eq(assets.id, oldFullPageArchiveAssetId));
-    }
-    await txn.insert(assets).values({
-      id: fullPageArchiveAssetId,
-      assetType: AssetTypes.LINK_FULL_PAGE_ARCHIVE,
+    await updateAsset(
+      fullPageArchiveAssetId,
+      oldFullPageArchiveAssetId,
       bookmarkId,
-    });
+      AssetTypes.LINK_FULL_PAGE_ARCHIVE,
+      txn,
+    );
   });
   if (oldFullPageArchiveAssetId) {
-    deleteAsset({ userId, assetId: oldFullPageArchiveAssetId }).catch(
+    await deleteAsset({ userId, assetId: oldFullPageArchiveAssetId }).catch(
       () => ({}),
     );
   }
@@ -673,3 +663,30 @@ async function runCrawler(job: Job) {
   // Do the archival as a separate last step as it has the potential for failure
   await archivalLogic();
 }
+
+/**
+ * Removes the old asset and adds a new one instead
+ * @param newAssetId the new assetId to add
+ * @param oldAssetId the old assetId to remove (if it exists)
+ * @param bookmarkId the id of the bookmark the asset belongs to
+ * @param assetType the type of the asset
+ * @param txn the transaction where this update should happen in
+ */
+async function updateAsset(
+  newAssetId: string | null,
+  oldAssetId: string | undefined,
+  bookmarkId: string,
+  assetType: AssetTypes,
+  txn: HoarderDBTransaction,
+) {
+  if (newAssetId) {
+    if (oldAssetId) {
+      await txn.delete(assets).where(eq(assets.id, oldAssetId));
+    }
+    await txn.insert(assets).values({
+      id: newAssetId,
+      assetType,
+      bookmarkId,
+    });
+  }
+}
--
cgit v1.2.3-70-g09d2