| author | kamtschatka <simon.schatka@gmx.at> | 2024-06-30 00:34:43 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-06-29 23:34:43 +0100 |
| commit | ccbff18a9763e458c07f46cb3a331062df14a9b9 | |
| tree | 0cefc8cb49ea70594c6af7a947f3c21def1e7846 /apps/workers | |
| parent | e107f8b6c250759ab0f884b2fdd0283fae15cfe5 | |
| download | karakeep-ccbff18a9763e458c07f46cb3a331062df14a9b9.tar.zst | |
refactor: remove redundant code from crawler worker and refactor handling of asset types (#253)
* refactoring asset types
Extracted functions to silently delete assets and to update them after crawling.
Generalized the mapping of assets to bookmark fields to make extending them easier (see the usage sketch after the diff below).
* revert silentDeleteAsset and hide better-sqlite3
---------
Co-authored-by: MohamedBassem <me@mbassem.com>
Diffstat (limited to 'apps/workers')
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | apps/workers/crawlerWorker.ts | 81 |

1 file changed, 49 insertions, 32 deletions
```diff
diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts
index e297c404..c0e1bd1b 100644
--- a/apps/workers/crawlerWorker.ts
+++ b/apps/workers/crawlerWorker.ts
@@ -26,7 +26,7 @@ import StealthPlugin from "puppeteer-extra-plugin-stealth";
 import { withTimeout } from "utils";
 
 import type { ZCrawlLinkRequest } from "@hoarder/shared/queues";
-import { db } from "@hoarder/db";
+import { db, HoarderDBTransaction } from "@hoarder/db";
 import {
   assets,
   AssetTypes,
@@ -544,27 +544,20 @@ async function crawlAndParseUrl(
       })
       .where(eq(bookmarkLinks.id, bookmarkId));
 
-    if (screenshotAssetId) {
-      if (oldScreenshotAssetId) {
-        await txn.delete(assets).where(eq(assets.id, oldScreenshotAssetId));
-      }
-      await txn.insert(assets).values({
-        id: screenshotAssetId,
-        assetType: AssetTypes.LINK_SCREENSHOT,
-        bookmarkId,
-      });
-    }
-
-    if (imageAssetId) {
-      if (oldImageAssetId) {
-        await txn.delete(assets).where(eq(assets.id, oldImageAssetId));
-      }
-      await txn.insert(assets).values({
-        id: imageAssetId,
-        assetType: AssetTypes.LINK_BANNER_IMAGE,
-        bookmarkId,
-      });
-    }
+    await updateAsset(
+      screenshotAssetId,
+      oldScreenshotAssetId,
+      bookmarkId,
+      AssetTypes.LINK_SCREENSHOT,
+      txn,
+    );
+    await updateAsset(
+      imageAssetId,
+      oldImageAssetId,
+      bookmarkId,
+      AssetTypes.LINK_BANNER_IMAGE,
+      txn,
+    );
   });
 
   // Delete the old assets if any
@@ -587,19 +580,16 @@ async function crawlAndParseUrl(
     );
 
     await db.transaction(async (txn) => {
-      if (oldFullPageArchiveAssetId) {
-        await txn
-          .delete(assets)
-          .where(eq(assets.id, oldFullPageArchiveAssetId));
-      }
-      await txn.insert(assets).values({
-        id: fullPageArchiveAssetId,
-        assetType: AssetTypes.LINK_FULL_PAGE_ARCHIVE,
+      await updateAsset(
+        fullPageArchiveAssetId,
+        oldFullPageArchiveAssetId,
         bookmarkId,
-      });
+        AssetTypes.LINK_FULL_PAGE_ARCHIVE,
+        txn,
+      );
     });
     if (oldFullPageArchiveAssetId) {
-      deleteAsset({ userId, assetId: oldFullPageArchiveAssetId }).catch(
+      await deleteAsset({ userId, assetId: oldFullPageArchiveAssetId }).catch(
         () => ({}),
       );
     }
@@ -673,3 +663,30 @@ async function runCrawler(job: Job<ZCrawlLinkRequest, void>) {
   // Do the archival as a separate last step as it has the potential for failure
   await archivalLogic();
 }
+
+/**
+ * Removes the old asset and adds a new one instead
+ * @param newAssetId the new assetId to add
+ * @param oldAssetId the old assetId to remove (if it exists)
+ * @param bookmarkId the id of the bookmark the asset belongs to
+ * @param assetType the type of the asset
+ * @param txn the transaction where this update should happen in
+ */
+async function updateAsset(
+  newAssetId: string | null,
+  oldAssetId: string | undefined,
+  bookmarkId: string,
+  assetType: AssetTypes,
+  txn: HoarderDBTransaction,
+) {
+  if (newAssetId) {
+    if (oldAssetId) {
+      await txn.delete(assets).where(eq(assets.id, oldAssetId));
+    }
+    await txn.insert(assets).values({
+      id: newAssetId,
+      assetType,
+      bookmarkId,
+    });
+  }
+}
```
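For anyone extending the crawler with an additional asset type, the sketch below shows how the new `updateAsset` helper would be called from another transaction. This is a minimal illustration, not code from this change: `AssetTypes.LINK_PDF`, `persistPdfAsset`, `pdfAssetId`, and `oldPdfAssetId` are hypothetical names invented for the example, while `updateAsset`, `db`, `assets`, `AssetTypes`, and `HoarderDBTransaction` are the identifiers used in the diff above.

```typescript
import { db } from "@hoarder/db";
// `updateAsset`, `assets`, and `AssetTypes` are module-level definitions in
// apps/workers/crawlerWorker.ts after this change, so a call like this would
// live in that file. `LINK_PDF` and the names below are hypothetical.

async function persistPdfAsset(
  bookmarkId: string,
  pdfAssetId: string | null,
  oldPdfAssetId: string | undefined,
) {
  await db.transaction(async (txn) => {
    // Deletes the old asset row (if one exists) and inserts the new one in
    // the same transaction; a null pdfAssetId makes updateAsset a no-op.
    await updateAsset(
      pdfAssetId,
      oldPdfAssetId,
      bookmarkId,
      AssetTypes.LINK_PDF, // hypothetical asset type
      txn,
    );
  });
}
```

Note that, as in the diff, `updateAsset` only touches the `assets` table: removing the old file from asset storage still happens outside the transaction via `deleteAsset`, presumably because a file deletion cannot be rolled back along with the database transaction.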
