diff options
| author | kamtschatka <simon.schatka@gmx.at> | 2024-09-30 02:02:48 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-09-30 01:02:48 +0100 |
| commit | 8b69cddfb92b3b7548d3f90dbec1038c728ea5d9 (patch) | |
| tree | 7d6dcbdc6c40aac0c98dce48a573c560f2891a90 /apps/workers/crawlerWorker.ts | |
| parent | 5281531d6f4aab4605c407d5167dd8e44f237f0d (diff) | |
| download | karakeep-8b69cddfb92b3b7548d3f90dbec1038c728ea5d9.tar.zst | |
feature(web): Add ability to manually trigger full page archives. Fixes #398 (#418)
* [Feature Request] Ability to select what to "crawl full page archive" #398
Added the ability to start a full page crawl for links and also in bulk operations
added the ability to refresh links as a bulk operation as well
* minor icon and wording changes
---------
Co-authored-by: MohamedBassem <me@mbassem.com>
Diffstat (limited to 'apps/workers/crawlerWorker.ts')
| -rw-r--r-- | apps/workers/crawlerWorker.ts | 8 |
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts
index 49e9495d..246e9050 100644
--- a/apps/workers/crawlerWorker.ts
+++ b/apps/workers/crawlerWorker.ts
@@ -214,7 +214,7 @@ async function getBookmarkDetails(bookmarkId: string) {
   });
   if (!bookmark || !bookmark.link) {
-    throw new Error("The bookmark either doesn't exist or not a link");
+    throw new Error("The bookmark either doesn't exist or is not a link");
   }
   return {
     url: bookmark.link.url,
@@ -519,6 +519,7 @@ async function crawlAndParseUrl(
   oldScreenshotAssetId: string | undefined,
   oldImageAssetId: string | undefined,
   oldFullPageArchiveAssetId: string | undefined,
+  archiveFullPage: boolean,
 ) {
   const {
     htmlContent,
@@ -578,7 +579,7 @@ async function crawlAndParseUrl(
   ]);
   return async () => {
-    if (serverConfig.crawler.fullPageArchive) {
+    if (serverConfig.crawler.fullPageArchive || archiveFullPage) {
       const fullPageArchiveAssetId = await archiveWebpage(
         htmlContent,
         browserUrl,
@@ -615,7 +616,7 @@ async function runCrawler(job: DequeuedJob<ZCrawlLinkRequest>) {
     return;
   }
-  const { bookmarkId } = request.data;
+  const { bookmarkId, archiveFullPage } = request.data;
   const {
     url,
     userId,
@@ -654,6 +655,7 @@ async function runCrawler(job: DequeuedJob<ZCrawlLinkRequest>) {
     oldScreenshotAssetId,
     oldImageAssetId,
     oldFullPageArchiveAssetId,
+    archiveFullPage,
   );
 }
