about summary refs log tree commit diff stats
path: root/apps/workers/crawlerWorker.ts
diff options
context:
space:
mode:
author: kamtschatka <simon.schatka@gmx.at> 2024-09-30 02:02:48 +0200
committer: GitHub <noreply@github.com> 2024-09-30 01:02:48 +0100
commit: 8b69cddfb92b3b7548d3f90dbec1038c728ea5d9 (patch)
tree: 7d6dcbdc6c40aac0c98dce48a573c560f2891a90 /apps/workers/crawlerWorker.ts
parent: 5281531d6f4aab4605c407d5167dd8e44f237f0d (diff)
download: karakeep-8b69cddfb92b3b7548d3f90dbec1038c728ea5d9.tar.zst
feature(web): Add ability to manually trigger full page archives. Fixes #398 (#418)
* [Feature Request] Ability to select what to "crawl full page archive" #398. Added the ability to start a full page crawl for links; in bulk operations, also added the ability to refresh links. * Minor icon and wording changes. --------- Co-authored-by: MohamedBassem <me@mbassem.com>
Diffstat (limited to 'apps/workers/crawlerWorker.ts')
-rw-r--r--  apps/workers/crawlerWorker.ts | 8
1 file changed, 5 insertions, 3 deletions
diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts
index 49e9495d..246e9050 100644
--- a/apps/workers/crawlerWorker.ts
+++ b/apps/workers/crawlerWorker.ts
@@ -214,7 +214,7 @@ async function getBookmarkDetails(bookmarkId: string) {
});
if (!bookmark || !bookmark.link) {
- throw new Error("The bookmark either doesn't exist or not a link");
+ throw new Error("The bookmark either doesn't exist or is not a link");
}
return {
url: bookmark.link.url,
@@ -519,6 +519,7 @@ async function crawlAndParseUrl(
oldScreenshotAssetId: string | undefined,
oldImageAssetId: string | undefined,
oldFullPageArchiveAssetId: string | undefined,
+ archiveFullPage: boolean,
) {
const {
htmlContent,
@@ -578,7 +579,7 @@ async function crawlAndParseUrl(
]);
return async () => {
- if (serverConfig.crawler.fullPageArchive) {
+ if (serverConfig.crawler.fullPageArchive || archiveFullPage) {
const fullPageArchiveAssetId = await archiveWebpage(
htmlContent,
browserUrl,
@@ -615,7 +616,7 @@ async function runCrawler(job: DequeuedJob<ZCrawlLinkRequest>) {
return;
}
- const { bookmarkId } = request.data;
+ const { bookmarkId, archiveFullPage } = request.data;
const {
url,
userId,
@@ -654,6 +655,7 @@ async function runCrawler(job: DequeuedJob<ZCrawlLinkRequest>) {
oldScreenshotAssetId,
oldImageAssetId,
oldFullPageArchiveAssetId,
+ archiveFullPage,
);
}