about summary refs log tree commit diff stats
path: root/apps/workers/crawlerWorker.ts
diff options
context:
space:
mode:
author: kamtschatka <simon.schatka@gmx.at> 2024-09-30 02:02:48 +0200
committer: GitHub <noreply@github.com> 2024-09-30 01:02:48 +0100
commit: 8b69cddfb92b3b7548d3f90dbec1038c728ea5d9 (patch)
tree: 7d6dcbdc6c40aac0c98dce48a573c560f2891a90 /apps/workers/crawlerWorker.ts
parent: 5281531d6f4aab4605c407d5167dd8e44f237f0d (diff)
download: karakeep-8b69cddfb92b3b7548d3f90dbec1038c728ea5d9.tar.zst
feature(web): Add ability to manually trigger full page archives. Fixes #398 (#418)
* [Feature Request] Ability to select what to "crawl full page archive" #398. Added the ability to start a full page crawl for links; in bulk operations, also added the ability to refresh links. * Minor icon and wording changes. --------- Co-authored-by: MohamedBassem <me@mbassem.com>
Diffstat (limited to 'apps/workers/crawlerWorker.ts')
-rw-r--r--  apps/workers/crawlerWorker.ts | 8
1 file changed, 5 insertions, 3 deletions
diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts
index 49e9495d..246e9050 100644
--- a/apps/workers/crawlerWorker.ts
+++ b/apps/workers/crawlerWorker.ts
@@ -214,7 +214,7 @@ async function getBookmarkDetails(bookmarkId: string) {
});
if (!bookmark || !bookmark.link) {
- throw new Error("The bookmark either doesn't exist or not a link");
+ throw new Error("The bookmark either doesn't exist or is not a link");
}
return {
url: bookmark.link.url,
@@ -519,6 +519,7 @@ async function crawlAndParseUrl(
oldScreenshotAssetId: string | undefined,
oldImageAssetId: string | undefined,
oldFullPageArchiveAssetId: string | undefined,
+ archiveFullPage: boolean,
) {
const {
htmlContent,
@@ -578,7 +579,7 @@ async function crawlAndParseUrl(
]);
return async () => {
- if (serverConfig.crawler.fullPageArchive) {
+ if (serverConfig.crawler.fullPageArchive || archiveFullPage) {
const fullPageArchiveAssetId = await archiveWebpage(
htmlContent,
browserUrl,
@@ -615,7 +616,7 @@ async function runCrawler(job: DequeuedJob<ZCrawlLinkRequest>) {
return;
}
- const { bookmarkId } = request.data;
+ const { bookmarkId, archiveFullPage } = request.data;
const {
url,
userId,
@@ -654,6 +655,7 @@ async function runCrawler(job: DequeuedJob<ZCrawlLinkRequest>) {
oldScreenshotAssetId,
oldImageAssetId,
oldFullPageArchiveAssetId,
+ archiveFullPage,
);
}