about | summary | refs | log | tree | commit | diff | stats
path: root/apps
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- apps/workers/constants.ts 1
-rw-r--r-- apps/workers/workers/adminMaintenance/tasks/migrateLinkHtmlContent.ts 8
-rw-r--r-- apps/workers/workers/crawlerWorker.ts 3
3 files changed, 5 insertions, 7 deletions
diff --git a/apps/workers/constants.ts b/apps/workers/constants.ts
deleted file mode 100644
index 954466bf..00000000
--- a/apps/workers/constants.ts
+++ /dev/null
@@ -1 +0,0 @@
-export const HTML_CONTENT_SIZE_THRESHOLD = 50 * 1024; // 50KB
diff --git a/apps/workers/workers/adminMaintenance/tasks/migrateLinkHtmlContent.ts b/apps/workers/workers/adminMaintenance/tasks/migrateLinkHtmlContent.ts
index c5336bce..467f2aa7 100644
--- a/apps/workers/workers/adminMaintenance/tasks/migrateLinkHtmlContent.ts
+++ b/apps/workers/workers/adminMaintenance/tasks/migrateLinkHtmlContent.ts
@@ -11,10 +11,10 @@ import {
newAssetId,
saveAsset,
} from "@karakeep/shared/assetdb";
+import serverConfig from "@karakeep/shared/config";
import logger from "@karakeep/shared/logger";
import { tryCatch } from "@karakeep/shared/tryCatch";
-import { HTML_CONTENT_SIZE_THRESHOLD } from "../../../constants";
import { updateAsset } from "../../../workerUtils";
const BATCH_SIZE = 25;
@@ -40,12 +40,12 @@ async function getBookmarksWithLargeInlineHtml(limit: number, cursor?: string) {
gt(bookmarkLinks.id, cursor),
isNotNull(bookmarkLinks.htmlContent),
isNull(bookmarkLinks.contentAssetId),
- sql`length(CAST(${bookmarkLinks.htmlContent} AS BLOB)) > ${HTML_CONTENT_SIZE_THRESHOLD}`,
+ sql`length(CAST(${bookmarkLinks.htmlContent} AS BLOB)) > ${serverConfig.crawler.htmlContentSizeThreshold}`,
)
: and(
isNotNull(bookmarkLinks.htmlContent),
isNull(bookmarkLinks.contentAssetId),
- sql`length(CAST(${bookmarkLinks.htmlContent} AS BLOB)) > ${HTML_CONTENT_SIZE_THRESHOLD}`,
+ sql`length(CAST(${bookmarkLinks.htmlContent} AS BLOB)) > ${serverConfig.crawler.htmlContentSizeThreshold}`,
),
)
.orderBy(asc(bookmarkLinks.id))
@@ -62,7 +62,7 @@ async function migrateBookmarkHtml(
const contentSize = Buffer.byteLength(htmlContent, "utf8");
- if (contentSize <= HTML_CONTENT_SIZE_THRESHOLD) {
+ if (contentSize <= serverConfig.crawler.htmlContentSizeThreshold) {
logger.debug(
`[adminMaintenance:migrate_large_link_html][${jobId}] Bookmark ${bookmarkId} inline HTML (${contentSize} bytes) below threshold, skipping`,
);
diff --git a/apps/workers/workers/crawlerWorker.ts b/apps/workers/workers/crawlerWorker.ts
index 4e02d73a..def0ae88 100644
--- a/apps/workers/workers/crawlerWorker.ts
+++ b/apps/workers/workers/crawlerWorker.ts
@@ -73,7 +73,6 @@ import {
import { tryCatch } from "@karakeep/shared/tryCatch";
import { BookmarkTypes } from "@karakeep/shared/types/bookmarks";
-import { HTML_CONTENT_SIZE_THRESHOLD } from "../constants";
import metascraperReddit from "../metascraper-plugins/metascraper-reddit";
function abortPromise(signal: AbortSignal): Promise<never> {
@@ -934,7 +933,7 @@ async function storeHtmlContent(
const contentSize = contentBuffer.byteLength;
// Only store in assets if content is at least the configured size threshold
- if (contentSize < HTML_CONTENT_SIZE_THRESHOLD) {
+ if (contentSize < serverConfig.crawler.htmlContentSizeThreshold) {
logger.info(
`[Crawler][${jobId}] HTML content size (${contentSize} bytes) is below threshold, storing inline`,
);