diff options
Diffstat (limited to 'apps')
-rw-r--r--  apps/workers/constants.ts                                             | 1 -
-rw-r--r--  apps/workers/workers/adminMaintenance/tasks/migrateLinkHtmlContent.ts | 8 ++++----
-rw-r--r--  apps/workers/workers/crawlerWorker.ts                                 | 3 +--
3 files changed, 5 insertions, 7 deletions
diff --git a/apps/workers/constants.ts b/apps/workers/constants.ts deleted file mode 100644 index 954466bf..00000000 --- a/apps/workers/constants.ts +++ /dev/null @@ -1 +0,0 @@ -export const HTML_CONTENT_SIZE_THRESHOLD = 50 * 1024; // 50KB diff --git a/apps/workers/workers/adminMaintenance/tasks/migrateLinkHtmlContent.ts b/apps/workers/workers/adminMaintenance/tasks/migrateLinkHtmlContent.ts index c5336bce..467f2aa7 100644 --- a/apps/workers/workers/adminMaintenance/tasks/migrateLinkHtmlContent.ts +++ b/apps/workers/workers/adminMaintenance/tasks/migrateLinkHtmlContent.ts @@ -11,10 +11,10 @@ import { newAssetId, saveAsset, } from "@karakeep/shared/assetdb"; +import serverConfig from "@karakeep/shared/config"; import logger from "@karakeep/shared/logger"; import { tryCatch } from "@karakeep/shared/tryCatch"; -import { HTML_CONTENT_SIZE_THRESHOLD } from "../../../constants"; import { updateAsset } from "../../../workerUtils"; const BATCH_SIZE = 25; @@ -40,12 +40,12 @@ async function getBookmarksWithLargeInlineHtml(limit: number, cursor?: string) { gt(bookmarkLinks.id, cursor), isNotNull(bookmarkLinks.htmlContent), isNull(bookmarkLinks.contentAssetId), - sql`length(CAST(${bookmarkLinks.htmlContent} AS BLOB)) > ${HTML_CONTENT_SIZE_THRESHOLD}`, + sql`length(CAST(${bookmarkLinks.htmlContent} AS BLOB)) > ${serverConfig.crawler.htmlContentSizeThreshold}`, ) : and( isNotNull(bookmarkLinks.htmlContent), isNull(bookmarkLinks.contentAssetId), - sql`length(CAST(${bookmarkLinks.htmlContent} AS BLOB)) > ${HTML_CONTENT_SIZE_THRESHOLD}`, + sql`length(CAST(${bookmarkLinks.htmlContent} AS BLOB)) > ${serverConfig.crawler.htmlContentSizeThreshold}`, ), ) .orderBy(asc(bookmarkLinks.id)) @@ -62,7 +62,7 @@ async function migrateBookmarkHtml( const contentSize = Buffer.byteLength(htmlContent, "utf8"); - if (contentSize <= HTML_CONTENT_SIZE_THRESHOLD) { + if (contentSize <= serverConfig.crawler.htmlContentSizeThreshold) { logger.debug( `[adminMaintenance:migrate_large_link_html][${jobId}] 
Bookmark ${bookmarkId} inline HTML (${contentSize} bytes) below threshold, skipping`, ); diff --git a/apps/workers/workers/crawlerWorker.ts b/apps/workers/workers/crawlerWorker.ts index 4e02d73a..def0ae88 100644 --- a/apps/workers/workers/crawlerWorker.ts +++ b/apps/workers/workers/crawlerWorker.ts @@ -73,7 +73,6 @@ import { import { tryCatch } from "@karakeep/shared/tryCatch"; import { BookmarkTypes } from "@karakeep/shared/types/bookmarks"; -import { HTML_CONTENT_SIZE_THRESHOLD } from "../constants"; import metascraperReddit from "../metascraper-plugins/metascraper-reddit"; function abortPromise(signal: AbortSignal): Promise<never> { @@ -934,7 +933,7 @@ async function storeHtmlContent( const contentSize = contentBuffer.byteLength; // Only store in assets if content is >= 50KB - if (contentSize < HTML_CONTENT_SIZE_THRESHOLD) { + if (contentSize < serverConfig.crawler.htmlContentSizeThreshold) { logger.info( `[Crawler][${jobId}] HTML content size (${contentSize} bytes) is below threshold, storing inline`, ); |
