aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Mohamed Bassem <me@mbassem.com> 2025-07-06 21:50:23 +0000
committer: Mohamed Bassem <me@mbassem.com> 2025-07-06 22:04:56 +0000
commit: dee3a4d44ddb1999e7dec383889246e87f202d92 (patch)
tree: 1984234f17eed886bc834543e1505ddbfb43228f
parent: 362be3008aa8b036c4c448a86e459044af8784c2 (diff)
download: karakeep-dee3a4d44ddb1999e7dec383889246e87f202d92.tar.zst
feat: Store large html content in the asset db
-rw-r--r-- apps/web/lib/attachments.tsx | 10
-rw-r--r-- apps/workers/workerUtils.ts | 3
-rw-r--r-- apps/workers/workers/crawlerWorker.ts | 109
-rw-r--r-- apps/workers/workers/inference/summarize.ts | 12
-rw-r--r-- apps/workers/workers/inference/tagging.ts | 11
-rw-r--r-- apps/workers/workers/searchWorker.ts | 9
-rw-r--r-- packages/db/drizzle/0055_content_asset_id.sql | 2
-rw-r--r-- packages/db/drizzle/meta/0055_snapshot.json | 2051
-rw-r--r-- packages/db/drizzle/meta/_journal.json | 7
-rw-r--r-- packages/db/schema.ts | 4
-rw-r--r-- packages/shared/package.json | 2
-rw-r--r-- packages/shared/types/bookmarks.ts | 2
-rw-r--r-- packages/shared/utils/htmlUtils.ts | 17
-rw-r--r-- packages/trpc/lib/attachments.ts | 5
-rw-r--r-- packages/trpc/models/bookmarks.ts | 80
-rw-r--r-- packages/trpc/routers/bookmarks.ts | 21
-rw-r--r-- pnpm-lock.yaml | 56
17 files changed, 2383 insertions, 18 deletions
diff --git a/apps/web/lib/attachments.tsx b/apps/web/lib/attachments.tsx
index 62848d33..ce34b295 100644
--- a/apps/web/lib/attachments.tsx
+++ b/apps/web/lib/attachments.tsx
@@ -1,4 +1,11 @@
-import { Archive, Camera, Image, Paperclip, Video } from "lucide-react";
+import {
+ Archive,
+ Camera,
+ FileCode,
+ Image,
+ Paperclip,
+ Video,
+} from "lucide-react";
import { ZAssetType } from "@karakeep/shared/types/bookmarks";
@@ -10,5 +17,6 @@ export const ASSET_TYPE_TO_ICON: Record<ZAssetType, React.ReactNode> = {
bannerImage: <Image className="size-4" />,
video: <Video className="size-4" />,
bookmarkAsset: <Paperclip className="size-4" />,
+ linkHtmlContent: <FileCode className="size-4" />,
unknown: <Paperclip className="size-4" />,
};
diff --git a/apps/workers/workerUtils.ts b/apps/workers/workerUtils.ts
index d41df578..3eaf5b4b 100644
--- a/apps/workers/workerUtils.ts
+++ b/apps/workers/workerUtils.ts
@@ -46,5 +46,8 @@ export async function getBookmarkDetails(bookmarkId: string) {
precrawledArchiveAssetId: bookmark.assets
.filter((a) => a.assetType == AssetTypes.LINK_PRECRAWLED_ARCHIVE)
.at(-1)?.id,
+ contentAssetId: bookmark.assets.find(
+ (a) => a.assetType == AssetTypes.LINK_HTML_CONTENT,
+ )?.id,
};
}
diff --git a/apps/workers/workers/crawlerWorker.ts b/apps/workers/workers/crawlerWorker.ts
index 10106a3b..428ec0f5 100644
--- a/apps/workers/workers/crawlerWorker.ts
+++ b/apps/workers/workers/crawlerWorker.ts
@@ -696,6 +696,75 @@ async function handleAsAssetBookmark(
});
}
+const HTML_CONTENT_SIZE_THRESHOLD = 50 * 1024; // 50KB (51,200 bytes); HTML whose UTF-8 size is at or above this is saved as an asset
+
+type StoreHtmlResult =
+ | { result: "stored"; assetId: string; size: number } // saved as an asset; size is the UTF-8 byte length of the HTML
+ | { result: "store_inline" } // smaller than the threshold; the caller writes the HTML to the htmlContent field instead
+ | { result: "not_stored" }; // no HTML was provided, or the storage quota check rejected it
+
+/**
+ * Decides where the crawler's readable HTML should live and, when it is
+ * large, saves it as an asset.
+ *
+ * Outcomes:
+ * - `not_stored`: `htmlContent` is undefined/empty, or the quota check threw
+ *   a `StorageQuotaError` (logged as a warning, not rethrown).
+ * - `store_inline`: the UTF-8 size is below `HTML_CONTENT_SIZE_THRESHOLD`;
+ *   nothing is written by this function.
+ * - `stored`: the content was saved under a newly generated asset id; the
+ *   result carries that id and the byte size.
+ *
+ * @param htmlContent readable HTML to persist, if any
+ * @param userId owner used for the quota check and the saved asset
+ * @param jobId only used to tag log lines
+ * @throws rethrows any non-quota error after logging it
+ */
+async function storeHtmlContent(
+  htmlContent: string | undefined,
+  userId: string,
+  jobId: string,
+): Promise<StoreHtmlResult> {
+  // Undefined and the empty string are both treated as "no content".
+  if (!htmlContent) {
+    return { result: "not_stored" };
+  }
+
+  // The threshold is measured in encoded bytes, not in string length.
+  const payload = Buffer.from(htmlContent, "utf8");
+  const byteCount = payload.byteLength;
+  const isLarge = byteCount >= HTML_CONTENT_SIZE_THRESHOLD;
+
+  if (!isLarge) {
+    logger.info(
+      `[Crawler][${jobId}] HTML content size (${byteCount} bytes) is below threshold, storing inline`,
+    );
+    return { result: "store_inline" };
+  }
+
+  try {
+    const quotaApproved = await checkStorageQuota(db, userId, byteCount);
+    const assetId = newAssetId();
+
+    await saveAsset({
+      userId,
+      assetId,
+      asset: payload,
+      metadata: { contentType: ASSET_TYPES.TEXT_HTML, fileName: null },
+      quotaApproved,
+    });
+
+    logger.info(
+      `[Crawler][${jobId}] Stored large HTML content (${byteCount} bytes) as asset: ${assetId}`,
+    );
+
+    return { result: "stored", assetId, size: byteCount };
+  } catch (error) {
+    // Anything other than a quota rejection is unexpected: log and propagate.
+    if (!(error instanceof StorageQuotaError)) {
+      logger.error(
+        `[Crawler][${jobId}] Failed to store HTML content as asset: ${error}`,
+      );
+      throw error;
+    }
+
+    // Quota exceeded is an expected outcome; report it without failing the crawl.
+    logger.warn(
+      `[Crawler][${jobId}] Skipping HTML content storage due to quota exceeded: ${error.message}`,
+    );
+    return { result: "not_stored" };
+  }
+}
+
async function crawlAndParseUrl(
url: string,
userId: string,
@@ -704,6 +773,7 @@ async function crawlAndParseUrl(
oldScreenshotAssetId: string | undefined,
oldImageAssetId: string | undefined,
oldFullPageArchiveAssetId: string | undefined,
+ oldContentAssetId: string | undefined,
precrawledArchiveAssetId: string | undefined,
archiveFullPage: boolean,
abortSignal: AbortSignal,
@@ -741,6 +811,12 @@ async function crawlAndParseUrl(
extractReadableContent(htmlContent, browserUrl, jobId),
storeScreenshot(screenshot, userId, jobId),
]);
+
+ const htmlContentAssetInfo = await storeHtmlContent(
+ readableContent?.content,
+ userId,
+ jobId,
+ );
abortSignal.throwIfAborted();
let imageAssetInfo: DBAssetType | null = null;
if (meta.image) {
@@ -784,8 +860,14 @@ async function crawlAndParseUrl(
// Don't store data URIs as they're not valid URLs and are usually quite large
imageUrl: meta.image?.startsWith("data:") ? null : meta.image,
favicon: meta.logo,
- content: readableContent?.textContent,
- htmlContent: readableContent?.content,
+ htmlContent:
+ htmlContentAssetInfo.result === "store_inline"
+ ? readableContent?.content
+ : null,
+ contentAssetId:
+ htmlContentAssetInfo.result === "stored"
+ ? htmlContentAssetInfo.assetId
+ : null,
crawledAt: new Date(),
crawlStatusCode: statusCode,
author: meta.author,
@@ -813,12 +895,31 @@ async function crawlAndParseUrl(
if (imageAssetInfo) {
await updateAsset(oldImageAssetId, imageAssetInfo, txn);
}
+ if (htmlContentAssetInfo.result === "stored") {
+ await updateAsset(
+ oldContentAssetId,
+ {
+ id: htmlContentAssetInfo.assetId,
+ bookmarkId,
+ userId,
+ assetType: AssetTypes.LINK_HTML_CONTENT,
+ contentType: ASSET_TYPES.TEXT_HTML,
+ size: htmlContentAssetInfo.size,
+ fileName: null,
+ },
+ txn,
+ );
+ } else if (oldContentAssetId) {
+ // Unlink the old content asset
+ await txn.delete(assets).where(eq(assets.id, oldContentAssetId));
+ }
});
// Delete the old assets if any
await Promise.all([
silentDeleteAsset(userId, oldScreenshotAssetId),
silentDeleteAsset(userId, oldImageAssetId),
+ silentDeleteAsset(userId, oldContentAssetId),
]);
return async () => {
@@ -857,7 +958,7 @@ async function crawlAndParseUrl(
);
});
if (oldFullPageArchiveAssetId) {
- silentDeleteAsset(userId, oldFullPageArchiveAssetId);
+ await silentDeleteAsset(userId, oldFullPageArchiveAssetId);
}
}
}
@@ -882,6 +983,7 @@ async function runCrawler(job: DequeuedJob<ZCrawlLinkRequest>) {
screenshotAssetId: oldScreenshotAssetId,
imageAssetId: oldImageAssetId,
fullPageArchiveAssetId: oldFullPageArchiveAssetId,
+ contentAssetId: oldContentAssetId,
precrawledArchiveAssetId,
} = await getBookmarkDetails(bookmarkId);
@@ -926,6 +1028,7 @@ async function runCrawler(job: DequeuedJob<ZCrawlLinkRequest>) {
oldScreenshotAssetId,
oldImageAssetId,
oldFullPageArchiveAssetId,
+ oldContentAssetId,
precrawledArchiveAssetId,
archiveFullPage,
job.abortSignal,
diff --git a/apps/workers/workers/inference/summarize.ts b/apps/workers/workers/inference/summarize.ts
index a832fe0a..2d9c8ef7 100644
--- a/apps/workers/workers/inference/summarize.ts
+++ b/apps/workers/workers/inference/summarize.ts
@@ -9,6 +9,7 @@ import logger from "@karakeep/shared/logger";
import { buildSummaryPrompt } from "@karakeep/shared/prompts";
import { triggerSearchReindex, ZOpenAIRequest } from "@karakeep/shared/queues";
import { BookmarkTypes } from "@karakeep/shared/types/bookmarks";
+import { Bookmark } from "@karakeep/trpc/models/bookmarks";
async function fetchBookmarkDetailsForSummary(bookmarkId: string) {
const bookmark = await db.query.bookmarks.findFirst({
@@ -19,7 +20,8 @@ async function fetchBookmarkDetailsForSummary(bookmarkId: string) {
columns: {
title: true,
description: true,
- content: true,
+ htmlContent: true,
+ contentAssetId: true,
publisher: true,
author: true,
url: true,
@@ -57,10 +59,16 @@ export async function runSummarization(
let textToSummarize = "";
if (bookmarkData.type === BookmarkTypes.LINK && bookmarkData.link) {
const link = bookmarkData.link;
+
+ // Extract plain text content from HTML for summarization
+ let content =
+ (await Bookmark.getBookmarkPlainTextContent(link, bookmarkData.userId)) ??
+ "";
+
textToSummarize = `
Title: ${link.title ?? ""}
Description: ${link.description ?? ""}
-Content: ${link.content ?? ""}
+Content: ${content}
Publisher: ${link.publisher ?? ""}
Author: ${link.author ?? ""}
URL: ${link.url ?? ""}
diff --git a/apps/workers/workers/inference/tagging.ts b/apps/workers/workers/inference/tagging.ts
index d666b3d9..271eea4b 100644
--- a/apps/workers/workers/inference/tagging.ts
+++ b/apps/workers/workers/inference/tagging.ts
@@ -21,6 +21,7 @@ import {
triggerSearchReindex,
triggerWebhook,
} from "@karakeep/shared/queues";
+import { Bookmark } from "@karakeep/trpc/models/bookmarks";
const openAIResponseSchema = z.object({
tags: z.array(z.string()),
@@ -77,13 +78,17 @@ async function buildPrompt(
) {
const prompts = await fetchCustomPrompts(bookmark.userId, "text");
if (bookmark.link) {
- if (!bookmark.link.description && !bookmark.link.content) {
+ let content =
+ (await Bookmark.getBookmarkPlainTextContent(
+ bookmark.link,
+ bookmark.userId,
+ )) ?? "";
+
+ if (!bookmark.link.description && !content) {
throw new Error(
`No content found for link "${bookmark.id}". Skipping ...`,
);
}
-
- const content = bookmark.link.content;
return buildTextPrompt(
serverConfig.inference.inferredTagLang,
prompts,
diff --git a/apps/workers/workers/searchWorker.ts b/apps/workers/workers/searchWorker.ts
index 13243152..177ca0db 100644
--- a/apps/workers/workers/searchWorker.ts
+++ b/apps/workers/workers/searchWorker.ts
@@ -11,6 +11,7 @@ import {
zSearchIndexingRequestSchema,
} from "@karakeep/shared/queues";
import { getSearchIdxClient } from "@karakeep/shared/search";
+import { Bookmark } from "@karakeep/trpc/models/bookmarks";
export class SearchIndexingWorker {
static build() {
@@ -75,6 +76,12 @@ async function runIndex(
throw new Error(`Bookmark ${bookmarkId} not found`);
}
+ // Extract plain text content from HTML for search indexing
+ const content = await Bookmark.getBookmarkPlainTextContent(
+ bookmark.link,
+ bookmark.userId,
+ );
+
const task = await searchClient.addDocuments(
[
{
@@ -85,7 +92,7 @@ async function runIndex(
url: bookmark.link.url,
linkTitle: bookmark.link.title,
description: bookmark.link.description,
- content: bookmark.link.content,
+ content,
publisher: bookmark.link.publisher,
author: bookmark.link.author,
datePublished: bookmark.link.datePublished,
diff --git a/packages/db/drizzle/0055_content_asset_id.sql b/packages/db/drizzle/0055_content_asset_id.sql
new file mode 100644
index 00000000..533792d3
--- /dev/null
+++ b/packages/db/drizzle/0055_content_asset_id.sql
@@ -0,0 +1,2 @@
+ALTER TABLE `bookmarkLinks` ADD `contentAssetId` text;--> statement-breakpoint
+ALTER TABLE `bookmarkLinks` DROP COLUMN `content`;
\ No newline at end of file
diff --git a/packages/db/drizzle/meta/0055_snapshot.json b/packages/db/drizzle/meta/0055_snapshot.json
new file mode 100644
index 00000000..024aba47
--- /dev/null
+++ b/packages/db/drizzle/meta/0055_snapshot.json
@@ -0,0 +1,2051 @@
+{
+ "version": "6",
+ "dialect": "sqlite",
+ "id": "a7674152-1484-4144-9faa-2f4597ba619e",
+ "prevId": "ac5b67e5-b9a8-413b-bba7-80280d8ebfc8",
+ "tables": {
+ "account": {
+ "name": "account",
+ "columns": {
+ "userId": {
+ "name": "userId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "type": {
+ "name": "type",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "provider": {
+ "name": "provider",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "providerAccountId": {
+ "name": "providerAccountId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "refresh_token": {
+ "name": "refresh_token",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "access_token": {
+ "name": "access_token",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "expires_at": {
+ "name": "expires_at",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "token_type": {
+ "name": "token_type",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "scope": {
+ "name": "scope",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "id_token": {
+ "name": "id_token",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "session_state": {
+ "name": "session_state",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {
+ "account_userId_user_id_fk": {
+ "name": "account_userId_user_id_fk",
+ "tableFrom": "account",
+ "tableTo": "user",
+ "columnsFrom": [
+ "userId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {
+ "account_provider_providerAccountId_pk": {
+ "columns": [
+ "provider",
+ "providerAccountId"
+ ],
+ "name": "account_provider_providerAccountId_pk"
+ }
+ },
+ "uniqueConstraints": {},
+ "checkConstraints": {}
+ },
+ "apiKey": {
+ "name": "apiKey",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "name": {
+ "name": "name",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "createdAt": {
+ "name": "createdAt",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "keyId": {
+ "name": "keyId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "keyHash": {
+ "name": "keyHash",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "userId": {
+ "name": "userId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ }
+ },
+ "indexes": {
+ "apiKey_keyId_unique": {
+ "name": "apiKey_keyId_unique",
+ "columns": [
+ "keyId"
+ ],
+ "isUnique": true
+ },
+ "apiKey_name_userId_unique": {
+ "name": "apiKey_name_userId_unique",
+ "columns": [
+ "name",
+ "userId"
+ ],
+ "isUnique": true
+ }
+ },
+ "foreignKeys": {
+ "apiKey_userId_user_id_fk": {
+ "name": "apiKey_userId_user_id_fk",
+ "tableFrom": "apiKey",
+ "tableTo": "user",
+ "columnsFrom": [
+ "userId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "checkConstraints": {}
+ },
+ "assets": {
+ "name": "assets",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "assetType": {
+ "name": "assetType",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "size": {
+ "name": "size",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false,
+ "default": 0
+ },
+ "contentType": {
+ "name": "contentType",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "fileName": {
+ "name": "fileName",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "bookmarkId": {
+ "name": "bookmarkId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "userId": {
+ "name": "userId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ }
+ },
+ "indexes": {
+ "assets_bookmarkId_idx": {
+ "name": "assets_bookmarkId_idx",
+ "columns": [
+ "bookmarkId"
+ ],
+ "isUnique": false
+ },
+ "assets_assetType_idx": {
+ "name": "assets_assetType_idx",
+ "columns": [
+ "assetType"
+ ],
+ "isUnique": false
+ },
+ "assets_userId_idx": {
+ "name": "assets_userId_idx",
+ "columns": [
+ "userId"
+ ],
+ "isUnique": false
+ }
+ },
+ "foreignKeys": {
+ "assets_bookmarkId_bookmarks_id_fk": {
+ "name": "assets_bookmarkId_bookmarks_id_fk",
+ "tableFrom": "assets",
+ "tableTo": "bookmarks",
+ "columnsFrom": [
+ "bookmarkId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ },
+ "assets_userId_user_id_fk": {
+ "name": "assets_userId_user_id_fk",
+ "tableFrom": "assets",
+ "tableTo": "user",
+ "columnsFrom": [
+ "userId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "checkConstraints": {}
+ },
+ "bookmarkAssets": {
+ "name": "bookmarkAssets",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "assetType": {
+ "name": "assetType",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "assetId": {
+ "name": "assetId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "content": {
+ "name": "content",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "metadata": {
+ "name": "metadata",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "fileName": {
+ "name": "fileName",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "sourceUrl": {
+ "name": "sourceUrl",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {
+ "bookmarkAssets_id_bookmarks_id_fk": {
+ "name": "bookmarkAssets_id_bookmarks_id_fk",
+ "tableFrom": "bookmarkAssets",
+ "tableTo": "bookmarks",
+ "columnsFrom": [
+ "id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "checkConstraints": {}
+ },
+ "bookmarkLinks": {
+ "name": "bookmarkLinks",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "url": {
+ "name": "url",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "title": {
+ "name": "title",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "description": {
+ "name": "description",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "author": {
+ "name": "author",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "publisher": {
+ "name": "publisher",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "datePublished": {
+ "name": "datePublished",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "dateModified": {
+ "name": "dateModified",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "imageUrl": {
+ "name": "imageUrl",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "favicon": {
+ "name": "favicon",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "htmlContent": {
+ "name": "htmlContent",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "contentAssetId": {
+ "name": "contentAssetId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "crawledAt": {
+ "name": "crawledAt",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "crawlStatus": {
+ "name": "crawlStatus",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false,
+ "default": "'pending'"
+ },
+ "crawlStatusCode": {
+ "name": "crawlStatusCode",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false,
+ "default": 200
+ }
+ },
+ "indexes": {
+ "bookmarkLinks_url_idx": {
+ "name": "bookmarkLinks_url_idx",
+ "columns": [
+ "url"
+ ],
+ "isUnique": false
+ }
+ },
+ "foreignKeys": {
+ "bookmarkLinks_id_bookmarks_id_fk": {
+ "name": "bookmarkLinks_id_bookmarks_id_fk",
+ "tableFrom": "bookmarkLinks",
+ "tableTo": "bookmarks",
+ "columnsFrom": [
+ "id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "checkConstraints": {}
+ },
+ "bookmarkLists": {
+ "name": "bookmarkLists",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "name": {
+ "name": "name",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "description": {
+ "name": "description",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "icon": {
+ "name": "icon",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "createdAt": {
+ "name": "createdAt",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "userId": {
+ "name": "userId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "type": {
+ "name": "type",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "query": {
+ "name": "query",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "parentId": {
+ "name": "parentId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "rssToken": {
+ "name": "rssToken",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "public": {
+ "name": "public",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false,
+ "default": false
+ }
+ },
+ "indexes": {
+ "bookmarkLists_userId_idx": {
+ "name": "bookmarkLists_userId_idx",
+ "columns": [
+ "userId"
+ ],
+ "isUnique": false
+ },
+ "bookmarkLists_userId_id_idx": {
+ "name": "bookmarkLists_userId_id_idx",
+ "columns": [
+ "userId",
+ "id"
+ ],
+ "isUnique": true
+ }
+ },
+ "foreignKeys": {
+ "bookmarkLists_userId_user_id_fk": {
+ "name": "bookmarkLists_userId_user_id_fk",
+ "tableFrom": "bookmarkLists",
+ "tableTo": "user",
+ "columnsFrom": [
+ "userId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ },
+ "bookmarkLists_parentId_bookmarkLists_id_fk": {
+ "name": "bookmarkLists_parentId_bookmarkLists_id_fk",
+ "tableFrom": "bookmarkLists",
+ "tableTo": "bookmarkLists",
+ "columnsFrom": [
+ "parentId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "set null",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "checkConstraints": {}
+ },
+ "bookmarkTags": {
+ "name": "bookmarkTags",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "name": {
+ "name": "name",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "createdAt": {
+ "name": "createdAt",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "userId": {
+ "name": "userId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ }
+ },
+ "indexes": {
+ "bookmarkTags_name_idx": {
+ "name": "bookmarkTags_name_idx",
+ "columns": [
+ "name"
+ ],
+ "isUnique": false
+ },
+ "bookmarkTags_userId_idx": {
+ "name": "bookmarkTags_userId_idx",
+ "columns": [
+ "userId"
+ ],
+ "isUnique": false
+ },
+ "bookmarkTags_userId_name_unique": {
+ "name": "bookmarkTags_userId_name_unique",
+ "columns": [
+ "userId",
+ "name"
+ ],
+ "isUnique": true
+ },
+ "bookmarkTags_userId_id_idx": {
+ "name": "bookmarkTags_userId_id_idx",
+ "columns": [
+ "userId",
+ "id"
+ ],
+ "isUnique": true
+ }
+ },
+ "foreignKeys": {
+ "bookmarkTags_userId_user_id_fk": {
+ "name": "bookmarkTags_userId_user_id_fk",
+ "tableFrom": "bookmarkTags",
+ "tableTo": "user",
+ "columnsFrom": [
+ "userId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "checkConstraints": {}
+ },
+ "bookmarkTexts": {
+ "name": "bookmarkTexts",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "text": {
+ "name": "text",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "sourceUrl": {
+ "name": "sourceUrl",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {
+ "bookmarkTexts_id_bookmarks_id_fk": {
+ "name": "bookmarkTexts_id_bookmarks_id_fk",
+ "tableFrom": "bookmarkTexts",
+ "tableTo": "bookmarks",
+ "columnsFrom": [
+ "id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "checkConstraints": {}
+ },
+ "bookmarks": {
+ "name": "bookmarks",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "createdAt": {
+ "name": "createdAt",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "modifiedAt": {
+ "name": "modifiedAt",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "title": {
+ "name": "title",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "archived": {
+ "name": "archived",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false,
+ "default": false
+ },
+ "favourited": {
+ "name": "favourited",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false,
+ "default": false
+ },
+ "userId": {
+ "name": "userId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "taggingStatus": {
+ "name": "taggingStatus",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false,
+ "default": "'pending'"
+ },
+ "summarizationStatus": {
+ "name": "summarizationStatus",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false,
+ "default": "'pending'"
+ },
+ "summary": {
+ "name": "summary",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "note": {
+ "name": "note",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "type": {
+ "name": "type",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ }
+ },
+ "indexes": {
+ "bookmarks_userId_idx": {
+ "name": "bookmarks_userId_idx",
+ "columns": [
+ "userId"
+ ],
+ "isUnique": false
+ },
+ "bookmarks_archived_idx": {
+ "name": "bookmarks_archived_idx",
+ "columns": [
+ "archived"
+ ],
+ "isUnique": false
+ },
+ "bookmarks_favourited_idx": {
+ "name": "bookmarks_favourited_idx",
+ "columns": [
+ "favourited"
+ ],
+ "isUnique": false
+ },
+ "bookmarks_createdAt_idx": {
+ "name": "bookmarks_createdAt_idx",
+ "columns": [
+ "createdAt"
+ ],
+ "isUnique": false
+ }
+ },
+ "foreignKeys": {
+ "bookmarks_userId_user_id_fk": {
+ "name": "bookmarks_userId_user_id_fk",
+ "tableFrom": "bookmarks",
+ "tableTo": "user",
+ "columnsFrom": [
+ "userId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "checkConstraints": {}
+ },
+ "bookmarksInLists": {
+ "name": "bookmarksInLists",
+ "columns": {
+ "bookmarkId": {
+ "name": "bookmarkId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "listId": {
+ "name": "listId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "addedAt": {
+ "name": "addedAt",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ }
+ },
+ "indexes": {
+ "bookmarksInLists_bookmarkId_idx": {
+ "name": "bookmarksInLists_bookmarkId_idx",
+ "columns": [
+ "bookmarkId"
+ ],
+ "isUnique": false
+ },
+ "bookmarksInLists_listId_idx": {
+ "name": "bookmarksInLists_listId_idx",
+ "columns": [
+ "listId"
+ ],
+ "isUnique": false
+ }
+ },
+ "foreignKeys": {
+ "bookmarksInLists_bookmarkId_bookmarks_id_fk": {
+ "name": "bookmarksInLists_bookmarkId_bookmarks_id_fk",
+ "tableFrom": "bookmarksInLists",
+ "tableTo": "bookmarks",
+ "columnsFrom": [
+ "bookmarkId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ },
+ "bookmarksInLists_listId_bookmarkLists_id_fk": {
+ "name": "bookmarksInLists_listId_bookmarkLists_id_fk",
+ "tableFrom": "bookmarksInLists",
+ "tableTo": "bookmarkLists",
+ "columnsFrom": [
+ "listId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {
+ "bookmarksInLists_bookmarkId_listId_pk": {
+ "columns": [
+ "bookmarkId",
+ "listId"
+ ],
+ "name": "bookmarksInLists_bookmarkId_listId_pk"
+ }
+ },
+ "uniqueConstraints": {},
+ "checkConstraints": {}
+ },
+ "config": {
+ "name": "config",
+ "columns": {
+ "key": {
+ "name": "key",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "value": {
+ "name": "value",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {},
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "checkConstraints": {}
+ },
+ "customPrompts": {
+ "name": "customPrompts",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "text": {
+ "name": "text",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "enabled": {
+ "name": "enabled",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "appliesTo": {
+ "name": "appliesTo",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "createdAt": {
+ "name": "createdAt",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "userId": {
+ "name": "userId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ }
+ },
+ "indexes": {
+ "customPrompts_userId_idx": {
+ "name": "customPrompts_userId_idx",
+ "columns": [
+ "userId"
+ ],
+ "isUnique": false
+ }
+ },
+ "foreignKeys": {
+ "customPrompts_userId_user_id_fk": {
+ "name": "customPrompts_userId_user_id_fk",
+ "tableFrom": "customPrompts",
+ "tableTo": "user",
+ "columnsFrom": [
+ "userId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "checkConstraints": {}
+ },
+ "highlights": {
+ "name": "highlights",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "bookmarkId": {
+ "name": "bookmarkId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "userId": {
+ "name": "userId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "startOffset": {
+ "name": "startOffset",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "endOffset": {
+ "name": "endOffset",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "color": {
+ "name": "color",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false,
+ "default": "'yellow'"
+ },
+ "text": {
+ "name": "text",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "note": {
+ "name": "note",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "createdAt": {
+ "name": "createdAt",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ }
+ },
+ "indexes": {
+ "highlights_bookmarkId_idx": {
+ "name": "highlights_bookmarkId_idx",
+ "columns": [
+ "bookmarkId"
+ ],
+ "isUnique": false
+ },
+ "highlights_userId_idx": {
+ "name": "highlights_userId_idx",
+ "columns": [
+ "userId"
+ ],
+ "isUnique": false
+ }
+ },
+ "foreignKeys": {
+ "highlights_bookmarkId_bookmarks_id_fk": {
+ "name": "highlights_bookmarkId_bookmarks_id_fk",
+ "tableFrom": "highlights",
+ "tableTo": "bookmarks",
+ "columnsFrom": [
+ "bookmarkId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ },
+ "highlights_userId_user_id_fk": {
+ "name": "highlights_userId_user_id_fk",
+ "tableFrom": "highlights",
+ "tableTo": "user",
+ "columnsFrom": [
+ "userId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "checkConstraints": {}
+ },
+ "rssFeedImports": {
+ "name": "rssFeedImports",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "createdAt": {
+ "name": "createdAt",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "entryId": {
+ "name": "entryId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "rssFeedId": {
+ "name": "rssFeedId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "bookmarkId": {
+ "name": "bookmarkId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ }
+ },
+ "indexes": {
+ "rssFeedImports_feedIdIdx_idx": {
+ "name": "rssFeedImports_feedIdIdx_idx",
+ "columns": [
+ "rssFeedId"
+ ],
+ "isUnique": false
+ },
+ "rssFeedImports_entryIdIdx_idx": {
+ "name": "rssFeedImports_entryIdIdx_idx",
+ "columns": [
+ "entryId"
+ ],
+ "isUnique": false
+ },
+ "rssFeedImports_rssFeedId_entryId_unique": {
+ "name": "rssFeedImports_rssFeedId_entryId_unique",
+ "columns": [
+ "rssFeedId",
+ "entryId"
+ ],
+ "isUnique": true
+ }
+ },
+ "foreignKeys": {
+ "rssFeedImports_rssFeedId_rssFeeds_id_fk": {
+ "name": "rssFeedImports_rssFeedId_rssFeeds_id_fk",
+ "tableFrom": "rssFeedImports",
+ "tableTo": "rssFeeds",
+ "columnsFrom": [
+ "rssFeedId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ },
+ "rssFeedImports_bookmarkId_bookmarks_id_fk": {
+ "name": "rssFeedImports_bookmarkId_bookmarks_id_fk",
+ "tableFrom": "rssFeedImports",
+ "tableTo": "bookmarks",
+ "columnsFrom": [
+ "bookmarkId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "set null",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "checkConstraints": {}
+ },
+ "rssFeeds": {
+ "name": "rssFeeds",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "name": {
+ "name": "name",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "url": {
+ "name": "url",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "enabled": {
+ "name": "enabled",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false,
+ "default": true
+ },
+ "createdAt": {
+ "name": "createdAt",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "lastFetchedAt": {
+ "name": "lastFetchedAt",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "lastFetchedStatus": {
+ "name": "lastFetchedStatus",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false,
+ "default": "'pending'"
+ },
+ "userId": {
+ "name": "userId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ }
+ },
+ "indexes": {
+ "rssFeeds_userId_idx": {
+ "name": "rssFeeds_userId_idx",
+ "columns": [
+ "userId"
+ ],
+ "isUnique": false
+ }
+ },
+ "foreignKeys": {
+ "rssFeeds_userId_user_id_fk": {
+ "name": "rssFeeds_userId_user_id_fk",
+ "tableFrom": "rssFeeds",
+ "tableTo": "user",
+ "columnsFrom": [
+ "userId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "checkConstraints": {}
+ },
+ "ruleEngineActions": {
+ "name": "ruleEngineActions",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "userId": {
+ "name": "userId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "ruleId": {
+ "name": "ruleId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "action": {
+ "name": "action",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "listId": {
+ "name": "listId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "tagId": {
+ "name": "tagId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ }
+ },
+ "indexes": {
+ "ruleEngineActions_userId_idx": {
+ "name": "ruleEngineActions_userId_idx",
+ "columns": [
+ "userId"
+ ],
+ "isUnique": false
+ },
+ "ruleEngineActions_ruleId_idx": {
+ "name": "ruleEngineActions_ruleId_idx",
+ "columns": [
+ "ruleId"
+ ],
+ "isUnique": false
+ }
+ },
+ "foreignKeys": {
+ "ruleEngineActions_userId_user_id_fk": {
+ "name": "ruleEngineActions_userId_user_id_fk",
+ "tableFrom": "ruleEngineActions",
+ "tableTo": "user",
+ "columnsFrom": [
+ "userId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ },
+ "ruleEngineActions_ruleId_ruleEngineRules_id_fk": {
+ "name": "ruleEngineActions_ruleId_ruleEngineRules_id_fk",
+ "tableFrom": "ruleEngineActions",
+ "tableTo": "ruleEngineRules",
+ "columnsFrom": [
+ "ruleId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ },
+ "ruleEngineActions_userId_tagId_fk": {
+ "name": "ruleEngineActions_userId_tagId_fk",
+ "tableFrom": "ruleEngineActions",
+ "tableTo": "bookmarkTags",
+ "columnsFrom": [
+ "userId",
+ "tagId"
+ ],
+ "columnsTo": [
+ "userId",
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ },
+ "ruleEngineActions_userId_listId_fk": {
+ "name": "ruleEngineActions_userId_listId_fk",
+ "tableFrom": "ruleEngineActions",
+ "tableTo": "bookmarkLists",
+ "columnsFrom": [
+ "userId",
+ "listId"
+ ],
+ "columnsTo": [
+ "userId",
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "checkConstraints": {}
+ },
+ "ruleEngineRules": {
+ "name": "ruleEngineRules",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "enabled": {
+ "name": "enabled",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false,
+ "default": true
+ },
+ "name": {
+ "name": "name",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "description": {
+ "name": "description",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "event": {
+ "name": "event",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "condition": {
+ "name": "condition",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "userId": {
+ "name": "userId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "listId": {
+ "name": "listId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "tagId": {
+ "name": "tagId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ }
+ },
+ "indexes": {
+ "ruleEngine_userId_idx": {
+ "name": "ruleEngine_userId_idx",
+ "columns": [
+ "userId"
+ ],
+ "isUnique": false
+ }
+ },
+ "foreignKeys": {
+ "ruleEngineRules_userId_user_id_fk": {
+ "name": "ruleEngineRules_userId_user_id_fk",
+ "tableFrom": "ruleEngineRules",
+ "tableTo": "user",
+ "columnsFrom": [
+ "userId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ },
+ "ruleEngineRules_userId_tagId_fk": {
+ "name": "ruleEngineRules_userId_tagId_fk",
+ "tableFrom": "ruleEngineRules",
+ "tableTo": "bookmarkTags",
+ "columnsFrom": [
+ "userId",
+ "tagId"
+ ],
+ "columnsTo": [
+ "userId",
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ },
+ "ruleEngineRules_userId_listId_fk": {
+ "name": "ruleEngineRules_userId_listId_fk",
+ "tableFrom": "ruleEngineRules",
+ "tableTo": "bookmarkLists",
+ "columnsFrom": [
+ "userId",
+ "listId"
+ ],
+ "columnsTo": [
+ "userId",
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "checkConstraints": {}
+ },
+ "session": {
+ "name": "session",
+ "columns": {
+ "sessionToken": {
+ "name": "sessionToken",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "userId": {
+ "name": "userId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "expires": {
+ "name": "expires",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {
+ "session_userId_user_id_fk": {
+ "name": "session_userId_user_id_fk",
+ "tableFrom": "session",
+ "tableTo": "user",
+ "columnsFrom": [
+ "userId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "checkConstraints": {}
+ },
+ "tagsOnBookmarks": {
+ "name": "tagsOnBookmarks",
+ "columns": {
+ "bookmarkId": {
+ "name": "bookmarkId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "tagId": {
+ "name": "tagId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "attachedAt": {
+ "name": "attachedAt",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "attachedBy": {
+ "name": "attachedBy",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ }
+ },
+ "indexes": {
+ "tagsOnBookmarks_tagId_idx": {
+ "name": "tagsOnBookmarks_tagId_idx",
+ "columns": [
+ "tagId"
+ ],
+ "isUnique": false
+ },
+ "tagsOnBookmarks_bookmarkId_idx": {
+ "name": "tagsOnBookmarks_bookmarkId_idx",
+ "columns": [
+ "bookmarkId"
+ ],
+ "isUnique": false
+ }
+ },
+ "foreignKeys": {
+ "tagsOnBookmarks_bookmarkId_bookmarks_id_fk": {
+ "name": "tagsOnBookmarks_bookmarkId_bookmarks_id_fk",
+ "tableFrom": "tagsOnBookmarks",
+ "tableTo": "bookmarks",
+ "columnsFrom": [
+ "bookmarkId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ },
+ "tagsOnBookmarks_tagId_bookmarkTags_id_fk": {
+ "name": "tagsOnBookmarks_tagId_bookmarkTags_id_fk",
+ "tableFrom": "tagsOnBookmarks",
+ "tableTo": "bookmarkTags",
+ "columnsFrom": [
+ "tagId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {
+ "tagsOnBookmarks_bookmarkId_tagId_pk": {
+ "columns": [
+ "bookmarkId",
+ "tagId"
+ ],
+ "name": "tagsOnBookmarks_bookmarkId_tagId_pk"
+ }
+ },
+ "uniqueConstraints": {},
+ "checkConstraints": {}
+ },
+ "userSettings": {
+ "name": "userSettings",
+ "columns": {
+ "userId": {
+ "name": "userId",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "bookmarkClickAction": {
+ "name": "bookmarkClickAction",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false,
+ "default": "'open_original_link'"
+ },
+ "archiveDisplayBehaviour": {
+ "name": "archiveDisplayBehaviour",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false,
+ "default": "'show'"
+ },
+ "timezone": {
+ "name": "timezone",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false,
+ "default": "'UTC'"
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {
+ "userSettings_userId_user_id_fk": {
+ "name": "userSettings_userId_user_id_fk",
+ "tableFrom": "userSettings",
+ "tableTo": "user",
+ "columnsFrom": [
+ "userId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "checkConstraints": {}
+ },
+ "user": {
+ "name": "user",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "name": {
+ "name": "name",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "email": {
+ "name": "email",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "emailVerified": {
+ "name": "emailVerified",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "image": {
+ "name": "image",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "password": {
+ "name": "password",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "salt": {
+ "name": "salt",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false,
+ "default": "''"
+ },
+ "role": {
+ "name": "role",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false,
+ "default": "'user'"
+ },
+ "bookmarkQuota": {
+ "name": "bookmarkQuota",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "storageQuota": {
+ "name": "storageQuota",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ }
+ },
+ "indexes": {
+ "user_email_unique": {
+ "name": "user_email_unique",
+ "columns": [
+ "email"
+ ],
+ "isUnique": true
+ }
+ },
+ "foreignKeys": {},
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "checkConstraints": {}
+ },
+ "verificationToken": {
+ "name": "verificationToken",
+ "columns": {
+ "identifier": {
+ "name": "identifier",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "token": {
+ "name": "token",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "expires": {
+ "name": "expires",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {},
+ "compositePrimaryKeys": {
+ "verificationToken_identifier_token_pk": {
+ "columns": [
+ "identifier",
+ "token"
+ ],
+ "name": "verificationToken_identifier_token_pk"
+ }
+ },
+ "uniqueConstraints": {},
+ "checkConstraints": {}
+ },
+ "webhooks": {
+ "name": "webhooks",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "createdAt": {
+ "name": "createdAt",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "url": {
+ "name": "url",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "userId": {
+ "name": "userId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "events": {
+ "name": "events",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "token": {
+ "name": "token",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ }
+ },
+ "indexes": {
+ "webhooks_userId_idx": {
+ "name": "webhooks_userId_idx",
+ "columns": [
+ "userId"
+ ],
+ "isUnique": false
+ }
+ },
+ "foreignKeys": {
+ "webhooks_userId_user_id_fk": {
+ "name": "webhooks_userId_user_id_fk",
+ "tableFrom": "webhooks",
+ "tableTo": "user",
+ "columnsFrom": [
+ "userId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "checkConstraints": {}
+ }
+ },
+ "views": {},
+ "enums": {},
+ "_meta": {
+ "schemas": {},
+ "tables": {},
+ "columns": {}
+ },
+ "internal": {
+ "indexes": {}
+ }
+} \ No newline at end of file
diff --git a/packages/db/drizzle/meta/_journal.json b/packages/db/drizzle/meta/_journal.json
index ea4e30b1..6c509b15 100644
--- a/packages/db/drizzle/meta/_journal.json
+++ b/packages/db/drizzle/meta/_journal.json
@@ -386,6 +386,13 @@
"when": 1751826417328,
"tag": "0054_add_timezone",
"breakpoints": true
+ },
+ {
+ "idx": 55,
+ "version": "6",
+ "when": 1751839469055,
+ "tag": "0055_content_asset_id",
+ "breakpoints": true
}
]
} \ No newline at end of file
diff --git a/packages/db/schema.ts b/packages/db/schema.ts
index ed9d586c..4375b201 100644
--- a/packages/db/schema.ts
+++ b/packages/db/schema.ts
@@ -161,8 +161,8 @@ export const bookmarkLinks = sqliteTable(
dateModified: integer("dateModified", { mode: "timestamp" }),
imageUrl: text("imageUrl"),
favicon: text("favicon"),
- content: text("content"),
htmlContent: text("htmlContent"),
+ contentAssetId: text("contentAssetId"),
crawledAt: integer("crawledAt", { mode: "timestamp" }),
crawlStatus: text("crawlStatus", {
enum: ["pending", "failure", "success"],
@@ -179,6 +179,7 @@ export const enum AssetTypes {
LINK_FULL_PAGE_ARCHIVE = "linkFullPageArchive",
LINK_PRECRAWLED_ARCHIVE = "linkPrecrawledArchive",
LINK_VIDEO = "linkVideo",
+ LINK_HTML_CONTENT = "linkHtmlContent",
BOOKMARK_ASSET = "bookmarkAsset",
UNKNOWN = "unknown",
}
@@ -196,6 +197,7 @@ export const assets = sqliteTable(
AssetTypes.LINK_FULL_PAGE_ARCHIVE,
AssetTypes.LINK_PRECRAWLED_ARCHIVE,
AssetTypes.LINK_VIDEO,
+ AssetTypes.LINK_HTML_CONTENT,
AssetTypes.BOOKMARK_ASSET,
AssetTypes.UNKNOWN,
],
diff --git a/packages/shared/package.json b/packages/shared/package.json
index f4e521b6..6f22865f 100644
--- a/packages/shared/package.json
+++ b/packages/shared/package.json
@@ -7,6 +7,7 @@
"dependencies": {
"@aws-sdk/client-s3": "^3.842.0",
"glob": "^11.0.0",
+ "html-to-text": "^9.0.5",
"js-tiktoken": "^1.0.20",
"liteque": "^0.3.2",
"meilisearch": "^0.37.0",
@@ -20,6 +21,7 @@
"devDependencies": {
"@karakeep/prettier-config": "workspace:^0.1.0",
"@karakeep/tsconfig": "workspace:^0.1.0",
+ "@types/html-to-text": "^9.0.4",
"vitest": "^1.6.1"
},
"scripts": {
diff --git a/packages/shared/types/bookmarks.ts b/packages/shared/types/bookmarks.ts
index ea1ab717..f648bce5 100644
--- a/packages/shared/types/bookmarks.ts
+++ b/packages/shared/types/bookmarks.ts
@@ -16,6 +16,7 @@ export const zSortOrder = z.enum(["asc", "desc", "relevance"]);
export type ZSortOrder = z.infer<typeof zSortOrder>;
export const zAssetTypesSchema = z.enum([
+ "linkHtmlContent",
"screenshot",
"assetScreenshot",
"bannerImage",
@@ -45,6 +46,7 @@ export const zBookmarkedLinkSchema = z.object({
videoAssetId: z.string().nullish(),
favicon: z.string().nullish(),
htmlContent: z.string().nullish(),
+ contentAssetId: z.string().nullish(),
crawledAt: z.date().nullish(),
author: z.string().nullish(),
publisher: z.string().nullish(),
diff --git a/packages/shared/utils/htmlUtils.ts b/packages/shared/utils/htmlUtils.ts
new file mode 100644
index 00000000..60272899
--- /dev/null
+++ b/packages/shared/utils/htmlUtils.ts
@@ -0,0 +1,17 @@
+import { compile } from "html-to-text";
+
+// html-to-text converter built once at module scope and reused by
+// htmlToPlainText. The single selector rule applies the "skip" format to
+// <img> elements.
+const compiledConvert = compile({
+ selectors: [{ selector: "img", format: "skip" }],
+});
+
+/**
+ * Converts HTML content to plain text.
+ *
+ * @param htmlContent - HTML markup to convert.
+ * @returns The output of the module-level compiled converter, or "" when
+ * `htmlContent` is empty (or otherwise falsy), in which case the converter
+ * is not invoked.
+ */
+export function htmlToPlainText(htmlContent: string): string {
+ if (!htmlContent) {
+ return "";
+ }
+
+ // TODO: we should probably also remove SingleFile inline images from the content.
+ return compiledConvert(htmlContent);
+}
diff --git a/packages/trpc/lib/attachments.ts b/packages/trpc/lib/attachments.ts
index 739aa8f5..e886b821 100644
--- a/packages/trpc/lib/attachments.ts
+++ b/packages/trpc/lib/attachments.ts
@@ -14,6 +14,7 @@ export function mapDBAssetTypeToUserType(assetType: AssetTypes): ZAssetType {
[AssetTypes.LINK_PRECRAWLED_ARCHIVE]: "precrawledArchive",
[AssetTypes.LINK_BANNER_IMAGE]: "bannerImage",
[AssetTypes.LINK_VIDEO]: "video",
+ [AssetTypes.LINK_HTML_CONTENT]: "linkHtmlContent",
[AssetTypes.BOOKMARK_ASSET]: "bookmarkAsset",
[AssetTypes.UNKNOWN]: "bannerImage",
};
@@ -31,6 +32,7 @@ export function mapSchemaAssetTypeToDB(
bannerImage: AssetTypes.LINK_BANNER_IMAGE,
video: AssetTypes.LINK_VIDEO,
bookmarkAsset: AssetTypes.BOOKMARK_ASSET,
+ linkHtmlContent: AssetTypes.LINK_HTML_CONTENT,
unknown: AssetTypes.UNKNOWN,
};
return map[assetType];
@@ -45,6 +47,7 @@ export function humanFriendlyNameForAssertType(type: ZAssetType) {
bannerImage: "Banner Image",
video: "Video",
bookmarkAsset: "Bookmark Asset",
+ linkHtmlContent: "HTML Content",
unknown: "Unknown",
};
return map[type];
@@ -59,6 +62,7 @@ export function isAllowedToAttachAsset(type: ZAssetType) {
bannerImage: true,
video: false,
bookmarkAsset: false,
+ linkHtmlContent: false,
unknown: false,
};
return map[type];
@@ -73,6 +77,7 @@ export function isAllowedToDetachAsset(type: ZAssetType) {
bannerImage: true,
video: true,
bookmarkAsset: false,
+ linkHtmlContent: false,
unknown: false,
};
return map[type];
diff --git a/packages/trpc/models/bookmarks.ts b/packages/trpc/models/bookmarks.ts
index 986fca58..07b3832d 100644
--- a/packages/trpc/models/bookmarks.ts
+++ b/packages/trpc/models/bookmarks.ts
@@ -27,6 +27,7 @@ import {
rssFeedImportsTable,
tagsOnBookmarks,
} from "@karakeep/db/schema";
+import { readAsset } from "@karakeep/shared/assetdb";
import serverConfig from "@karakeep/shared/config";
import {
createSignedToken,
@@ -46,6 +47,7 @@ import {
getBookmarkLinkAssetIdOrUrl,
getBookmarkTitle,
} from "@karakeep/shared/utils/bookmarkUtils";
+import { htmlToPlainText } from "@karakeep/shared/utils/htmlUtils";
import { AuthedContext } from "..";
import { mapDBAssetTypeToUserType } from "../lib/attachments";
@@ -202,8 +204,11 @@ export class Bookmark implements PrivacyAware {
imageUrl: row.bookmarkLinks.imageUrl,
favicon: row.bookmarkLinks.favicon,
htmlContent: input.includeContent
- ? row.bookmarkLinks.htmlContent
+ ? row.bookmarkLinks.contentAssetId
+ ? null // Will be populated later from asset
+ : row.bookmarkLinks.htmlContent
: null,
+ contentAssetId: row.bookmarkLinks.contentAssetId,
crawledAt: row.bookmarkLinks.crawledAt,
author: row.bookmarkLinks.author,
publisher: row.bookmarkLinks.publisher,
@@ -300,6 +305,33 @@ export class Bookmark implements PrivacyAware {
const bookmarksArr = Object.values(bookmarksRes);
+ // Fetch HTML content from assets for bookmarks that have contentAssetId (large content)
+ if (input.includeContent) {
+ await Promise.all(
+ bookmarksArr.map(async (bookmark) => {
+ if (
+ bookmark.content.type === BookmarkTypes.LINK &&
+ bookmark.content.contentAssetId &&
+ !bookmark.content.htmlContent // Only fetch if not already inline
+ ) {
+ try {
+ const asset = await readAsset({
+ userId: ctx.user.id,
+ assetId: bookmark.content.contentAssetId,
+ });
+ bookmark.content.htmlContent = asset.asset.toString("utf8");
+ } catch (error) {
+ // If asset reading fails, keep htmlContent as null
+ console.warn(
+ `Failed to read HTML content asset ${bookmark.content.contentAssetId}:`,
+ error,
+ );
+ }
+ }
+ }),
+ );
+ }
+
bookmarksArr.sort((a, b) => {
if (a.createdAt != b.createdAt) {
return input.sortOrder === "asc"
@@ -427,4 +459,50 @@ export class Bookmark implements PrivacyAware {
bannerImageUrl: getBannerImageUrl(this.bookmark.content),
};
}
+
+ /**
+ * Resolves the HTML content for a link from either its content asset or
+ * its inline `htmlContent` value.
+ *
+ * When `contentAssetId` is set, the content is read through `readAsset`
+ * and decoded as UTF-8; the inline `htmlContent` is not consulted in that
+ * case. Otherwise the inline `htmlContent` is returned when it is non-empty.
+ *
+ * @param userId - Forwarded to `readAsset` together with the asset id.
+ * @returns The HTML string, or null when there is no asset id and the
+ * inline value is null or empty.
+ *
+ * Errors raised by `readAsset` are not caught here and propagate to the
+ * caller.
+ */
+ static async getBookmarkHtmlContent(
+ {
+ contentAssetId,
+ htmlContent,
+ }: {
+ contentAssetId: string | null;
+ htmlContent: string | null;
+ },
+ userId: string,
+ ): Promise<string | null> {
+ if (contentAssetId) {
+ // Read large HTML content from asset
+ const asset = await readAsset({
+ userId,
+ assetId: contentAssetId,
+ });
+ return asset.asset.toString("utf8");
+ } else if (htmlContent) {
+ return htmlContent;
+ }
+ return null;
+ }
+
+ /**
+ * Plain-text counterpart of `getBookmarkHtmlContent`: resolves the HTML
+ * content from the same inputs and converts it with `htmlToPlainText`.
+ *
+ * @param userId - Forwarded unchanged to `getBookmarkHtmlContent`.
+ * @returns The converted text, or null when no HTML content is available
+ * (null or empty string).
+ *
+ * Note: the "no content" result is null, not "", so callers that
+ * interpolate it into a string need their own fallback.
+ */
+ static async getBookmarkPlainTextContent(
+ {
+ contentAssetId,
+ htmlContent,
+ }: {
+ contentAssetId: string | null;
+ htmlContent: string | null;
+ },
+ userId: string,
+ ): Promise<string | null> {
+ // Static method: `this` is the class when invoked as
+ // Bookmark.getBookmarkPlainTextContent(...).
+ const content = await this.getBookmarkHtmlContent(
+ {
+ contentAssetId,
+ htmlContent,
+ },
+ userId,
+ );
+ if (!content) {
+ return null;
+ }
+ return htmlToPlainText(content);
+ }
}
diff --git a/packages/trpc/routers/bookmarks.ts b/packages/trpc/routers/bookmarks.ts
index f1fe10d7..77f40878 100644
--- a/packages/trpc/routers/bookmarks.ts
+++ b/packages/trpc/routers/bookmarks.ts
@@ -118,7 +118,7 @@ async function getBookmark(
});
}
- return toZodSchema(bookmark, includeContent);
+ return await toZodSchema(bookmark, includeContent);
}
async function attemptToDedupLink(ctx: AuthedContext, url: string) {
@@ -177,10 +177,10 @@ async function cleanupAssetForBookmark(
);
}
-function toZodSchema(
+async function toZodSchema(
bookmark: BookmarkQueryReturnType,
includeContent: boolean,
-): ZBookmark {
+): Promise<ZBookmark> {
const { tagsOnBookmarks, link, text, asset, assets, ...rest } = bookmark;
let content: ZBookmarkContent = {
@@ -208,7 +208,9 @@ function toZodSchema(
description: link.description,
imageUrl: link.imageUrl,
favicon: link.favicon,
- htmlContent: includeContent ? link.htmlContent : null,
+ htmlContent: includeContent
+ ? await Bookmark.getBookmarkHtmlContent(link, bookmark.userId)
+ : null,
crawledAt: link.crawledAt,
author: link.author,
publisher: link.publisher,
@@ -806,7 +808,9 @@ export const bookmarksAppRouter = router({
}
return {
- bookmarks: results.map((b) => toZodSchema(b, input.includeContent)),
+ bookmarks: await Promise.all(
+ results.map((b) => toZodSchema(b, input.includeContent)),
+ ),
nextCursor:
resp.hits.length + resp.offset >= resp.estimatedTotalHits
? null
@@ -1052,10 +1056,15 @@ export const bookmarksAppRouter = router({
});
}
+ const content = await Bookmark.getBookmarkPlainTextContent(
+ bookmark,
+ ctx.user.id,
+ );
+
const bookmarkDetails = `
Title: ${bookmark.title ?? ""}
Description: ${bookmark.description ?? ""}
-Content: ${bookmark.content ?? ""}
+Content: ${content}
Publisher: ${bookmark.publisher ?? ""}
Author: ${bookmark.author ?? ""}
`;
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 08fe922c..3649d5b7 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -1109,6 +1109,9 @@ importers:
glob:
specifier: ^11.0.0
version: 11.0.2
+ html-to-text:
+ specifier: ^9.0.5
+ version: 9.0.5
js-tiktoken:
specifier: ^1.0.20
version: 1.0.20
@@ -1143,6 +1146,9 @@ importers:
'@karakeep/tsconfig':
specifier: workspace:^0.1.0
version: link:../../tooling/typescript
+ '@types/html-to-text':
+ specifier: ^9.0.4
+ version: 9.0.4
vitest:
specifier: ^1.6.1
version: 1.6.1(@types/node@22.15.30)(happy-dom@17.4.9)(jsdom@26.1.0)(lightningcss@1.30.1)(sass@1.89.1)(terser@5.41.0)
@@ -4782,6 +4788,9 @@ packages:
'@segment/loosely-validate-event@2.0.0':
resolution: {integrity: sha512-ZMCSfztDBqwotkl848ODgVcAmN4OItEWDCkshcKz0/W6gGSQayuuCtWV/MlodFivAZD793d6UgANd6wCXUfrIw==}
+ '@selderee/plugin-htmlparser2@0.11.0':
+ resolution: {integrity: sha512-P33hHGdldxGabLFjPPpaTxVolMrzrcegejx+0GxjrIb9Zv48D8yAIA/QTDR2dFl7Uz7urX8aX6+5bCZslr+gWQ==}
+
'@sideway/address@4.1.5':
resolution: {integrity: sha512-IqO/DUQHUkPeixNQ8n0JA6102hT9CmaljNTPmQ1u8MEhBo/R4Q8eKLN/vGZxuebwOroDB4cbpjheD4+/sKFK4Q==}
@@ -5385,6 +5394,9 @@ packages:
'@types/html-minifier-terser@6.1.0':
resolution: {integrity: sha512-oh/6byDPnL1zeNXFrDXFLyZjkr1MsBG667IM792caf1L2UPOOMf65NFzjUH/ltyfwjAGfs1rsX1eftK0jC/KIg==}
+ '@types/html-to-text@9.0.4':
+ resolution: {integrity: sha512-pUY3cKH/Nm2yYrEmDlPR1mR7yszjGx4DrwPjQ702C4/D5CwHuZTgZdIdwPkRbcuhs7BAh2L5rg3CL5cbRiGTCQ==}
+
'@types/http-cache-semantics@4.0.4':
resolution: {integrity: sha512-1m0bIFVc7eJWyve9S0RnuRgcQqF/Xd5QsUZAZeQFr1Q3/p9JWoQQEqmVy+DPTNpGXwhgIetAoYF8JSc33q29QA==}
@@ -8604,6 +8616,10 @@ packages:
resolution: {integrity: sha512-ztqyC3kLto0e9WbNp0aeP+M3kTt+nbaIveGmUxAtZa+8iFgKLUOD4YKM5j+f3QD89bra7UeumolZHKuOXnTmeQ==}
engines: {node: '>=8'}
+ html-to-text@9.0.5:
+ resolution: {integrity: sha512-qY60FjREgVZL03vJU6IfMV4GDjGBIoOyvuFdpBDIX9yTlDw0TjxVBQp+P8NvpdIXNJvfWBTNul7fsAQJq2FNpg==}
+ engines: {node: '>=14'}
+
html-url-attributes@3.0.1:
resolution: {integrity: sha512-ol6UPyBWqsrO6EJySPz2O7ZSr856WDrEzM5zMqp+FJJLGMW35cLYmmZnl0vztAZxRUoNZJFTCohfjuIJ8I4QBQ==}
@@ -9407,6 +9423,9 @@ packages:
resolution: {integrity: sha512-RE2g0b5VGZsOCFOCgP7omTRYFqydmZkBwl5oNnQ1lDYC57uyO9KqNnNVxT7COSHTxrRCWVcAVOcbjk+tvh/rgQ==}
engines: {node: '>=0.10.0'}
+ leac@0.6.0:
+ resolution: {integrity: sha512-y+SqErxb8h7nE/fiEX07jsbuhrpO9lL8eca7/Y1nuWV2moNlXhyd59iDGcRf6moVyDMbmTNzL40SUyrFU/yDpg==}
+
leven@3.1.0:
resolution: {integrity: sha512-qsda+H8jTaUaN/x5vzW2rzc+8Rw4TAQ/4KjB46IwK5VH+IlVeeeje/EoZRpiXvIqjFgK84QffqPztGI3VBLG1A==}
engines: {node: '>=6'}
@@ -10959,6 +10978,9 @@ packages:
parse5@7.3.0:
resolution: {integrity: sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw==}
+ parseley@0.12.1:
+ resolution: {integrity: sha512-e6qHKe3a9HWr0oMRVDTRhKce+bRO8VGQR3NyVwcjwrbhMmFCX9KszEV35+rn4AdilFAq9VPxP/Fe1wC9Qjd2lw==}
+
parseurl@1.3.3:
resolution: {integrity: sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==}
engines: {node: '>= 0.8'}
@@ -11055,6 +11077,9 @@ packages:
resolution: {integrity: sha512-/Y3fcFrXEAsMjJXeL9J8+ZG9U01LbuWaYypvDW2ycW1jL269L3js3DVBjDJ0Up9Np1uqDXsDrRihHANhZOlwdQ==}
engines: {node: '>=20'}
+ peberminta@0.9.0:
+ resolution: {integrity: sha512-XIxfHpEuSJbITd1H3EeQwpcZbTLHc+VVr8ANI9t5sit565tsI4/xK3KWTUFE2e6QiangUkh3B0jihzmGnNrRsQ==}
+
picocolors@1.1.1:
resolution: {integrity: sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==}
@@ -12655,6 +12680,9 @@ packages:
resolution: {integrity: sha512-vfD3pmTzGpufjScBh50YHKzEu2lxBWhVEHsNGoEXmCmn2hKGfeNLYMzCJpe8cD7gqX7TJluOVpBkAequ6dgMmA==}
engines: {node: '>=4'}
+ selderee@0.11.0:
+ resolution: {integrity: sha512-5TF+l7p4+OsnP8BCCvSyZiSPc4x4//p5uPwK8TCnVPJYRmU2aYKMpOXvw8zM5a5JvuuCGN1jmsMwuU2W02ukfA==}
+
select-hose@2.0.0:
resolution: {integrity: sha512-mEugaLK+YfkijB4fx0e6kImuJdCIt2LxCRcbEYPqRGCs4F2ogyfZU5IAZRdjCP8JPq2AtdNoC/Dux63d9Kiryg==}
@@ -20180,6 +20208,11 @@ snapshots:
component-type: 1.2.2
join-component: 1.1.0
+ '@selderee/plugin-htmlparser2@0.11.0':
+ dependencies:
+ domhandler: 5.0.3
+ selderee: 0.11.0
+
'@sideway/address@4.1.5':
dependencies:
'@hapi/hoek': 9.3.0
@@ -20929,6 +20962,8 @@ snapshots:
'@types/html-minifier-terser@6.1.0': {}
+ '@types/html-to-text@9.0.4': {}
+
'@types/http-cache-semantics@4.0.4': {}
'@types/http-errors@2.0.4': {}
@@ -24800,6 +24835,14 @@ snapshots:
html-tags@3.3.1: {}
+ html-to-text@9.0.5:
+ dependencies:
+ '@selderee/plugin-htmlparser2': 0.11.0
+ deepmerge: 4.3.1
+ dom-serializer: 2.0.0
+ htmlparser2: 8.0.2
+ selderee: 0.11.0
+
html-url-attributes@3.0.1: {}
html-void-elements@2.0.1: {}
@@ -25630,6 +25673,8 @@ snapshots:
lazy-cache@1.0.4: {}
+ leac@0.6.0: {}
+
leven@3.1.0: {}
levn@0.4.1:
@@ -27891,6 +27936,11 @@ snapshots:
dependencies:
entities: 6.0.0
+ parseley@0.12.1:
+ dependencies:
+ leac: 0.6.0
+ peberminta: 0.9.0
+
parseurl@1.3.3: {}
pascal-case@3.1.2:
@@ -27963,6 +28013,8 @@ snapshots:
optionalDependencies:
'@napi-rs/canvas': 0.1.70
+ peberminta@0.9.0: {}
+
picocolors@1.1.1: {}
picomatch@2.3.1: {}
@@ -29903,6 +29955,10 @@ snapshots:
extend-shallow: 2.0.1
kind-of: 6.0.3
+ selderee@0.11.0:
+ dependencies:
+ parseley: 0.12.1
+
select-hose@2.0.0: {}
selfsigned@2.4.1: