From be1b7f7e1c0cb3d905e13aa1a95e295b816cbdeb Mon Sep 17 00:00:00 2001 From: kamtschatka Date: Sat, 22 Jun 2024 18:52:40 +0200 Subject: feature: add support for PDF links. Fixes #28 (#216) * feature request: pdf support #28 Added a new sourceUrl column to the asset bookmarks Added transforming a link bookmark pointing at a pdf to an asset bookmark made sure the "View Original" link is also shown for asset bookmarks that have a sourceURL updated gitignore for IDEA * remove pdf parsing from the crawler * extract the http logic into its own function to avoid duplicating the post-processing actions (openai/index) * Add 5s timeout to the content type fetch --------- Co-authored-by: MohamedBassem --- .gitignore | 4 + .../dashboard/preview/BookmarkPreview.tsx | 16 +- apps/workers/crawlerWorker.ts | 220 +++-- packages/db/drizzle/0023_late_night_nurse.sql | 1 + packages/db/drizzle/meta/0023_snapshot.json | 1022 ++++++++++++++++++++ packages/db/drizzle/meta/_journal.json | 7 + packages/db/schema.ts | 63 +- packages/shared/assetdb.ts | 20 +- packages/shared/types/bookmarks.ts | 1 + packages/trpc/routers/bookmarks.ts | 2 + 10 files changed, 1263 insertions(+), 93 deletions(-) create mode 100644 packages/db/drizzle/0023_late_night_nurse.sql create mode 100644 packages/db/drizzle/meta/0023_snapshot.json diff --git a/.gitignore b/.gitignore index ad3a3468..eca8879e 100644 --- a/.gitignore +++ b/.gitignore @@ -49,3 +49,7 @@ data # Turbo .turbo + +# Idea +.idea +*.iml \ No newline at end of file diff --git a/apps/web/components/dashboard/preview/BookmarkPreview.tsx b/apps/web/components/dashboard/preview/BookmarkPreview.tsx index be11b47b..6a1068af 100644 --- a/apps/web/components/dashboard/preview/BookmarkPreview.tsx +++ b/apps/web/components/dashboard/preview/BookmarkPreview.tsx @@ -65,6 +65,16 @@ function CreationTime({ createdAt }: { createdAt: Date }) { ); } +function getSourceUrl(bookmark: ZBookmark) { + if (bookmark.content.type === "link") { + return bookmark.content.url; + } + if (bookmark.content.type === "asset") { + return bookmark.content.sourceUrl; + } + return null; +} + export default function BookmarkPreview({ bookmarkId, initialData, @@ -112,6 +122,8 @@ export default function BookmarkPreview({ } } + const sourceUrl = getSourceUrl(bookmark); + return (
@@ -120,9 +132,9 @@ export default function BookmarkPreview({
- {bookmark.content.type == "link" && ( + {sourceUrl && ( View Original diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts index 58f1aa85..eedb7b1e 100644 --- a/apps/workers/crawlerWorker.ts +++ b/apps/workers/crawlerWorker.ts @@ -1,5 +1,6 @@ import assert from "assert"; import * as dns from "dns"; +import * as path from "node:path"; import type { Job } from "bullmq"; import type { Browser } from "puppeteer"; import { Readability } from "@mozilla/readability"; @@ -26,8 +27,9 @@ import { withTimeout } from "utils"; import type { ZCrawlLinkRequest } from "@hoarder/shared/queues"; import { db } from "@hoarder/db"; -import { bookmarkLinks, bookmarks } from "@hoarder/db/schema"; +import { bookmarkAssets, bookmarkLinks, bookmarks } from "@hoarder/db/schema"; import { + ASSET_TYPES, deleteAsset, newAssetId, saveAsset, @@ -68,7 +70,7 @@ async function startBrowserInstance() { logger.info( `[Crawler] Connecting to existing browser websocket address: ${serverConfig.crawler.browserWebSocketUrl}`, ); - return await puppeteer.connect({ + return puppeteer.connect({ browserWSEndpoint: serverConfig.crawler.browserWebSocketUrl, defaultViewport, }); @@ -83,13 +85,13 @@ async function startBrowserInstance() { logger.info( `[Crawler] Successfully resolved IP address, new address: ${webUrl.toString()}`, ); - return await puppeteer.connect({ + return puppeteer.connect({ browserURL: webUrl.toString(), defaultViewport, }); } else { logger.info(`Launching a new browser instance`); - return await puppeteer.launch({ + return puppeteer.launch({ headless: serverConfig.crawler.headlessBrowser, defaultViewport, }); @@ -271,7 +273,11 @@ async function crawlPage(jobId: string, url: string) { logger.info( `[Crawler][${jobId}] Finished capturing page content and a screenshot. FullPageScreenshot: ${serverConfig.crawler.fullPageScreenshot}`, ); - return { htmlContent, screenshot, url: page.url() }; + return { + htmlContent, + screenshot, + url: page.url(), + }; } finally { await context.close(); } @@ -337,22 +343,17 @@ async function storeScreenshot( return assetId; } -async function downloadAndStoreImage( +async function downloadAndStoreFile( url: string, userId: string, jobId: string, + fileType: string, ) { - if (!serverConfig.crawler.downloadBannerImage) { - logger.info( - `[Crawler][${jobId}] Skipping downloading the image as per the config.`, - ); - return null; - } try { - logger.info(`[Crawler][${jobId}] Downloading image from "${url}"`); + logger.info(`[Crawler][${jobId}] Downloading ${fileType} from "${url}"`); const response = await fetch(url); if (!response.ok) { - throw new Error(`Failed to download image: ${response.status}`); + throw new Error(`Failed to download ${fileType}: ${response.status}`); } const buffer = await response.arrayBuffer(); const assetId = newAssetId(); @@ -370,18 +371,32 @@ async function downloadAndStoreImage( }); logger.info( - `[Crawler][${jobId}] Downloaded the image as assetId: ${assetId}`, + `[Crawler][${jobId}] Downloaded ${fileType} as assetId: ${assetId}`, ); return assetId; } catch (e) { logger.error( - `[Crawler][${jobId}] Failed to download and store image: ${e}`, + `[Crawler][${jobId}] Failed to download and store ${fileType}: ${e}`, ); return null; } } +async function downloadAndStoreImage( + url: string, + userId: string, + jobId: string, +) { + if (!serverConfig.crawler.downloadBannerImage) { + logger.info( + `[Crawler][${jobId}] Skipping downloading the image as per the config.`, + ); + return null; + } + return downloadAndStoreFile(url, userId, jobId, "image"); +} + async function archiveWebpage( html: string, url: string, @@ -415,31 +430,70 @@ async function archiveWebpage( return assetId; } -async function runCrawler(job: Job) { - const jobId = job.id ?? "unknown"; - - const request = zCrawlLinkRequestSchema.safeParse(job.data); - if (!request.success) { +async function getContentType( + url: string, + jobId: string, +): Promise { + try { + logger.info( + `[Crawler][${jobId}] Attempting to determine the content-type for the url ${url}`, + ); + const response = await fetch(url, { + method: "HEAD", + signal: AbortSignal.timeout(5000), + }); + const contentType = response.headers.get("content-type"); + logger.info( + `[Crawler][${jobId}] Content-type for the url ${url} is "${contentType}"`, + ); + return contentType; + } catch (e) { logger.error( - `[Crawler][${jobId}] Got malformed job request: ${request.error.toString()}`, + `[Crawler][${jobId}] Failed to determine the content-type for the url ${url}: ${e}`, ); - return; + return null; } +} - const { bookmarkId } = request.data; - const { - url, - userId, - screenshotAssetId: oldScreenshotAssetId, - imageAssetId: oldImageAssetId, - fullPageArchiveAssetId: oldFullPageArchiveAssetId, - } = await getBookmarkDetails(bookmarkId); - - logger.info( - `[Crawler][${jobId}] Will crawl "${url}" for link with id "${bookmarkId}"`, - ); - validateUrl(url); +/** + * Downloads the pdf asset from the URL and transforms the linkBookmark to an assetBookmark + * @param url the url the user provided + * @param userId the id of the user + * @param jobId the id of the job for logging + * @param bookmarkId the id of the bookmark + */ +async function handlePDFAsAssetBookmark( + url: string, + userId: string, + jobId: string, + bookmarkId: string, +) { + const assetId = await downloadAndStoreFile(url, userId, jobId, "pdf"); + if (!assetId) { + return; + } + await db.transaction(async (trx) => { + await trx.insert(bookmarkAssets).values({ + id: bookmarkId, + assetType: "pdf", + assetId, + content: null, + fileName: path.basename(new URL(url).pathname), + sourceUrl: url, + }); + await trx.delete(bookmarkLinks).where(eq(bookmarkLinks.id, bookmarkId)); + }); +} +async function crawlAndParseUrl( + url: string, + userId: string, + jobId: string, + bookmarkId: string, + oldScreenshotAssetId: string | null, + oldImageAssetId: string | null, + oldFullPageArchiveAssetId: string | null, +) { const { htmlContent, screenshot, @@ -482,6 +536,78 @@ async function runCrawler(job: Job) { : {}, ]); + return async () => { + if (serverConfig.crawler.fullPageArchive) { + const fullPageArchiveAssetId = await archiveWebpage( + htmlContent, + browserUrl, + userId, + jobId, + ); + + await db + .update(bookmarkLinks) + .set({ + fullPageArchiveAssetId, + }) + .where(eq(bookmarkLinks.id, bookmarkId)); + + if (oldFullPageArchiveAssetId) { + deleteAsset({ userId, assetId: oldFullPageArchiveAssetId }).catch( + () => ({}), + ); + } + } + }; +} + +async function runCrawler(job: Job) { + const jobId = job.id ?? "unknown"; + + const request = zCrawlLinkRequestSchema.safeParse(job.data); + if (!request.success) { + logger.error( + `[Crawler][${jobId}] Got malformed job request: ${request.error.toString()}`, + ); + return; + } + + const { bookmarkId } = request.data; + const { + url, + userId, + screenshotAssetId: oldScreenshotAssetId, + imageAssetId: oldImageAssetId, + fullPageArchiveAssetId: oldFullPageArchiveAssetId, + } = await getBookmarkDetails(bookmarkId); + + logger.info( + `[Crawler][${jobId}] Will crawl "${url}" for link with id "${bookmarkId}"`, + ); + validateUrl(url); + + const contentType = await getContentType(url, jobId); + + // Link bookmarks get transformed into asset bookmarks if they point to a pdf asset instead of a webpage + const isPdf = contentType === ASSET_TYPES.APPLICATION_PDF; + + let archivalLogic: () => Promise = () => { + return Promise.resolve(); + }; + if (isPdf) { + await handlePDFAsAssetBookmark(url, userId, jobId, bookmarkId); + } else { + archivalLogic = await crawlAndParseUrl( + url, + userId, + jobId, + bookmarkId, + oldScreenshotAssetId, + oldImageAssetId, + oldFullPageArchiveAssetId, + ); + } + // Enqueue openai job (if not set, assume it's true for backward compatibility) if (job.data.runInference !== false) { OpenAIQueue.add("openai", { @@ -493,25 +619,5 @@ async function runCrawler(job: Job) { triggerSearchReindex(bookmarkId); // Do the archival as a separate last step as it has the potential for failure - if (serverConfig.crawler.fullPageArchive) { - const fullPageArchiveAssetId = await archiveWebpage( - htmlContent, - browserUrl, - userId, - jobId, - ); - - await db - .update(bookmarkLinks) - .set({ - fullPageArchiveAssetId, - }) - .where(eq(bookmarkLinks.id, bookmarkId)); - - if (oldFullPageArchiveAssetId) { - deleteAsset({ userId, assetId: oldFullPageArchiveAssetId }).catch( - () => ({}), - ); - } - } + await archivalLogic(); } diff --git a/packages/db/drizzle/0023_late_night_nurse.sql b/packages/db/drizzle/0023_late_night_nurse.sql new file mode 100644 index 00000000..6740f4e5 --- /dev/null +++ b/packages/db/drizzle/0023_late_night_nurse.sql @@ -0,0 +1 @@ +ALTER TABLE bookmarkAssets ADD `sourceUrl` text; \ No newline at end of file diff --git a/packages/db/drizzle/meta/0023_snapshot.json b/packages/db/drizzle/meta/0023_snapshot.json new file mode 100644 index 00000000..8cb65488 --- /dev/null +++ b/packages/db/drizzle/meta/0023_snapshot.json @@ -0,0 +1,1022 @@ +{ + "version": "5", + "dialect": "sqlite", + "id": "d33de747-6acb-4160-a5ec-a4a7adee3023", + "prevId": "f2897961-faba-4fc4-9d82-85e7cf316218", + "tables": { + "account": { + "name": "account", + "columns": { + "userId": { + "name": "userId", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "type": { + "name": "type", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "provider": { + "name": "provider", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "providerAccountId": { + "name": "providerAccountId", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "refresh_token": { + "name": "refresh_token", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "access_token": { + "name": "access_token", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "expires_at": { + "name": "expires_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "token_type": { + "name": "token_type", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "scope": { + "name": "scope", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "id_token": { + "name": "id_token", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "session_state": { + "name": "session_state", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": { + "account_userId_user_id_fk": { + "name": "account_userId_user_id_fk", + "tableFrom": "account", + "tableTo": "user", + "columnsFrom": [ + "userId" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "account_provider_providerAccountId_pk": { + "columns": [ + "provider", + "providerAccountId" + ], + "name": "account_provider_providerAccountId_pk" + } + }, + "uniqueConstraints": {} + }, + "apiKey": { + "name": "apiKey", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "createdAt": { + "name": "createdAt", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "keyId": { + "name": "keyId", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "keyHash": { + "name": "keyHash", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "userId": { + "name": "userId", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + } + }, + "indexes": { + "apiKey_keyId_unique": { + "name": "apiKey_keyId_unique", + "columns": [ + "keyId" + ], + "isUnique": true + }, + "apiKey_name_userId_unique": { + "name": "apiKey_name_userId_unique", + "columns": [ + "name", + "userId" + ], + "isUnique": true + } + }, + "foreignKeys": { + "apiKey_userId_user_id_fk": { + "name": "apiKey_userId_user_id_fk", + "tableFrom": "apiKey", + "tableTo": "user", + "columnsFrom": [ + "userId" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {} + }, + "bookmarkAssets": { + "name": "bookmarkAssets", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "assetType": { + "name": "assetType", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "assetId": { + "name": "assetId", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "content": { + "name": "content", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "metadata": { + "name": "metadata", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "fileName": { + "name": "fileName", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "sourceUrl": { + "name": "sourceUrl", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": { + "bookmarkAssets_id_bookmarks_id_fk": { + "name": "bookmarkAssets_id_bookmarks_id_fk", + "tableFrom": "bookmarkAssets", + "tableTo": "bookmarks", + "columnsFrom": [ + "id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {} + }, + "bookmarkLinks": { + "name": "bookmarkLinks", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "url": { + "name": "url", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "imageUrl": { + "name": "imageUrl", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "favicon": { + "name": "favicon", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "content": { + "name": "content", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "htmlContent": { + "name": "htmlContent", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "screenshotAssetId": { + "name": "screenshotAssetId", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "fullPageArchiveAssetId": { + "name": "fullPageArchiveAssetId", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "imageAssetId": { + "name": "imageAssetId", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "crawledAt": { + "name": "crawledAt", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "crawlStatus": { + "name": "crawlStatus", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": "'pending'" + } + }, + "indexes": { + "bookmarkLinks_url_idx": { + "name": "bookmarkLinks_url_idx", + "columns": [ + "url" + ], + "isUnique": false + } + }, + "foreignKeys": { + "bookmarkLinks_id_bookmarks_id_fk": { + "name": "bookmarkLinks_id_bookmarks_id_fk", + "tableFrom": "bookmarkLinks", + "tableTo": "bookmarks", + "columnsFrom": [ + "id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {} + }, + "bookmarkLists": { + "name": "bookmarkLists", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "icon": { + "name": "icon", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "createdAt": { + "name": "createdAt", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "userId": { + "name": "userId", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "parentId": { + "name": "parentId", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + } + }, + "indexes": { + "bookmarkLists_userId_idx": { + "name": "bookmarkLists_userId_idx", + "columns": [ + "userId" + ], + "isUnique": false + } + }, + "foreignKeys": { + "bookmarkLists_userId_user_id_fk": { + "name": "bookmarkLists_userId_user_id_fk", + "tableFrom": "bookmarkLists", + "tableTo": "user", + "columnsFrom": [ + "userId" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "bookmarkLists_parentId_bookmarkLists_id_fk": { + "name": "bookmarkLists_parentId_bookmarkLists_id_fk", + "tableFrom": "bookmarkLists", + "tableTo": "bookmarkLists", + "columnsFrom": [ + "parentId" + ], + "columnsTo": [ + "id" + ], + "onDelete": "set null", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {} + }, + "bookmarkTags": { + "name": "bookmarkTags", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "createdAt": { + "name": "createdAt", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "userId": { + "name": "userId", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + } + }, + "indexes": { + "bookmarkTags_name_idx": { + "name": "bookmarkTags_name_idx", + "columns": [ + "name" + ], + "isUnique": false + }, + "bookmarkTags_userId_idx": { + "name": "bookmarkTags_userId_idx", + "columns": [ + "userId" + ], + "isUnique": false + }, + "bookmarkTags_userId_name_unique": { + "name": "bookmarkTags_userId_name_unique", + "columns": [ + "userId", + "name" + ], + "isUnique": true + } + }, + "foreignKeys": { + "bookmarkTags_userId_user_id_fk": { + "name": "bookmarkTags_userId_user_id_fk", + "tableFrom": "bookmarkTags", + "tableTo": "user", + "columnsFrom": [ + "userId" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {} + }, + "bookmarkTexts": { + "name": "bookmarkTexts", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "text": { + "name": "text", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": { + "bookmarkTexts_id_bookmarks_id_fk": { + "name": "bookmarkTexts_id_bookmarks_id_fk", + "tableFrom": "bookmarkTexts", + "tableTo": "bookmarks", + "columnsFrom": [ + "id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {} + }, + "bookmarks": { + "name": "bookmarks", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "createdAt": { + "name": "createdAt", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "archived": { + "name": "archived", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": false + }, + "favourited": { + "name": "favourited", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": false + }, + "userId": { + "name": "userId", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "taggingStatus": { + "name": "taggingStatus", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": "'pending'" + }, + "note": { + "name": "note", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + } + }, + "indexes": { + "bookmarks_userId_idx": { + "name": "bookmarks_userId_idx", + "columns": [ + "userId" + ], + "isUnique": false + }, + "bookmarks_archived_idx": { + "name": "bookmarks_archived_idx", + "columns": [ + "archived" + ], + "isUnique": false + }, + "bookmarks_favourited_idx": { + "name": "bookmarks_favourited_idx", + "columns": [ + "favourited" + ], + "isUnique": false + }, + "bookmarks_createdAt_idx": { + "name": "bookmarks_createdAt_idx", + "columns": [ + "createdAt" + ], + "isUnique": false + } + }, + "foreignKeys": { + "bookmarks_userId_user_id_fk": { + "name": "bookmarks_userId_user_id_fk", + "tableFrom": "bookmarks", + "tableTo": "user", + "columnsFrom": [ + "userId" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {} + }, + "bookmarksInLists": { + "name": "bookmarksInLists", + "columns": { + "bookmarkId": { + "name": "bookmarkId", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "listId": { + "name": "listId", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "addedAt": { + "name": "addedAt", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + } + }, + "indexes": { + "bookmarksInLists_bookmarkId_idx": { + "name": "bookmarksInLists_bookmarkId_idx", + "columns": [ + "bookmarkId" + ], + "isUnique": false + }, + "bookmarksInLists_listId_idx": { + "name": "bookmarksInLists_listId_idx", + "columns": [ + "listId" + ], + "isUnique": false + } + }, + "foreignKeys": { + "bookmarksInLists_bookmarkId_bookmarks_id_fk": { + "name": "bookmarksInLists_bookmarkId_bookmarks_id_fk", + "tableFrom": "bookmarksInLists", + "tableTo": "bookmarks", + "columnsFrom": [ + "bookmarkId" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "bookmarksInLists_listId_bookmarkLists_id_fk": { + "name": "bookmarksInLists_listId_bookmarkLists_id_fk", + "tableFrom": "bookmarksInLists", + "tableTo": "bookmarkLists", + "columnsFrom": [ + "listId" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "bookmarksInLists_bookmarkId_listId_pk": { + "columns": [ + "bookmarkId", + "listId" + ], + "name": "bookmarksInLists_bookmarkId_listId_pk" + } + }, + "uniqueConstraints": {} + }, + "session": { + "name": "session", + "columns": { + "sessionToken": { + "name": "sessionToken", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "userId": { + "name": "userId", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "expires": { + "name": "expires", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": { + "session_userId_user_id_fk": { + "name": "session_userId_user_id_fk", + "tableFrom": "session", + "tableTo": "user", + "columnsFrom": [ + "userId" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {} + }, + "tagsOnBookmarks": { + "name": "tagsOnBookmarks", + "columns": { + "bookmarkId": { + "name": "bookmarkId", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "tagId": { + "name": "tagId", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "attachedAt": { + "name": "attachedAt", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "attachedBy": { + "name": "attachedBy", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + } + }, + "indexes": { + "tagsOnBookmarks_tagId_idx": { + "name": "tagsOnBookmarks_tagId_idx", + "columns": [ + "bookmarkId" + ], + "isUnique": false + }, + "tagsOnBookmarks_bookmarkId_idx": { + "name": "tagsOnBookmarks_bookmarkId_idx", + "columns": [ + "bookmarkId" + ], + "isUnique": false + } + }, + "foreignKeys": { + "tagsOnBookmarks_bookmarkId_bookmarks_id_fk": { + "name": "tagsOnBookmarks_bookmarkId_bookmarks_id_fk", + "tableFrom": "tagsOnBookmarks", + "tableTo": "bookmarks", + "columnsFrom": [ + "bookmarkId" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "tagsOnBookmarks_tagId_bookmarkTags_id_fk": { + "name": "tagsOnBookmarks_tagId_bookmarkTags_id_fk", + "tableFrom": "tagsOnBookmarks", + "tableTo": "bookmarkTags", + "columnsFrom": [ + "tagId" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "tagsOnBookmarks_bookmarkId_tagId_pk": { + "columns": [ + "bookmarkId", + "tagId" + ], + "name": "tagsOnBookmarks_bookmarkId_tagId_pk" + } + }, + "uniqueConstraints": {} + }, + "user": { + "name": "user", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "email": { + "name": "email", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "emailVerified": { + "name": "emailVerified", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "image": { + "name": "image", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "password": { + "name": "password", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "role": { + "name": "role", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": "'user'" + } + }, + "indexes": { + "user_email_unique": { + "name": "user_email_unique", + "columns": [ + "email" + ], + "isUnique": true + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {} + }, + "verificationToken": { + "name": "verificationToken", + "columns": { + "identifier": { + "name": "identifier", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "token": { + "name": "token", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "expires": { + "name": "expires", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": { + "verificationToken_identifier_token_pk": { + "columns": [ + "identifier", + "token" + ], + "name": "verificationToken_identifier_token_pk" + } + }, + "uniqueConstraints": {} + } + }, + "enums": {}, + "_meta": { + "schemas": {}, + "tables": {}, + "columns": {} + } +} \ No newline at end of file diff --git a/packages/db/drizzle/meta/_journal.json b/packages/db/drizzle/meta/_journal.json index 29fa84f0..75cbfc09 100644 --- a/packages/db/drizzle/meta/_journal.json +++ b/packages/db/drizzle/meta/_journal.json @@ -162,6 +162,13 @@ "when": 1716679762529, "tag": "0022_tough_nextwave", "breakpoints": true + }, + { + "idx": 23, + "version": "5", + "when": 1717960986361, + "tag": "0023_late_night_nurse", + "breakpoints": true } ] } \ No newline at end of file diff --git a/packages/db/schema.ts b/packages/db/schema.ts index 3fd7897f..07f1686e 100644 --- a/packages/db/schema.ts +++ b/packages/db/schema.ts @@ -126,33 +126,37 @@ export const bookmarks = sqliteTable( }), ); -export const bookmarkLinks = sqliteTable("bookmarkLinks", { - id: text("id") - .notNull() - .primaryKey() - .$defaultFn(() => createId()) - .references(() => bookmarks.id, { onDelete: "cascade" }), - url: text("url").notNull(), +export const bookmarkLinks = sqliteTable( + "bookmarkLinks", + { + id: text("id") + .notNull() + .primaryKey() + .$defaultFn(() => createId()) + .references(() => bookmarks.id, { onDelete: "cascade" }), + url: text("url").notNull(), - // Crawled info - title: text("title"), - description: text("description"), - imageUrl: text("imageUrl"), - favicon: text("favicon"), - content: text("content"), - htmlContent: text("htmlContent"), - screenshotAssetId: text("screenshotAssetId"), - fullPageArchiveAssetId: text("fullPageArchiveAssetId"), - imageAssetId: text("imageAssetId"), - crawledAt: integer("crawledAt", { mode: "timestamp" }), - crawlStatus: text("crawlStatus", { - enum: ["pending", "failure", "success"], - }).default("pending"), -}, (bl) => { - return { - urlIdx: index("bookmarkLinks_url_idx").on(bl.url), - }; -}); + // Crawled info + title: text("title"), + description: text("description"), + imageUrl: text("imageUrl"), + favicon: text("favicon"), + content: text("content"), + htmlContent: text("htmlContent"), + screenshotAssetId: text("screenshotAssetId"), + fullPageArchiveAssetId: text("fullPageArchiveAssetId"), + imageAssetId: text("imageAssetId"), + crawledAt: integer("crawledAt", { mode: "timestamp" }), + crawlStatus: text("crawlStatus", { + enum: ["pending", "failure", "success"], + }).default("pending"), + }, + (bl) => { + return { + urlIdx: index("bookmarkLinks_url_idx").on(bl.url), + }; + }, +); export const bookmarkTexts = sqliteTable("bookmarkTexts", { id: text("id") @@ -174,6 +178,7 @@ export const bookmarkAssets = sqliteTable("bookmarkAssets", { content: text("content"), metadata: text("metadata"), fileName: text("fileName"), + sourceUrl: text("sourceUrl"), }); export const bookmarkTags = sqliteTable( @@ -231,8 +236,10 @@ export const bookmarkLists = sqliteTable( userId: text("userId") .notNull() .references(() => users.id, { onDelete: "cascade" }), - parentId: text("parentId") - .references((): AnySQLiteColumn => bookmarkLists.id, { onDelete: "set null" }), + parentId: text("parentId").references( + (): AnySQLiteColumn => bookmarkLists.id, + { onDelete: "set null" }, + ), }, (bl) => ({ userIdIdx: index("bookmarkLists_userId_idx").on(bl.userId), diff --git a/packages/shared/assetdb.ts b/packages/shared/assetdb.ts index 4cea06b0..fb625af8 100644 --- a/packages/shared/assetdb.ts +++ b/packages/shared/assetdb.ts @@ -6,18 +6,26 @@ import serverConfig from "./config"; const ROOT_PATH = path.join(serverConfig.dataDir, "assets"); +export const enum ASSET_TYPES { + IMAGE_JPEG = "image/jpeg", + IMAGE_PNG = "image/png", + IMAGE_WEBP = "image/webp", + APPLICATION_PDF = "application/pdf", + TEXT_HTML = "text/html", +} + // The assets that we allow the users to upload -export const SUPPORTED_UPLOAD_ASSET_TYPES = new Set([ - "image/jpeg", - "image/png", - "image/webp", - "application/pdf", +export const SUPPORTED_UPLOAD_ASSET_TYPES: Set = new Set([ + ASSET_TYPES.IMAGE_JPEG, + ASSET_TYPES.IMAGE_PNG, + ASSET_TYPES.IMAGE_WEBP, + ASSET_TYPES.APPLICATION_PDF, ]); // The assets that we support saving in the asset db export const SUPPORTED_ASSET_TYPES = new Set([ ...SUPPORTED_UPLOAD_ASSET_TYPES, - "text/html", + ASSET_TYPES.TEXT_HTML, ]); function getAssetDir(userId: string, assetId: string) { diff --git a/packages/shared/types/bookmarks.ts b/packages/shared/types/bookmarks.ts index 06cd632e..c9e3e1a5 100644 --- a/packages/shared/types/bookmarks.ts +++ b/packages/shared/types/bookmarks.ts @@ -30,6 +30,7 @@ export const zBookmarkedAssetSchema = z.object({ assetType: z.enum(["image", "pdf"]), assetId: z.string(), fileName: z.string().nullish(), + sourceUrl: z.string().nullish(), }); export type ZBookmarkedAsset = z.infer; diff --git a/packages/trpc/routers/bookmarks.ts b/packages/trpc/routers/bookmarks.ts index 57463177..e083f83c 100644 --- a/packages/trpc/routers/bookmarks.ts +++ b/packages/trpc/routers/bookmarks.ts @@ -172,6 +172,7 @@ function toZodSchema(bookmark: BookmarkQueryReturnType): ZBookmark { assetType: asset.assetType, assetId: asset.assetId, fileName: asset.fileName, + sourceUrl: asset.sourceUrl, }; } else { content = { type: "unknown" }; @@ -257,6 +258,7 @@ export const bookmarksAppRouter = router({ content: null, metadata: null, fileName: input.fileName ?? null, + sourceUrl: null, }) .returning(); content = { -- cgit v1.2.3-70-g09d2