diff options
| -rw-r--r-- | .gitignore | 4 | ||||
| -rw-r--r-- | apps/web/components/dashboard/preview/BookmarkPreview.tsx | 16 | ||||
| -rw-r--r-- | apps/workers/crawlerWorker.ts | 220 | ||||
| -rw-r--r-- | packages/db/drizzle/0023_late_night_nurse.sql | 1 | ||||
| -rw-r--r-- | packages/db/drizzle/meta/0023_snapshot.json | 1022 | ||||
| -rw-r--r-- | packages/db/drizzle/meta/_journal.json | 7 | ||||
| -rw-r--r-- | packages/db/schema.ts | 63 | ||||
| -rw-r--r-- | packages/shared/assetdb.ts | 20 | ||||
| -rw-r--r-- | packages/shared/types/bookmarks.ts | 1 | ||||
| -rw-r--r-- | packages/trpc/routers/bookmarks.ts | 2 |
10 files changed, 1263 insertions, 93 deletions
@@ -49,3 +49,7 @@ data # Turbo .turbo + +# Idea +.idea +*.iml
\ No newline at end of file diff --git a/apps/web/components/dashboard/preview/BookmarkPreview.tsx b/apps/web/components/dashboard/preview/BookmarkPreview.tsx index be11b47b..6a1068af 100644 --- a/apps/web/components/dashboard/preview/BookmarkPreview.tsx +++ b/apps/web/components/dashboard/preview/BookmarkPreview.tsx @@ -65,6 +65,16 @@ function CreationTime({ createdAt }: { createdAt: Date }) { ); } +function getSourceUrl(bookmark: ZBookmark) { + if (bookmark.content.type === "link") { + return bookmark.content.url; + } + if (bookmark.content.type === "asset") { + return bookmark.content.sourceUrl; + } + return null; +} + export default function BookmarkPreview({ bookmarkId, initialData, @@ -112,6 +122,8 @@ export default function BookmarkPreview({ } } + const sourceUrl = getSourceUrl(bookmark); + return ( <div className="grid h-full grid-rows-3 gap-2 overflow-hidden bg-background lg:grid-cols-3 lg:grid-rows-none"> <div className="row-span-2 h-full w-full overflow-auto p-2 md:col-span-2 lg:row-auto"> @@ -120,9 +132,9 @@ export default function BookmarkPreview({ <div className="lg:col-span1 row-span-1 flex flex-col gap-4 overflow-auto bg-accent p-4 lg:row-auto"> <div className="flex w-full flex-col items-center justify-center gap-y-2"> <EditableTitle bookmark={bookmark} /> - {bookmark.content.type == "link" && ( + {sourceUrl && ( <Link - href={bookmark.content.url} + href={sourceUrl} className="flex items-center gap-2 text-gray-400" > <span>View Original</span> diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts index 58f1aa85..eedb7b1e 100644 --- a/apps/workers/crawlerWorker.ts +++ b/apps/workers/crawlerWorker.ts @@ -1,5 +1,6 @@ import assert from "assert"; import * as dns from "dns"; +import * as path from "node:path"; import type { Job } from "bullmq"; import type { Browser } from "puppeteer"; import { Readability } from "@mozilla/readability"; @@ -26,8 +27,9 @@ import { withTimeout } from "utils"; import type { ZCrawlLinkRequest } from "@hoarder/shared/queues"; import { db } from "@hoarder/db"; -import { bookmarkLinks, bookmarks } from "@hoarder/db/schema"; +import { bookmarkAssets, bookmarkLinks, bookmarks } from "@hoarder/db/schema"; import { + ASSET_TYPES, deleteAsset, newAssetId, saveAsset, @@ -68,7 +70,7 @@ async function startBrowserInstance() { logger.info( `[Crawler] Connecting to existing browser websocket address: ${serverConfig.crawler.browserWebSocketUrl}`, ); - return await puppeteer.connect({ + return puppeteer.connect({ browserWSEndpoint: serverConfig.crawler.browserWebSocketUrl, defaultViewport, }); @@ -83,13 +85,13 @@ async function startBrowserInstance() { logger.info( `[Crawler] Successfully resolved IP address, new address: ${webUrl.toString()}`, ); - return await puppeteer.connect({ + return puppeteer.connect({ browserURL: webUrl.toString(), defaultViewport, }); } else { logger.info(`Launching a new browser instance`); - return await puppeteer.launch({ + return puppeteer.launch({ headless: serverConfig.crawler.headlessBrowser, defaultViewport, }); @@ -271,7 +273,11 @@ async function crawlPage(jobId: string, url: string) { logger.info( `[Crawler][${jobId}] Finished capturing page content and a screenshot. FullPageScreenshot: ${serverConfig.crawler.fullPageScreenshot}`, ); - return { htmlContent, screenshot, url: page.url() }; + return { + htmlContent, + screenshot, + url: page.url(), + }; } finally { await context.close(); } @@ -337,22 +343,17 @@ async function storeScreenshot( return assetId; } -async function downloadAndStoreImage( +async function downloadAndStoreFile( url: string, userId: string, jobId: string, + fileType: string, ) { - if (!serverConfig.crawler.downloadBannerImage) { - logger.info( - `[Crawler][${jobId}] Skipping downloading the image as per the config.`, - ); - return null; - } try { - logger.info(`[Crawler][${jobId}] Downloading image from "${url}"`); + logger.info(`[Crawler][${jobId}] Downloading ${fileType} from "${url}"`); const response = await fetch(url); if (!response.ok) { - throw new Error(`Failed to download image: ${response.status}`); + throw new Error(`Failed to download ${fileType}: ${response.status}`); } const buffer = await response.arrayBuffer(); const assetId = newAssetId(); @@ -370,18 +371,32 @@ async function downloadAndStoreImage( }); logger.info( - `[Crawler][${jobId}] Downloaded the image as assetId: ${assetId}`, + `[Crawler][${jobId}] Downloaded ${fileType} as assetId: ${assetId}`, ); return assetId; } catch (e) { logger.error( - `[Crawler][${jobId}] Failed to download and store image: ${e}`, + `[Crawler][${jobId}] Failed to download and store ${fileType}: ${e}`, ); return null; } } +async function downloadAndStoreImage( + url: string, + userId: string, + jobId: string, +) { + if (!serverConfig.crawler.downloadBannerImage) { + logger.info( + `[Crawler][${jobId}] Skipping downloading the image as per the config.`, + ); + return null; + } + return downloadAndStoreFile(url, userId, jobId, "image"); +} + async function archiveWebpage( html: string, url: string, @@ -415,31 +430,70 @@ async function archiveWebpage( return assetId; } -async function runCrawler(job: Job<ZCrawlLinkRequest, void>) { - const jobId = job.id ?? "unknown"; - - const request = zCrawlLinkRequestSchema.safeParse(job.data); - if (!request.success) { +async function getContentType( + url: string, + jobId: string, +): Promise<string | null> { + try { + logger.info( + `[Crawler][${jobId}] Attempting to determine the content-type for the url ${url}`, + ); + const response = await fetch(url, { + method: "HEAD", + signal: AbortSignal.timeout(5000), + }); + const contentType = response.headers.get("content-type"); + logger.info( + `[Crawler][${jobId}] Content-type for the url ${url} is "${contentType}"`, + ); + return contentType; + } catch (e) { logger.error( - `[Crawler][${jobId}] Got malformed job request: ${request.error.toString()}`, + `[Crawler][${jobId}] Failed to determine the content-type for the url ${url}: ${e}`, ); - return; + return null; } +} - const { bookmarkId } = request.data; - const { - url, - userId, - screenshotAssetId: oldScreenshotAssetId, - imageAssetId: oldImageAssetId, - fullPageArchiveAssetId: oldFullPageArchiveAssetId, - } = await getBookmarkDetails(bookmarkId); - - logger.info( - `[Crawler][${jobId}] Will crawl "${url}" for link with id "${bookmarkId}"`, - ); - validateUrl(url); +/** + * Downloads the pdf asset from the URL and transforms the linkBookmark to an assetBookmark + * @param url the url the user provided + * @param userId the id of the user + * @param jobId the id of the job for logging + * @param bookmarkId the id of the bookmark + */ +async function handlePDFAsAssetBookmark( + url: string, + userId: string, + jobId: string, + bookmarkId: string, +) { + const assetId = await downloadAndStoreFile(url, userId, jobId, "pdf"); + if (!assetId) { + return; + } + await db.transaction(async (trx) => { + await trx.insert(bookmarkAssets).values({ + id: bookmarkId, + assetType: "pdf", + assetId, + content: null, + fileName: path.basename(new URL(url).pathname), + sourceUrl: url, + }); + await trx.delete(bookmarkLinks).where(eq(bookmarkLinks.id, bookmarkId)); + }); +} +async function crawlAndParseUrl( + url: string, + userId: string, + jobId: string, + bookmarkId: string, + oldScreenshotAssetId: string | null, + oldImageAssetId: string | null, + oldFullPageArchiveAssetId: string | null, +) { const { htmlContent, screenshot, @@ -482,6 +536,78 @@ async function runCrawler(job: Job<ZCrawlLinkRequest, void>) { : {}, ]); + return async () => { + if (serverConfig.crawler.fullPageArchive) { + const fullPageArchiveAssetId = await archiveWebpage( + htmlContent, + browserUrl, + userId, + jobId, + ); + + await db + .update(bookmarkLinks) + .set({ + fullPageArchiveAssetId, + }) + .where(eq(bookmarkLinks.id, bookmarkId)); + + if (oldFullPageArchiveAssetId) { + deleteAsset({ userId, assetId: oldFullPageArchiveAssetId }).catch( + () => ({}), + ); + } + } + }; +} + +async function runCrawler(job: Job<ZCrawlLinkRequest, void>) { + const jobId = job.id ?? "unknown"; + + const request = zCrawlLinkRequestSchema.safeParse(job.data); + if (!request.success) { + logger.error( + `[Crawler][${jobId}] Got malformed job request: ${request.error.toString()}`, + ); + return; + } + + const { bookmarkId } = request.data; + const { + url, + userId, + screenshotAssetId: oldScreenshotAssetId, + imageAssetId: oldImageAssetId, + fullPageArchiveAssetId: oldFullPageArchiveAssetId, + } = await getBookmarkDetails(bookmarkId); + + logger.info( + `[Crawler][${jobId}] Will crawl "${url}" for link with id "${bookmarkId}"`, + ); + validateUrl(url); + + const contentType = await getContentType(url, jobId); + + // Link bookmarks get transformed into asset bookmarks if they point to a pdf asset instead of a webpage + const isPdf = contentType === ASSET_TYPES.APPLICATION_PDF; + + let archivalLogic: () => Promise<void> = () => { + return Promise.resolve(); + }; + if (isPdf) { + await handlePDFAsAssetBookmark(url, userId, jobId, bookmarkId); + } else { + archivalLogic = await crawlAndParseUrl( + url, + userId, + jobId, + bookmarkId, + oldScreenshotAssetId, + oldImageAssetId, + oldFullPageArchiveAssetId, + ); + } + // Enqueue openai job (if not set, assume it's true for backward compatibility) if (job.data.runInference !== false) { OpenAIQueue.add("openai", { @@ -493,25 +619,5 @@ async function runCrawler(job: Job<ZCrawlLinkRequest, void>) { triggerSearchReindex(bookmarkId); // Do the archival as a separate last step as it has the potential for failure - if (serverConfig.crawler.fullPageArchive) { - const fullPageArchiveAssetId = await archiveWebpage( - htmlContent, - browserUrl, - userId, - jobId, - ); - - await db - .update(bookmarkLinks) - .set({ - fullPageArchiveAssetId, - }) - .where(eq(bookmarkLinks.id, bookmarkId)); - - if (oldFullPageArchiveAssetId) { - deleteAsset({ userId, assetId: oldFullPageArchiveAssetId }).catch( - () => ({}), - ); - } - } + await archivalLogic(); } diff --git a/packages/db/drizzle/0023_late_night_nurse.sql b/packages/db/drizzle/0023_late_night_nurse.sql new file mode 100644 index 00000000..6740f4e5 --- /dev/null +++ b/packages/db/drizzle/0023_late_night_nurse.sql @@ -0,0 +1 @@ +ALTER TABLE bookmarkAssets ADD `sourceUrl` text;
\ No newline at end of file diff --git a/packages/db/drizzle/meta/0023_snapshot.json b/packages/db/drizzle/meta/0023_snapshot.json new file mode 100644 index 00000000..8cb65488 --- /dev/null +++ b/packages/db/drizzle/meta/0023_snapshot.json @@ -0,0 +1,1022 @@ +{
+ "version": "5",
+ "dialect": "sqlite",
+ "id": "d33de747-6acb-4160-a5ec-a4a7adee3023",
+ "prevId": "f2897961-faba-4fc4-9d82-85e7cf316218",
+ "tables": {
+ "account": {
+ "name": "account",
+ "columns": {
+ "userId": {
+ "name": "userId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "type": {
+ "name": "type",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "provider": {
+ "name": "provider",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "providerAccountId": {
+ "name": "providerAccountId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "refresh_token": {
+ "name": "refresh_token",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "access_token": {
+ "name": "access_token",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "expires_at": {
+ "name": "expires_at",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "token_type": {
+ "name": "token_type",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "scope": {
+ "name": "scope",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "id_token": {
+ "name": "id_token",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "session_state": {
+ "name": "session_state",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {
+ "account_userId_user_id_fk": {
+ "name": "account_userId_user_id_fk",
+ "tableFrom": "account",
+ "tableTo": "user",
+ "columnsFrom": [
+ "userId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {
+ "account_provider_providerAccountId_pk": {
+ "columns": [
+ "provider",
+ "providerAccountId"
+ ],
+ "name": "account_provider_providerAccountId_pk"
+ }
+ },
+ "uniqueConstraints": {}
+ },
+ "apiKey": {
+ "name": "apiKey",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "name": {
+ "name": "name",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "createdAt": {
+ "name": "createdAt",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "keyId": {
+ "name": "keyId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "keyHash": {
+ "name": "keyHash",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "userId": {
+ "name": "userId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ }
+ },
+ "indexes": {
+ "apiKey_keyId_unique": {
+ "name": "apiKey_keyId_unique",
+ "columns": [
+ "keyId"
+ ],
+ "isUnique": true
+ },
+ "apiKey_name_userId_unique": {
+ "name": "apiKey_name_userId_unique",
+ "columns": [
+ "name",
+ "userId"
+ ],
+ "isUnique": true
+ }
+ },
+ "foreignKeys": {
+ "apiKey_userId_user_id_fk": {
+ "name": "apiKey_userId_user_id_fk",
+ "tableFrom": "apiKey",
+ "tableTo": "user",
+ "columnsFrom": [
+ "userId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {}
+ },
+ "bookmarkAssets": {
+ "name": "bookmarkAssets",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "assetType": {
+ "name": "assetType",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "assetId": {
+ "name": "assetId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "content": {
+ "name": "content",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "metadata": {
+ "name": "metadata",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "fileName": {
+ "name": "fileName",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "sourceUrl": {
+ "name": "sourceUrl",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {
+ "bookmarkAssets_id_bookmarks_id_fk": {
+ "name": "bookmarkAssets_id_bookmarks_id_fk",
+ "tableFrom": "bookmarkAssets",
+ "tableTo": "bookmarks",
+ "columnsFrom": [
+ "id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {}
+ },
+ "bookmarkLinks": {
+ "name": "bookmarkLinks",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "url": {
+ "name": "url",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "title": {
+ "name": "title",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "description": {
+ "name": "description",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "imageUrl": {
+ "name": "imageUrl",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "favicon": {
+ "name": "favicon",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "content": {
+ "name": "content",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "htmlContent": {
+ "name": "htmlContent",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "screenshotAssetId": {
+ "name": "screenshotAssetId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "fullPageArchiveAssetId": {
+ "name": "fullPageArchiveAssetId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "imageAssetId": {
+ "name": "imageAssetId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "crawledAt": {
+ "name": "crawledAt",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "crawlStatus": {
+ "name": "crawlStatus",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false,
+ "default": "'pending'"
+ }
+ },
+ "indexes": {
+ "bookmarkLinks_url_idx": {
+ "name": "bookmarkLinks_url_idx",
+ "columns": [
+ "url"
+ ],
+ "isUnique": false
+ }
+ },
+ "foreignKeys": {
+ "bookmarkLinks_id_bookmarks_id_fk": {
+ "name": "bookmarkLinks_id_bookmarks_id_fk",
+ "tableFrom": "bookmarkLinks",
+ "tableTo": "bookmarks",
+ "columnsFrom": [
+ "id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {}
+ },
+ "bookmarkLists": {
+ "name": "bookmarkLists",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "name": {
+ "name": "name",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "icon": {
+ "name": "icon",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "createdAt": {
+ "name": "createdAt",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "userId": {
+ "name": "userId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "parentId": {
+ "name": "parentId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ }
+ },
+ "indexes": {
+ "bookmarkLists_userId_idx": {
+ "name": "bookmarkLists_userId_idx",
+ "columns": [
+ "userId"
+ ],
+ "isUnique": false
+ }
+ },
+ "foreignKeys": {
+ "bookmarkLists_userId_user_id_fk": {
+ "name": "bookmarkLists_userId_user_id_fk",
+ "tableFrom": "bookmarkLists",
+ "tableTo": "user",
+ "columnsFrom": [
+ "userId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ },
+ "bookmarkLists_parentId_bookmarkLists_id_fk": {
+ "name": "bookmarkLists_parentId_bookmarkLists_id_fk",
+ "tableFrom": "bookmarkLists",
+ "tableTo": "bookmarkLists",
+ "columnsFrom": [
+ "parentId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "set null",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {}
+ },
+ "bookmarkTags": {
+ "name": "bookmarkTags",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "name": {
+ "name": "name",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "createdAt": {
+ "name": "createdAt",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "userId": {
+ "name": "userId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ }
+ },
+ "indexes": {
+ "bookmarkTags_name_idx": {
+ "name": "bookmarkTags_name_idx",
+ "columns": [
+ "name"
+ ],
+ "isUnique": false
+ },
+ "bookmarkTags_userId_idx": {
+ "name": "bookmarkTags_userId_idx",
+ "columns": [
+ "userId"
+ ],
+ "isUnique": false
+ },
+ "bookmarkTags_userId_name_unique": {
+ "name": "bookmarkTags_userId_name_unique",
+ "columns": [
+ "userId",
+ "name"
+ ],
+ "isUnique": true
+ }
+ },
+ "foreignKeys": {
+ "bookmarkTags_userId_user_id_fk": {
+ "name": "bookmarkTags_userId_user_id_fk",
+ "tableFrom": "bookmarkTags",
+ "tableTo": "user",
+ "columnsFrom": [
+ "userId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {}
+ },
+ "bookmarkTexts": {
+ "name": "bookmarkTexts",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "text": {
+ "name": "text",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {
+ "bookmarkTexts_id_bookmarks_id_fk": {
+ "name": "bookmarkTexts_id_bookmarks_id_fk",
+ "tableFrom": "bookmarkTexts",
+ "tableTo": "bookmarks",
+ "columnsFrom": [
+ "id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {}
+ },
+ "bookmarks": {
+ "name": "bookmarks",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "createdAt": {
+ "name": "createdAt",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "title": {
+ "name": "title",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "archived": {
+ "name": "archived",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false,
+ "default": false
+ },
+ "favourited": {
+ "name": "favourited",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false,
+ "default": false
+ },
+ "userId": {
+ "name": "userId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "taggingStatus": {
+ "name": "taggingStatus",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false,
+ "default": "'pending'"
+ },
+ "note": {
+ "name": "note",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ }
+ },
+ "indexes": {
+ "bookmarks_userId_idx": {
+ "name": "bookmarks_userId_idx",
+ "columns": [
+ "userId"
+ ],
+ "isUnique": false
+ },
+ "bookmarks_archived_idx": {
+ "name": "bookmarks_archived_idx",
+ "columns": [
+ "archived"
+ ],
+ "isUnique": false
+ },
+ "bookmarks_favourited_idx": {
+ "name": "bookmarks_favourited_idx",
+ "columns": [
+ "favourited"
+ ],
+ "isUnique": false
+ },
+ "bookmarks_createdAt_idx": {
+ "name": "bookmarks_createdAt_idx",
+ "columns": [
+ "createdAt"
+ ],
+ "isUnique": false
+ }
+ },
+ "foreignKeys": {
+ "bookmarks_userId_user_id_fk": {
+ "name": "bookmarks_userId_user_id_fk",
+ "tableFrom": "bookmarks",
+ "tableTo": "user",
+ "columnsFrom": [
+ "userId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {}
+ },
+ "bookmarksInLists": {
+ "name": "bookmarksInLists",
+ "columns": {
+ "bookmarkId": {
+ "name": "bookmarkId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "listId": {
+ "name": "listId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "addedAt": {
+ "name": "addedAt",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ }
+ },
+ "indexes": {
+ "bookmarksInLists_bookmarkId_idx": {
+ "name": "bookmarksInLists_bookmarkId_idx",
+ "columns": [
+ "bookmarkId"
+ ],
+ "isUnique": false
+ },
+ "bookmarksInLists_listId_idx": {
+ "name": "bookmarksInLists_listId_idx",
+ "columns": [
+ "listId"
+ ],
+ "isUnique": false
+ }
+ },
+ "foreignKeys": {
+ "bookmarksInLists_bookmarkId_bookmarks_id_fk": {
+ "name": "bookmarksInLists_bookmarkId_bookmarks_id_fk",
+ "tableFrom": "bookmarksInLists",
+ "tableTo": "bookmarks",
+ "columnsFrom": [
+ "bookmarkId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ },
+ "bookmarksInLists_listId_bookmarkLists_id_fk": {
+ "name": "bookmarksInLists_listId_bookmarkLists_id_fk",
+ "tableFrom": "bookmarksInLists",
+ "tableTo": "bookmarkLists",
+ "columnsFrom": [
+ "listId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {
+ "bookmarksInLists_bookmarkId_listId_pk": {
+ "columns": [
+ "bookmarkId",
+ "listId"
+ ],
+ "name": "bookmarksInLists_bookmarkId_listId_pk"
+ }
+ },
+ "uniqueConstraints": {}
+ },
+ "session": {
+ "name": "session",
+ "columns": {
+ "sessionToken": {
+ "name": "sessionToken",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "userId": {
+ "name": "userId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "expires": {
+ "name": "expires",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {
+ "session_userId_user_id_fk": {
+ "name": "session_userId_user_id_fk",
+ "tableFrom": "session",
+ "tableTo": "user",
+ "columnsFrom": [
+ "userId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {}
+ },
+ "tagsOnBookmarks": {
+ "name": "tagsOnBookmarks",
+ "columns": {
+ "bookmarkId": {
+ "name": "bookmarkId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "tagId": {
+ "name": "tagId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "attachedAt": {
+ "name": "attachedAt",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "attachedBy": {
+ "name": "attachedBy",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ }
+ },
+ "indexes": {
+ "tagsOnBookmarks_tagId_idx": {
+ "name": "tagsOnBookmarks_tagId_idx",
+ "columns": [
+ "bookmarkId"
+ ],
+ "isUnique": false
+ },
+ "tagsOnBookmarks_bookmarkId_idx": {
+ "name": "tagsOnBookmarks_bookmarkId_idx",
+ "columns": [
+ "bookmarkId"
+ ],
+ "isUnique": false
+ }
+ },
+ "foreignKeys": {
+ "tagsOnBookmarks_bookmarkId_bookmarks_id_fk": {
+ "name": "tagsOnBookmarks_bookmarkId_bookmarks_id_fk",
+ "tableFrom": "tagsOnBookmarks",
+ "tableTo": "bookmarks",
+ "columnsFrom": [
+ "bookmarkId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ },
+ "tagsOnBookmarks_tagId_bookmarkTags_id_fk": {
+ "name": "tagsOnBookmarks_tagId_bookmarkTags_id_fk",
+ "tableFrom": "tagsOnBookmarks",
+ "tableTo": "bookmarkTags",
+ "columnsFrom": [
+ "tagId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {
+ "tagsOnBookmarks_bookmarkId_tagId_pk": {
+ "columns": [
+ "bookmarkId",
+ "tagId"
+ ],
+ "name": "tagsOnBookmarks_bookmarkId_tagId_pk"
+ }
+ },
+ "uniqueConstraints": {}
+ },
+ "user": {
+ "name": "user",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "name": {
+ "name": "name",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "email": {
+ "name": "email",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "emailVerified": {
+ "name": "emailVerified",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "image": {
+ "name": "image",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "password": {
+ "name": "password",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false
+ },
+ "role": {
+ "name": "role",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "autoincrement": false,
+ "default": "'user'"
+ }
+ },
+ "indexes": {
+ "user_email_unique": {
+ "name": "user_email_unique",
+ "columns": [
+ "email"
+ ],
+ "isUnique": true
+ }
+ },
+ "foreignKeys": {},
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {}
+ },
+ "verificationToken": {
+ "name": "verificationToken",
+ "columns": {
+ "identifier": {
+ "name": "identifier",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "token": {
+ "name": "token",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ },
+ "expires": {
+ "name": "expires",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "autoincrement": false
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {},
+ "compositePrimaryKeys": {
+ "verificationToken_identifier_token_pk": {
+ "columns": [
+ "identifier",
+ "token"
+ ],
+ "name": "verificationToken_identifier_token_pk"
+ }
+ },
+ "uniqueConstraints": {}
+ }
+ },
+ "enums": {},
+ "_meta": {
+ "schemas": {},
+ "tables": {},
+ "columns": {}
+ }
+}
\ No newline at end of file diff --git a/packages/db/drizzle/meta/_journal.json b/packages/db/drizzle/meta/_journal.json index 29fa84f0..75cbfc09 100644 --- a/packages/db/drizzle/meta/_journal.json +++ b/packages/db/drizzle/meta/_journal.json @@ -162,6 +162,13 @@ "when": 1716679762529, "tag": "0022_tough_nextwave", "breakpoints": true + }, + { + "idx": 23, + "version": "5", + "when": 1717960986361, + "tag": "0023_late_night_nurse", + "breakpoints": true } ] }
\ No newline at end of file diff --git a/packages/db/schema.ts b/packages/db/schema.ts index 3fd7897f..07f1686e 100644 --- a/packages/db/schema.ts +++ b/packages/db/schema.ts @@ -126,33 +126,37 @@ export const bookmarks = sqliteTable( }), ); -export const bookmarkLinks = sqliteTable("bookmarkLinks", { - id: text("id") - .notNull() - .primaryKey() - .$defaultFn(() => createId()) - .references(() => bookmarks.id, { onDelete: "cascade" }), - url: text("url").notNull(), +export const bookmarkLinks = sqliteTable( + "bookmarkLinks", + { + id: text("id") + .notNull() + .primaryKey() + .$defaultFn(() => createId()) + .references(() => bookmarks.id, { onDelete: "cascade" }), + url: text("url").notNull(), - // Crawled info - title: text("title"), - description: text("description"), - imageUrl: text("imageUrl"), - favicon: text("favicon"), - content: text("content"), - htmlContent: text("htmlContent"), - screenshotAssetId: text("screenshotAssetId"), - fullPageArchiveAssetId: text("fullPageArchiveAssetId"), - imageAssetId: text("imageAssetId"), - crawledAt: integer("crawledAt", { mode: "timestamp" }), - crawlStatus: text("crawlStatus", { - enum: ["pending", "failure", "success"], - }).default("pending"), -}, (bl) => { - return { - urlIdx: index("bookmarkLinks_url_idx").on(bl.url), - }; -}); + // Crawled info + title: text("title"), + description: text("description"), + imageUrl: text("imageUrl"), + favicon: text("favicon"), + content: text("content"), + htmlContent: text("htmlContent"), + screenshotAssetId: text("screenshotAssetId"), + fullPageArchiveAssetId: text("fullPageArchiveAssetId"), + imageAssetId: text("imageAssetId"), + crawledAt: integer("crawledAt", { mode: "timestamp" }), + crawlStatus: text("crawlStatus", { + enum: ["pending", "failure", "success"], + }).default("pending"), + }, + (bl) => { + return { + urlIdx: index("bookmarkLinks_url_idx").on(bl.url), + }; + }, +); export const bookmarkTexts = sqliteTable("bookmarkTexts", { id: text("id") @@ -174,6 +178,7 @@ export const bookmarkAssets = sqliteTable("bookmarkAssets", { content: text("content"), metadata: text("metadata"), fileName: text("fileName"), + sourceUrl: text("sourceUrl"), }); export const bookmarkTags = sqliteTable( @@ -231,8 +236,10 @@ export const bookmarkLists = sqliteTable( userId: text("userId") .notNull() .references(() => users.id, { onDelete: "cascade" }), - parentId: text("parentId") - .references((): AnySQLiteColumn => bookmarkLists.id, { onDelete: "set null" }), + parentId: text("parentId").references( + (): AnySQLiteColumn => bookmarkLists.id, + { onDelete: "set null" }, + ), }, (bl) => ({ userIdIdx: index("bookmarkLists_userId_idx").on(bl.userId), diff --git a/packages/shared/assetdb.ts b/packages/shared/assetdb.ts index 4cea06b0..fb625af8 100644 --- a/packages/shared/assetdb.ts +++ b/packages/shared/assetdb.ts @@ -6,18 +6,26 @@ import serverConfig from "./config"; const ROOT_PATH = path.join(serverConfig.dataDir, "assets"); +export const enum ASSET_TYPES { + IMAGE_JPEG = "image/jpeg", + IMAGE_PNG = "image/png", + IMAGE_WEBP = "image/webp", + APPLICATION_PDF = "application/pdf", + TEXT_HTML = "text/html", +} + // The assets that we allow the users to upload -export const SUPPORTED_UPLOAD_ASSET_TYPES = new Set([ - "image/jpeg", - "image/png", - "image/webp", - "application/pdf", +export const SUPPORTED_UPLOAD_ASSET_TYPES: Set<string> = new Set<string>([ + ASSET_TYPES.IMAGE_JPEG, + ASSET_TYPES.IMAGE_PNG, + ASSET_TYPES.IMAGE_WEBP, + ASSET_TYPES.APPLICATION_PDF, ]); // The assets that we support saving in the asset db export const SUPPORTED_ASSET_TYPES = new Set([ ...SUPPORTED_UPLOAD_ASSET_TYPES, - "text/html", + ASSET_TYPES.TEXT_HTML, ]); function getAssetDir(userId: string, assetId: string) { diff --git a/packages/shared/types/bookmarks.ts b/packages/shared/types/bookmarks.ts index 06cd632e..c9e3e1a5 100644 --- a/packages/shared/types/bookmarks.ts +++ b/packages/shared/types/bookmarks.ts @@ -30,6 +30,7 @@ export const zBookmarkedAssetSchema = z.object({ assetType: z.enum(["image", "pdf"]), assetId: z.string(), fileName: z.string().nullish(), + sourceUrl: z.string().nullish(), }); export type ZBookmarkedAsset = z.infer<typeof zBookmarkedAssetSchema>; diff --git a/packages/trpc/routers/bookmarks.ts b/packages/trpc/routers/bookmarks.ts index 57463177..e083f83c 100644 --- a/packages/trpc/routers/bookmarks.ts +++ b/packages/trpc/routers/bookmarks.ts @@ -172,6 +172,7 @@ function toZodSchema(bookmark: BookmarkQueryReturnType): ZBookmark { assetType: asset.assetType, assetId: asset.assetId, fileName: asset.fileName, + sourceUrl: asset.sourceUrl, }; } else { content = { type: "unknown" }; @@ -257,6 +258,7 @@ export const bookmarksAppRouter = router({ content: null, metadata: null, fileName: input.fileName ?? null, + sourceUrl: null, }) .returning(); content = { |
