author    MohamedBassem <me@mbassem.com>  2024-06-23 12:00:19 +0000
committer MohamedBassem <me@mbassem.com>  2024-06-23 12:00:19 +0000
commit    1071095435ceb7030955bfdd9fc594e1a43c121b (patch)
tree      af54a402537a6739858f42f39b931403c8aa743f
parent    8f0e9b182e971dff98b18c707d3eb6238abf286e (diff)
download  karakeep-1071095435ceb7030955bfdd9fc594e1a43c121b.tar.zst
feature: Automatically transfer image URLs into bookmarked assets. Fixes #246
-rw-r--r--  apps/workers/crawlerWorker.ts  | 22
-rw-r--r--  packages/shared/assetdb.ts     | 10
2 files changed, 23 insertions(+), 9 deletions(-)
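In essence, the crawler now inspects the link's Content-Type before crawling, and converts PDF and supported image URLs directly into asset bookmarks instead of archiving them as webpages. A minimal, self-contained sketch of that dispatch follows; the classifyUrlContentType helper and the string-or-null typing are illustrative only and not part of the commit (the real logic lives in runCrawler in the diff below):

// Illustrative sketch only (not part of the commit): how the new dispatch in
// runCrawler decides what to do with a link, using the sets exported from
// @hoarder/shared/assetdb.
import {
  ASSET_TYPES,
  IMAGE_ASSET_TYPES,
  SUPPORTED_UPLOAD_ASSET_TYPES,
} from "@hoarder/shared/assetdb";

type LinkHandling = "pdf" | "image" | "crawl";

function classifyUrlContentType(contentType: string | null): LinkHandling {
  if (contentType === ASSET_TYPES.APPLICATION_PDF) {
    // PDFs were already downloaded and stored as asset bookmarks before this change.
    return "pdf";
  }
  if (
    contentType &&
    IMAGE_ASSET_TYPES.has(contentType) &&
    SUPPORTED_UPLOAD_ASSET_TYPES.has(contentType)
  ) {
    // New in this commit: supported image types become asset bookmarks too.
    return "image";
  }
  // Everything else still goes through the regular crawl-and-archive path.
  return "crawl";
}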
diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts
index e7e5515c..e297c404 100644
--- a/apps/workers/crawlerWorker.ts
+++ b/apps/workers/crawlerWorker.ts
@@ -37,9 +37,11 @@ import {
import {
ASSET_TYPES,
deleteAsset,
+ IMAGE_ASSET_TYPES,
newAssetId,
saveAsset,
saveAssetFromFile,
+ SUPPORTED_UPLOAD_ASSET_TYPES,
} from "@hoarder/shared/assetdb";
import serverConfig from "@hoarder/shared/config";
import logger from "@hoarder/shared/logger";
@@ -471,26 +473,28 @@ async function getContentType(
}
/**
- * Downloads the pdf asset from the URL and transforms the linkBookmark to an assetBookmark
+ * Downloads the asset from the URL and transforms the linkBookmark to an assetBookmark
* @param url the url the user provided
+ * @param assetType the type of the asset we're downloading
* @param userId the id of the user
* @param jobId the id of the job for logging
* @param bookmarkId the id of the bookmark
*/
-async function handlePDFAsAssetBookmark(
+async function handleAsAssetBookmark(
url: string,
+ assetType: "image" | "pdf",
userId: string,
jobId: string,
bookmarkId: string,
) {
- const assetId = await downloadAndStoreFile(url, userId, jobId, "pdf");
+ const assetId = await downloadAndStoreFile(url, userId, jobId, assetType);
if (!assetId) {
return;
}
await db.transaction(async (trx) => {
await trx.insert(bookmarkAssets).values({
id: bookmarkId,
- assetType: "pdf",
+ assetType,
assetId,
content: null,
fileName: path.basename(new URL(url).pathname),
@@ -630,14 +634,20 @@ async function runCrawler(job: Job<ZCrawlLinkRequest, void>) {
const contentType = await getContentType(url, jobId);
- // Link bookmarks get transformed into asset bookmarks if they point to a pdf asset instead of a webpage
+ // Link bookmarks get transformed into asset bookmarks if they point to a supported asset instead of a webpage
const isPdf = contentType === ASSET_TYPES.APPLICATION_PDF;
let archivalLogic: () => Promise<void> = () => {
return Promise.resolve();
};
if (isPdf) {
- await handlePDFAsAssetBookmark(url, userId, jobId, bookmarkId);
+ await handleAsAssetBookmark(url, "pdf", userId, jobId, bookmarkId);
+ } else if (
+ contentType &&
+ IMAGE_ASSET_TYPES.has(contentType) &&
+ SUPPORTED_UPLOAD_ASSET_TYPES.has(contentType)
+ ) {
+ await handleAsAssetBookmark(url, "image", userId, jobId, bookmarkId);
} else {
archivalLogic = await crawlAndParseUrl(
url,
diff --git a/packages/shared/assetdb.ts b/packages/shared/assetdb.ts
index fb625af8..3444001a 100644
--- a/packages/shared/assetdb.ts
+++ b/packages/shared/assetdb.ts
@@ -14,16 +14,20 @@ export const enum ASSET_TYPES {
TEXT_HTML = "text/html",
}
-// The assets that we allow the users to upload
-export const SUPPORTED_UPLOAD_ASSET_TYPES: Set<string> = new Set<string>([
+export const IMAGE_ASSET_TYPES: Set<string> = new Set<string>([
ASSET_TYPES.IMAGE_JPEG,
ASSET_TYPES.IMAGE_PNG,
ASSET_TYPES.IMAGE_WEBP,
+]);
+
+// The assets that we allow the users to upload
+export const SUPPORTED_UPLOAD_ASSET_TYPES: Set<string> = new Set<string>([
+ ...IMAGE_ASSET_TYPES,
ASSET_TYPES.APPLICATION_PDF,
]);
// The assets that we support saving in the asset db
-export const SUPPORTED_ASSET_TYPES = new Set([
+export const SUPPORTED_ASSET_TYPES: Set<string> = new Set<string>([
...SUPPORTED_UPLOAD_ASSET_TYPES,
ASSET_TYPES.TEXT_HTML,
]);
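Because SUPPORTED_UPLOAD_ASSET_TYPES is now built by spreading IMAGE_ASSET_TYPES, and SUPPORTED_ASSET_TYPES by spreading SUPPORTED_UPLOAD_ASSET_TYPES, the three sets stay nested by construction. A small sketch, not part of the commit, that demonstrates the relationship:

// Not part of the commit: a quick check that the refactored sets stay nested,
// IMAGE_ASSET_TYPES ⊆ SUPPORTED_UPLOAD_ASSET_TYPES ⊆ SUPPORTED_ASSET_TYPES.
import {
  IMAGE_ASSET_TYPES,
  SUPPORTED_ASSET_TYPES,
  SUPPORTED_UPLOAD_ASSET_TYPES,
} from "@hoarder/shared/assetdb";

const isSubset = (a: Set<string>, b: Set<string>): boolean =>
  [...a].every((type) => b.has(type));

console.log(isSubset(IMAGE_ASSET_TYPES, SUPPORTED_UPLOAD_ASSET_TYPES)); // true
console.log(isSubset(SUPPORTED_UPLOAD_ASSET_TYPES, SUPPORTED_ASSET_TYPES)); // true

This nesting is also why the crawler's double membership check on images is conservative: any member of IMAGE_ASSET_TYPES is already in SUPPORTED_UPLOAD_ASSET_TYPES.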