From 4a13c36da50f6b3171d817edebefe96ba85dc666 Mon Sep 17 00:00:00 2001 From: kamtschatka Date: Mon, 28 Oct 2024 02:51:00 +0100 Subject: feature: Archive videos using yt-dlp. Fixes #215 (#525) * Allow downloading more content from a webpage and index it #215 Added a worker that allows downloading videos depending on the environment variables refactored the code a bit added new video asset updated documentation * Some tweaks * Drop the dependency on the yt-dlp wrapper * Update openapi specs * Don't log an error when the url is not supported * Better handle supported websites that don't download anything --------- Co-authored-by: Mohamed Bassem --- packages/shared/config.ts | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'packages/shared/config.ts') diff --git a/packages/shared/config.ts b/packages/shared/config.ts index 4b51d15d..35d3df54 100644 --- a/packages/shared/config.ts +++ b/packages/shared/config.ts @@ -42,6 +42,9 @@ const allEnv = z.object({ CRAWLER_STORE_SCREENSHOT: stringBool("true"), CRAWLER_FULL_PAGE_SCREENSHOT: stringBool("false"), CRAWLER_FULL_PAGE_ARCHIVE: stringBool("false"), + CRAWLER_VIDEO_DOWNLOAD: stringBool("false"), + CRAWLER_VIDEO_DOWNLOAD_MAX_SIZE: z.coerce.number().default(50), + CRAWLER_VIDEO_DOWNLOAD_TIMEOUT_SEC: z.coerce.number().default(10 * 60), MEILI_ADDR: z.string().optional(), MEILI_MASTER_KEY: z.string().default(""), LOG_LEVEL: z.string().default("debug"), @@ -98,6 +101,9 @@ const serverConfigSchema = allEnv.transform((val) => { storeScreenshot: val.CRAWLER_STORE_SCREENSHOT, fullPageScreenshot: val.CRAWLER_FULL_PAGE_SCREENSHOT, fullPageArchive: val.CRAWLER_FULL_PAGE_ARCHIVE, + downloadVideo: val.CRAWLER_VIDEO_DOWNLOAD, + maxVideoDownloadSize: val.CRAWLER_VIDEO_DOWNLOAD_MAX_SIZE, + downloadVideoTimeout: val.CRAWLER_VIDEO_DOWNLOAD_TIMEOUT_SEC, }, ocr: { langs: val.OCR_LANGS, -- cgit v1.2.3-70-g09d2