diff options
| author | Mohamed Bassem <me@mbassem.com> | 2024-11-30 19:12:45 +0000 |
|---|---|---|
| committer | Mohamed Bassem <me@mbassem.com> | 2024-12-08 20:59:42 +0000 |
| commit | 705d539c8e9c6a86882825ee4dabeff3027ba827 (patch) | |
| tree | 9ac5d1c048393213d1302d005630a64a4789178c /apps | |
| parent | a7b13869b149edbea9bdb220614c69c9a05d79b5 (diff) | |
| download | karakeep-705d539c8e9c6a86882825ee4dabeff3027ba827.tar.zst | |
feature: Store crawling status code and allow users to find broken links. Fixes #169
Diffstat (limited to 'apps')
| -rw-r--r-- | apps/web/app/settings/broken-links/page.tsx | 131 | ||||
| -rw-r--r-- | apps/web/components/settings/sidebar/items.tsx | 6 | ||||
| -rw-r--r-- | apps/web/lib/i18n/locales/en/translation.json | 7 | ||||
| -rw-r--r-- | apps/workers/crawlerWorker.ts | 10 |
4 files changed, 150 insertions, 4 deletions
diff --git a/apps/web/app/settings/broken-links/page.tsx b/apps/web/app/settings/broken-links/page.tsx new file mode 100644 index 00000000..0b83dfa9 --- /dev/null +++ b/apps/web/app/settings/broken-links/page.tsx @@ -0,0 +1,131 @@ +"use client"; + +import { ActionButton } from "@/components/ui/action-button"; +import { FullPageSpinner } from "@/components/ui/full-page-spinner"; +import { + Table, + TableBody, + TableCell, + TableHead, + TableHeader, + TableRow, +} from "@/components/ui/table"; +import { toast } from "@/components/ui/use-toast"; +import { RefreshCw, Trash2 } from "lucide-react"; +import { useTranslation } from "react-i18next"; + +import { + useDeleteBookmark, + useRecrawlBookmark, +} from "@hoarder/shared-react/hooks/bookmarks"; +import { api } from "@hoarder/shared-react/trpc"; + +export default function BrokenLinksPage() { + const { t } = useTranslation(); + + const apiUtils = api.useUtils(); + const { data, isPending } = api.bookmarks.getBrokenLinks.useQuery(); + + const { mutate: deleteBookmark, isPending: isDeleting } = useDeleteBookmark({ + onSuccess: () => { + toast({ + description: t("toasts.bookmarks.deleted"), + }); + apiUtils.bookmarks.getBrokenLinks.invalidate(); + }, + onError: () => { + toast({ + description: t("common.something_went_wrong"), + variant: "destructive", + }); + }, + }); + + const { mutate: recrawlBookmark, isPending: isRecrawling } = + useRecrawlBookmark({ + onSuccess: () => { + toast({ + description: t("toasts.bookmarks.refetch"), + }); + apiUtils.bookmarks.getBrokenLinks.invalidate(); + }, + onError: () => { + toast({ + description: t("common.something_went_wrong"), + variant: "destructive", + }); + }, + }); + + return ( + <div className="rounded-md border bg-background p-4"> + <div className="flex items-center justify-between"> + <div className="mb-2 text-lg font-medium"> + {t("settings.broken_links.broken_links")} + </div> + </div> + <div className="mt-2"> + {isPending && <FullPageSpinner />} + {!isPending && data && data.bookmarks.length == 0 && ( + <p className="rounded-md bg-muted p-2 text-sm text-muted-foreground"> + No broken links found + </p> + )} + {!isPending && data && data.bookmarks.length > 0 && ( + <Table> + <TableHeader> + <TableRow> + <TableHead>{t("common.url")}</TableHead> + <TableHead>{t("common.created_at")}</TableHead> + <TableHead> + {t("settings.broken_links.last_crawled_at")} + </TableHead> + <TableHead> + {t("settings.broken_links.crawling_status")} + </TableHead> + <TableHead>{t("common.action")}</TableHead> + </TableRow> + </TableHeader> + <TableBody> + {data.bookmarks.map((b) => ( + <TableRow key={b.id}> + <TableCell>{b.url}</TableCell> + <TableCell>{b.createdAt?.toLocaleString()}</TableCell> + <TableCell>{b.crawledAt?.toLocaleString()}</TableCell> + <TableCell> + {b.isCrawlingFailure ? ( + <span className="text-red-500">Failed</span> + ) : ( + b.statusCode + )} + </TableCell> + <TableCell className="flex gap-2"> + <ActionButton + variant="secondary" + loading={isRecrawling} + onClick={() => recrawlBookmark({ bookmarkId: b.id })} + className="flex items-center gap-2" + > + <RefreshCw className="size-4" /> + {t("actions.recrawl")} + </ActionButton> + <ActionButton + variant="destructive" + onClick={() => deleteBookmark({ bookmarkId: b.id })} + loading={isDeleting} + className="flex items-center gap-2" + > + <Trash2 className="size-4" /> + {t("actions.delete")} + </ActionButton> + </TableCell> + </TableRow> + ))} + <TableRow></TableRow> + </TableBody> + </Table> + )} + </div> + </div> + ); +} diff --git a/apps/web/components/settings/sidebar/items.tsx b/apps/web/components/settings/sidebar/items.tsx index 43dfabdd..f76d494a 100644 --- a/apps/web/components/settings/sidebar/items.tsx +++ b/apps/web/components/settings/sidebar/items.tsx @@ -4,6 +4,7 @@ import { ArrowLeft, Download, KeyRound, + Link, Rss, Sparkles, User, @@ -46,4 +47,9 @@ export const settingsSidebarItems = ( icon: <KeyRound size={18} />, path: "/settings/api-keys", }, + { + name: t("settings.broken_links.broken_links"), + icon: <Link size={18} />, + path: "/settings/broken-links", + }, ]; diff --git a/apps/web/lib/i18n/locales/en/translation.json b/apps/web/lib/i18n/locales/en/translation.json index 530d489a..9f12487f 100644 --- a/apps/web/lib/i18n/locales/en/translation.json +++ b/apps/web/lib/i18n/locales/en/translation.json @@ -38,6 +38,7 @@ "unfavorite": "Unfavorite", "delete": "Delete", "refresh": "Refresh", + "recrawl": "Recrawl", "download_full_page_archive": "Download Full Page Archive", "edit_tags": "Edit Tags", "add_to_list": "Add to List", @@ -103,6 +104,12 @@ "new_api_key_desc": "Give your API key a unique name", "key_success": "Key was successfully created", "key_success_please_copy": "Please copy the key and store it somewhere safe. Once you close the dialog, you won't be able to access it again." + }, + "broken_links": { + "broken_links": "Broken Links", + "last_crawled_at": "Last Crawled At", + "crawling_status": "Crawling Status", + "crawling_failed": "Crawling Failed" } }, "admin": { diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts index 3952a287..208de44b 100644 --- a/apps/workers/crawlerWorker.ts +++ b/apps/workers/crawlerWorker.ts @@ -241,14 +241,12 @@ async function browserlessCrawlPage(jobId: string, url: string) { const response = await fetch(url, { signal: AbortSignal.timeout(5000), }); - if (!response.ok) { - throw new Error(`Failed to crawl page: ${response.status}`); - } logger.info( `[Crawler][${jobId}] Successfully fetched the content of "${url}". Status: ${response.status}, Size: ${response.size}`, ); return { htmlContent: await response.text(), + statusCode: response.status, screenshot: undefined, url: response.url, }; @@ -260,6 +258,7 @@ async function crawlPage( ): Promise<{ htmlContent: string; screenshot: Buffer | undefined; + statusCode: number; url: string; }> { let browser: Browser | undefined; @@ -282,7 +281,7 @@ async function crawlPage( "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36", ); - await page.goto(url, { + const response = await page.goto(url, { timeout: serverConfig.crawler.navigateTimeoutSec * 1000, }); logger.info( @@ -328,6 +327,7 @@ async function crawlPage( return { htmlContent, + statusCode: response?.status() ?? 0, screenshot, url: page.url(), }; @@ -583,6 +583,7 @@ async function crawlAndParseUrl( const { htmlContent, screenshot, + statusCode, url: browserUrl, } = await crawlPage(jobId, url); @@ -618,6 +619,7 @@ async function crawlAndParseUrl( content: readableContent?.textContent, htmlContent: readableContent?.content, crawledAt: new Date(), + crawlStatusCode: statusCode, }) .where(eq(bookmarkLinks.id, bookmarkId)); |
