author     Mohamed Bassem <me@mbassem.com>  2024-11-30 19:12:45 +0000
committer  Mohamed Bassem <me@mbassem.com>  2024-12-08 20:59:42 +0000
commit     705d539c8e9c6a86882825ee4dabeff3027ba827 (patch)
tree       9ac5d1c048393213d1302d005630a64a4789178c /apps
parent     a7b13869b149edbea9bdb220614c69c9a05d79b5 (diff)
download   karakeep-705d539c8e9c6a86882825ee4dabeff3027ba827.tar.zst
feature: Store crawling status code and allow users to find broken links. Fixes #169
Diffstat (limited to 'apps')
-rw-r--r--  apps/web/app/settings/broken-links/page.tsx     | 131
-rw-r--r--  apps/web/components/settings/sidebar/items.tsx  |   6
-rw-r--r--  apps/web/lib/i18n/locales/en/translation.json   |   7
-rw-r--r--  apps/workers/crawlerWorker.ts                   |  10
4 files changed, 150 insertions, 4 deletions
diff --git a/apps/web/app/settings/broken-links/page.tsx b/apps/web/app/settings/broken-links/page.tsx
new file mode 100644
index 00000000..0b83dfa9
--- /dev/null
+++ b/apps/web/app/settings/broken-links/page.tsx
@@ -0,0 +1,131 @@
+"use client";
+
+import { ActionButton } from "@/components/ui/action-button";
+import { FullPageSpinner } from "@/components/ui/full-page-spinner";
+import {
+ Table,
+ TableBody,
+ TableCell,
+ TableHead,
+ TableHeader,
+ TableRow,
+} from "@/components/ui/table";
+import { toast } from "@/components/ui/use-toast";
+import { RefreshCw, Trash2 } from "lucide-react";
+import { useTranslation } from "react-i18next";
+
+import {
+ useDeleteBookmark,
+ useRecrawlBookmark,
+} from "@hoarder/shared-react/hooks/bookmarks";
+import { api } from "@hoarder/shared-react/trpc";
+
+export default function BrokenLinksPage() {
+ const { t } = useTranslation();
+
+ const apiUtils = api.useUtils();
+ const { data, isPending } = api.bookmarks.getBrokenLinks.useQuery();
+
+ const { mutate: deleteBookmark, isPending: isDeleting } = useDeleteBookmark({
+ onSuccess: () => {
+ toast({
+ description: t("toasts.bookmarks.deleted"),
+ });
+ apiUtils.bookmarks.getBrokenLinks.invalidate();
+ },
+ onError: () => {
+ toast({
+ description: t("common.something_went_wrong"),
+ variant: "destructive",
+ });
+ },
+ });
+
+ const { mutate: recrawlBookmark, isPending: isRecrawling } =
+ useRecrawlBookmark({
+ onSuccess: () => {
+ toast({
+ description: t("toasts.bookmarks.refetch"),
+ });
+ apiUtils.bookmarks.getBrokenLinks.invalidate();
+ },
+ onError: () => {
+ toast({
+ description: t("common.something_went_wrong"),
+ variant: "destructive",
+ });
+ },
+ });
+
+ return (
+ <div className="rounded-md border bg-background p-4">
+ <div className="flex items-center justify-between">
+ <div className="mb-2 text-lg font-medium">
+ {t("settings.broken_links.broken_links")}
+ </div>
+ </div>
+ <div className="mt-2">
+ {isPending && <FullPageSpinner />}
+ {!isPending && data && data.bookmarks.length == 0 && (
+ <p className="rounded-md bg-muted p-2 text-sm text-muted-foreground">
+ No broken links found
+ </p>
+ )}
+ {!isPending && data && data.bookmarks.length > 0 && (
+ <Table>
+ <TableHeader>
+ <TableRow>
+ <TableHead>{t("common.url")}</TableHead>
+ <TableHead>{t("common.created_at")}</TableHead>
+ <TableHead>
+ {t("settings.broken_links.last_crawled_at")}
+ </TableHead>
+ <TableHead>
+ {t("settings.broken_links.crawling_status")}
+ </TableHead>
+ <TableHead>{t("common.action")}</TableHead>
+ </TableRow>
+ </TableHeader>
+ <TableBody>
+ {data.bookmarks.map((b) => (
+ <TableRow key={b.id}>
+ <TableCell>{b.url}</TableCell>
+ <TableCell>{b.createdAt?.toLocaleString()}</TableCell>
+ <TableCell>{b.crawledAt?.toLocaleString()}</TableCell>
+ <TableCell>
+ {b.isCrawlingFailure ? (
+ <span className="text-red-500">Failed</span>
+ ) : (
+ b.statusCode
+ )}
+ </TableCell>
+ <TableCell className="flex gap-2">
+ <ActionButton
+ variant="secondary"
+ loading={isRecrawling}
+ onClick={() => recrawlBookmark({ bookmarkId: b.id })}
+ className="flex items-center gap-2"
+ >
+ <RefreshCw className="size-4" />
+ {t("actions.recrawl")}
+ </ActionButton>
+ <ActionButton
+ variant="destructive"
+ onClick={() => deleteBookmark({ bookmarkId: b.id })}
+ loading={isDeleting}
+ className="flex items-center gap-2"
+ >
+ <Trash2 className="size-4" />
+ {t("actions.delete")}
+ </ActionButton>
+ </TableCell>
+ </TableRow>
+ ))}
+ <TableRow></TableRow>
+ </TableBody>
+ </Table>
+ )}
+ </div>
+ </div>
+ );
+}
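
The page above reads its rows from a bookmarks.getBrokenLinks tRPC procedure that is not part of this diff (the diffstat is limited to 'apps'; the router lives in a shared package). Below is a minimal sketch of the row shape the page consumes and one plausible "broken" predicate; the interface, field types, and the 2xx check are assumptions for illustration, not the repository's actual implementation.

// Sketch only: the real bookmarks.getBrokenLinks procedure lives outside this
// apps/-only diff. This models the fields the page reads (url, createdAt,
// crawledAt, statusCode, isCrawlingFailure); names and types are assumptions.
interface BrokenLinkRow {
  id: string;
  url: string;
  createdAt: Date | null;
  crawledAt: Date | null;
  statusCode: number | null;  // crawlStatusCode written by the worker change below
  isCrawlingFailure: boolean; // presumably true when the crawl itself errored out
}

function isBroken(row: BrokenLinkRow): boolean {
  if (row.isCrawlingFailure) return true;                // navigation/fetch failed
  if (row.statusCode === null) return false;             // not crawled yet
  return row.statusCode < 200 || row.statusCode >= 300;  // non-2xx => broken
}
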
diff --git a/apps/web/components/settings/sidebar/items.tsx b/apps/web/components/settings/sidebar/items.tsx
index 43dfabdd..f76d494a 100644
--- a/apps/web/components/settings/sidebar/items.tsx
+++ b/apps/web/components/settings/sidebar/items.tsx
@@ -4,6 +4,7 @@ import {
ArrowLeft,
Download,
KeyRound,
+ Link,
Rss,
Sparkles,
User,
@@ -46,4 +47,9 @@ export const settingsSidebarItems = (
icon: <KeyRound size={18} />,
path: "/settings/api-keys",
},
+ {
+ name: t("settings.broken_links.broken_links"),
+ icon: <Link size={18} />,
+ path: "/settings/broken-links",
+ },
];
diff --git a/apps/web/lib/i18n/locales/en/translation.json b/apps/web/lib/i18n/locales/en/translation.json
index 530d489a..9f12487f 100644
--- a/apps/web/lib/i18n/locales/en/translation.json
+++ b/apps/web/lib/i18n/locales/en/translation.json
@@ -38,6 +38,7 @@
"unfavorite": "Unfavorite",
"delete": "Delete",
"refresh": "Refresh",
+ "recrawl": "Recrawl",
"download_full_page_archive": "Download Full Page Archive",
"edit_tags": "Edit Tags",
"add_to_list": "Add to List",
@@ -103,6 +104,12 @@
"new_api_key_desc": "Give your API key a unique name",
"key_success": "Key was successfully created",
"key_success_please_copy": "Please copy the key and store it somewhere safe. Once you close the dialog, you won't be able to access it again."
+ },
+ "broken_links": {
+ "broken_links": "Broken Links",
+ "last_crawled_at": "Last Crawled At",
+ "crawling_status": "Crawling Status",
+ "crawling_failed": "Crawling Failed"
}
},
"admin": {
diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts
index 3952a287..208de44b 100644
--- a/apps/workers/crawlerWorker.ts
+++ b/apps/workers/crawlerWorker.ts
@@ -241,14 +241,12 @@ async function browserlessCrawlPage(jobId: string, url: string) {
const response = await fetch(url, {
signal: AbortSignal.timeout(5000),
});
- if (!response.ok) {
- throw new Error(`Failed to crawl page: ${response.status}`);
- }
logger.info(
`[Crawler][${jobId}] Successfully fetched the content of "${url}". Status: ${response.status}, Size: ${response.size}`,
);
return {
htmlContent: await response.text(),
+ statusCode: response.status,
screenshot: undefined,
url: response.url,
};
@@ -260,6 +258,7 @@ async function crawlPage(
): Promise<{
htmlContent: string;
screenshot: Buffer | undefined;
+ statusCode: number;
url: string;
}> {
let browser: Browser | undefined;
@@ -282,7 +281,7 @@ async function crawlPage(
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
);
- await page.goto(url, {
+ const response = await page.goto(url, {
timeout: serverConfig.crawler.navigateTimeoutSec * 1000,
});
logger.info(
@@ -328,6 +327,7 @@ async function crawlPage(
return {
htmlContent,
+ statusCode: response?.status() ?? 0,
screenshot,
url: page.url(),
};
@@ -583,6 +583,7 @@ async function crawlAndParseUrl(
const {
htmlContent,
screenshot,
+ statusCode,
url: browserUrl,
} = await crawlPage(jobId, url);
@@ -618,6 +619,7 @@ async function crawlAndParseUrl(
content: readableContent?.textContent,
htmlContent: readableContent?.content,
crawledAt: new Date(),
+ crawlStatusCode: statusCode,
})
.where(eq(bookmarkLinks.id, bookmarkId));
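
The crawlStatusCode column written above is defined in the shared schema package, which this apps/-only diff does not show. As a hedged sketch, the column addition could look like the following with Drizzle's SQLite core; the table and column names follow the update statement above, while the surrounding columns and builder options are assumptions.

// Sketch only: the actual schema change lives outside this diff.
import { integer, sqliteTable, text } from "drizzle-orm/sqlite-core";

export const bookmarkLinks = sqliteTable("bookmarkLinks", {
  id: text("id").primaryKey(),
  url: text("url").notNull(),
  // Existing crawl bookkeeping (name taken from the worker update above).
  crawledAt: integer("crawledAt", { mode: "timestamp" }),
  // New: HTTP status observed on the last crawl; null until a crawl completes.
  // This is the column crawlAndParseUrl now fills with `statusCode`.
  crawlStatusCode: integer("crawlStatusCode"),
});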