about | summary | refs | log | tree | commit | diff | stats
path: root/apps
diff options
context:
space:
mode:
author: Ahmad Mujahid <55625580+AhmadMuj@users.noreply.github.com> 2024-04-12 00:52:53 +0400
committer: GitHub <noreply@github.com> 2024-04-11 23:52:53 +0300
commit: 95cf8f47300d6eb6efe36d44bcab0f44a8e27585 (patch)
tree: dcc64d69e11c80d4b84e977e5b27d135a8d28918 /apps
parent: 238c2967b269ca0f66d8e759c6a0234107e1fd1e (diff)
download: karakeep-95cf8f47300d6eb6efe36d44bcab0f44a8e27585.tar.zst
feature: Recrawl failed links from admin UI (#95)
* feature: Retry failed crawling URLs
* fix: Enhancing visuals and some minor changes.
Diffstat (limited to 'apps')
-rw-r--r-- apps/web/app/dashboard/admin/page.tsx 26
-rw-r--r-- apps/workers/crawlerWorker.ts 20
2 files changed, 41 insertions, 5 deletions
diff --git a/apps/web/app/dashboard/admin/page.tsx b/apps/web/app/dashboard/admin/page.tsx
index ac5e17fb..c1fa4dee 100644
--- a/apps/web/app/dashboard/admin/page.tsx
+++ b/apps/web/app/dashboard/admin/page.tsx
@@ -67,7 +67,7 @@ function ReleaseInfo() {
function ActionsSection() {
const { mutate: recrawlLinks, isPending: isRecrawlPending } =
- api.admin.recrawlAllLinks.useMutation({
+ api.admin.recrawlLinks.useMutation({
onSuccess: () => {
toast({
description: "Recrawl enqueued",
@@ -103,7 +103,15 @@ function ActionsSection() {
className="lg:w-1/2"
variant="destructive"
loading={isRecrawlPending}
- onClick={() => recrawlLinks()}
+ onClick={() => recrawlLinks({ crawlStatus: "failure" })}
+ >
+ Recrawl Failed Links Only
+ </ActionButton>
+ <ActionButton
+ className="lg:w-1/2"
+ variant="destructive"
+ loading={isRecrawlPending}
+ onClick={() => recrawlLinks({ crawlStatus: "all" })}
>
Recrawl All Links
</ActionButton>
@@ -153,18 +161,26 @@ function ServerStatsSection() {
<Separator />
<p className="text-xl">Background Jobs</p>
<Table className="lg:w-1/2">
+ <TableHeader>
+ <TableHead>Job</TableHead>
+ <TableHead>Pending</TableHead>
+ <TableHead>Failed</TableHead>
+ </TableHeader>
<TableBody>
<TableRow>
- <TableCell className="lg:w-2/3">Pending Crawling Jobs</TableCell>
+ <TableCell className="lg:w-2/3">Crawling Jobs</TableCell>
<TableCell>{serverStats.pendingCrawls}</TableCell>
+ <TableCell>{serverStats.failedCrawls}</TableCell>
</TableRow>
<TableRow>
- <TableCell>Pending Indexing Jobs</TableCell>
+ <TableCell>Indexing Jobs</TableCell>
<TableCell>{serverStats.pendingIndexing}</TableCell>
+ <TableCell>{serverStats.failedIndexing}</TableCell>
</TableRow>
<TableRow>
- <TableCell>Pending OpenAI Jobs</TableCell>
+ <TableCell>OpenAI Jobs</TableCell>
<TableCell>{serverStats.pendingOpenai}</TableCell>
+ <TableCell>{serverStats.failedOpenai}</TableCell>
</TableRow>
</TableBody>
</Table>
diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts
index eec8cd98..91b0a03f 100644
--- a/apps/workers/crawlerWorker.ts
+++ b/apps/workers/crawlerWorker.ts
@@ -124,17 +124,37 @@ export class CrawlerWorker {
worker.on("completed", (job) => {
const jobId = job?.id ?? "unknown";
logger.info(`[Crawler][${jobId}] Completed successfully`);
+ const bookmarkId = job?.data.bookmarkId;
+ if (bookmarkId) {
+ changeBookmarkStatus(bookmarkId, "success");
+ }
});
worker.on("failed", (job, error) => {
const jobId = job?.id ?? "unknown";
logger.error(`[Crawler][${jobId}] Crawling job failed: ${error}`);
+ const bookmarkId = job?.data.bookmarkId;
+ if (bookmarkId) {
+ changeBookmarkStatus(bookmarkId, "failure");
+ }
});
return worker;
}
}
+async function changeBookmarkStatus(
+ bookmarkId: string,
+ crawlStatus: "success" | "failure",
+) {
+ await db
+ .update(bookmarkLinks)
+ .set({
+ crawlStatus,
+ })
+ .where(eq(bookmarkLinks.id, bookmarkId));
+}
+
async function getBookmarkUrl(bookmarkId: string) {
const bookmark = await db.query.bookmarkLinks.findFirst({
where: eq(bookmarkLinks.id, bookmarkId),