diff options
| author | MohamedBassem <me@mbassem.com> | 2024-03-01 21:01:00 +0000 |
|---|---|---|
| committer | MohamedBassem <me@mbassem.com> | 2024-03-01 22:11:49 +0000 |
| commit | a5434730ede1272f195d6a4b13207b840a5ac2cf (patch) | |
| tree | 14c8a22fbf573b36f16a434349fd3516b38ea539 /packages | |
| parent | 75d315dda4232ee3b89abf054f0b6ee10105ffe3 (diff) | |
| download | karakeep-a5434730ede1272f195d6a4b13207b840a5ac2cf.tar.zst | |
feature: Add full text search support
Diffstat (limited to 'packages')
| -rw-r--r-- | packages/shared/config.ts | 6 | ||||
| -rw-r--r-- | packages/shared/package.json | 1 | ||||
| -rw-r--r-- | packages/shared/queues.ts | 22 | ||||
| -rw-r--r-- | packages/shared/search.ts | 50 | ||||
| -rw-r--r-- | packages/web/app/dashboard/components/Sidebar.tsx | 18 | ||||
| -rw-r--r-- | packages/web/app/dashboard/search/page.tsx | 93 | ||||
| -rw-r--r-- | packages/web/package.json | 1 | ||||
| -rw-r--r-- | packages/web/server/api/routers/bookmarks.ts | 62 | ||||
| -rw-r--r-- | packages/workers/crawler.ts | 8 | ||||
| -rw-r--r-- | packages/workers/index.ts | 6 | ||||
| -rw-r--r-- | packages/workers/openai.ts | 24 | ||||
| -rw-r--r-- | packages/workers/search.ts | 115 |
12 files changed, 395 insertions, 11 deletions
diff --git a/packages/shared/config.ts b/packages/shared/config.ts
index 6ca7b89d..1dee4c4d 100644
--- a/packages/shared/config.ts
+++ b/packages/shared/config.ts
@@ -29,6 +29,13 @@ const serverConfig = {
     browserExecutablePath: process.env.BROWSER_EXECUTABLE_PATH, // If not set, the system's browser will be used
     browserUserDataDir: process.env.BROWSER_USER_DATA_DIR,
   },
+  // Search is optional: it is only configured when MEILI_ADDR is set.
+  meilisearch: process.env.MEILI_ADDR
+    ? {
+        address: process.env.MEILI_ADDR,
+        key: process.env.MEILI_MASTER_KEY || "",
+      }
+    : undefined,
   logLevel: process.env.LOG_LEVEL || "debug",
   demoMode: (process.env.DEMO_MODE ?? "false") == "true",
 };
diff --git a/packages/shared/package.json b/packages/shared/package.json
index 9f6b5498..0b3a8078 100644
--- a/packages/shared/package.json
+++ b/packages/shared/package.json
@@ -4,6 +4,7 @@
   "version": "0.1.0",
   "private": true,
   "dependencies": {
+    "meilisearch": "^0.37.0",
     "winston": "^3.11.0",
     "zod": "^3.22.4"
   },
diff --git a/packages/shared/queues.ts b/packages/shared/queues.ts
index 0155b1e7..a2cbeceb 100644
--- a/packages/shared/queues.ts
+++ b/packages/shared/queues.ts
@@ -27,3 +27,25 @@ export type ZOpenAIRequest = z.infer<typeof zOpenAIRequestSchema>;
 export const OpenAIQueue = new Queue<ZOpenAIRequest, void>("openai_queue", {
   connection: queueConnectionDetails,
 });
+
+// Search Indexing Worker
+export const zSearchIndexingRequestSchema = z.object({
+  bookmarkId: z.string(),
+  type: z.enum(["index", "delete"]),
+});
+export type ZSearchIndexingRequest = z.infer<
+  typeof zSearchIndexingRequestSchema
+>;
+export const SearchIndexingQueue = new Queue<ZSearchIndexingRequest, void>(
+  "searching_indexing",
+  {
+    connection: queueConnectionDetails,
+    defaultJobOptions: {
+      attempts: 5,
+      backoff: {
+        type: "exponential",
+        delay: 1000,
+      },
+    },
+  },
+);
diff --git a/packages/shared/search.ts b/packages/shared/search.ts
new file mode 100644
index 00000000..3bdf1ad1
--- /dev/null
+++ b/packages/shared/search.ts
@@ -0,0 +1,61 @@
+import { MeiliSearch, Index } from "meilisearch";
+import serverConfig from "./config";
+import { z } from "zod";
+
+/** Shape of the documents stored in the bookmarks search index. */
+export const zBookmarkIdxSchema = z.object({
+  id: z.string(),
+  userId: z.string(),
+  url: z.string().nullish(),
+  title: z.string().nullish(),
+  description: z.string().nullish(),
+  content: z.string().nullish(),
+  tags: z.array(z.string()).default([]),
+});
+
+export type ZBookmarkIdx = z.infer<typeof zBookmarkIdxSchema>;
+
+// Only created when serverConfig.meilisearch is set.
+let searchClient: MeiliSearch | undefined;
+
+if (serverConfig.meilisearch) {
+  searchClient = new MeiliSearch({
+    host: serverConfig.meilisearch.address,
+    apiKey: serverConfig.meilisearch.key,
+  });
+}
+
+const BOOKMARKS_IDX_NAME = "bookmarks";
+
+// Memoized handle to the bookmarks index, populated by getSearchIdxClient().
+let idxClient: Index<ZBookmarkIdx> | undefined;
+
+/**
+ * Returns a handle to the bookmarks index. If the index does not exist yet it
+ * is created with "id" as primary key and "id"/"userId" as filterable attributes.
+ *
+ * @returns the index handle, or null when search is not configured.
+ */
+export async function getSearchIdxClient(): Promise<Index<ZBookmarkIdx> | null> {
+  if (idxClient) {
+    return idxClient;
+  }
+  if (!searchClient) {
+    return null;
+  }
+
+  const indices = await searchClient.getIndexes();
+  let idxFound = indices.results.find((i) => i.uid === BOOKMARKS_IDX_NAME);
+  if (!idxFound) {
+    const idx = await searchClient.createIndex(BOOKMARKS_IDX_NAME, {
+      primaryKey: "id",
+    });
+    await searchClient.waitForTask(idx.taskUid);
+    idxFound = await searchClient.getIndex<ZBookmarkIdx>(BOOKMARKS_IDX_NAME);
+    const taskId = await idxFound.updateFilterableAttributes(["id", "userId"]);
+    await searchClient.waitForTask(taskId.taskUid);
+  }
+  // Remember the handle so that later calls skip the index lookup.
+  idxClient = idxFound;
+  return idxClient;
+}
diff --git a/packages/web/app/dashboard/components/Sidebar.tsx b/packages/web/app/dashboard/components/Sidebar.tsx
index 7eea6b6d..010ee103 100644
--- a/packages/web/app/dashboard/components/Sidebar.tsx
+++ b/packages/web/app/dashboard/components/Sidebar.tsx
@@ -1,4 +1,12 @@
-import { Archive, Star, Tag, Home, PackageOpen, Settings } from "lucide-react";
+import {
+  Archive,
+  Star,
+  Tag,
+  Home,
+  PackageOpen,
+  Settings,
+  Search,
+} from "lucide-react";
 import { redirect } from "next/navigation";
 import SidebarItem from "./SidebarItem";
 import {
getServerAuthSession } from "@/server/auth";
@@ -6,6 +14,7 @@ import Link from "next/link";
 import SidebarProfileOptions from "./SidebarProfileOptions";
 import { Separator } from "@/components/ui/separator";
 import AllLists from "./AllLists";
+import serverConfig from "@hoarder/shared/config";
 
 export default async function Sidebar() {
   const session = await getServerAuthSession();
@@ -34,6 +43,13 @@ export default async function Sidebar() {
             name="Favourites"
             path="/dashboard/bookmarks/favourites"
           />
+          {serverConfig.meilisearch && (
+            <SidebarItem
+              logo={<Search />}
+              name="Search"
+              path="/dashboard/search"
+            />
+          )}
           <SidebarItem
             logo={<Archive />}
             name="Archive"
diff --git a/packages/web/app/dashboard/search/page.tsx b/packages/web/app/dashboard/search/page.tsx
new file mode 100644
index 00000000..1c26608e
--- /dev/null
+++ b/packages/web/app/dashboard/search/page.tsx
@@ -0,0 +1,107 @@
+"use client";
+
+import { api } from "@/lib/trpc";
+import { usePathname, useRouter, useSearchParams } from "next/navigation";
+import BookmarksGrid from "../bookmarks/components/BookmarksGrid";
+import { Input } from "@/components/ui/input";
+import Loading from "../bookmarks/loading";
+import { keepPreviousData } from "@tanstack/react-query";
+import { Search } from "lucide-react";
+import { ActionButton } from "@/components/ui/action-button";
+import { Suspense, useRef } from "react";
+
+/**
+ * Search box plus results grid. The query lives in the `q` URL parameter;
+ * typing updates the URL (debounced), which in turn re-runs the search.
+ */
+function SearchComp() {
+  const router = useRouter();
+  const pathname = usePathname();
+  const searchParams = useSearchParams();
+  const searchQuery = searchParams.get("q") || "";
+
+  const inputRef = useRef<HTMLInputElement | null>(null);
+  // The pending debounce timer is kept in a ref so that it survives re-renders.
+  const timeoutRef = useRef<ReturnType<typeof setTimeout> | undefined>(
+    undefined,
+  );
+
+  const { data, isPending, isPlaceholderData, error } =
+    api.bookmarks.searchBookmarks.useQuery(
+      {
+        text: searchQuery,
+      },
+      {
+        placeholderData: keepPreviousData,
+      },
+    );
+
+  // Surface query failures by rethrowing them during render.
+  if (error) {
+    throw error;
+  }
+
+  const doSearch = () => {
+    // An explicit search supersedes any pending debounced one.
+    if (timeoutRef.current) {
+      clearTimeout(timeoutRef.current);
+      timeoutRef.current = undefined;
+    }
+    if (!inputRef.current) {
+      return;
+    }
+    // Encode the query so that characters like "&" or "#" don't break the URL.
+    router.replace(
+      `${pathname}?q=${encodeURIComponent(inputRef.current.value)}`,
+    );
+  };
+
+  // Debounce user input
+  const onInputChange = () => {
+    if (timeoutRef.current) {
+      clearTimeout(timeoutRef.current);
+    }
+    timeoutRef.current = setTimeout(() => {
+      doSearch();
+    }, 200);
+  };
+
+  return (
+    <div className="container flex flex-col gap-3 p-4">
+      <div className="flex gap-2">
+        <Input
+          ref={inputRef}
+          placeholder="Search"
+          defaultValue={searchQuery}
+          onChange={onInputChange}
+        />
+        <ActionButton
+          loading={isPending || isPlaceholderData}
+          onClick={doSearch}
+        >
+          <span className="flex gap-2">
+            <Search />
+            <span className="my-auto">Search</span>
+          </span>
+        </ActionButton>
+      </div>
+      <hr />
+      {data ? (
+        <BookmarksGrid
+          query={{ ids: data.bookmarks.map((b) => b.id) }}
+          bookmarks={data.bookmarks}
+        />
+      ) : (
+        <Loading />
+      )}
+    </div>
+  );
+}
+
+export default function SearchPage() {
+  return (
+    <Suspense>
+      <SearchComp />
+    </Suspense>
+  );
+}
diff --git a/packages/web/package.json b/packages/web/package.json
index 7687704f..b25fc2e9 100644
--- a/packages/web/package.json
+++ b/packages/web/package.json
@@ -41,6 +41,7 @@
     "drizzle-orm": "^0.29.4",
     "install": "^0.13.0",
     "lucide-react": "^0.322.0",
+    "meilisearch": "^0.37.0",
     "next": "14.1.0",
     "next-auth": "^4.24.5",
     "prettier": "^3.2.5",
diff --git a/packages/web/server/api/routers/bookmarks.ts b/packages/web/server/api/routers/bookmarks.ts
index 8b59f1ef..73818508 100644
--- a/packages/web/server/api/routers/bookmarks.ts
+++ b/packages/web/server/api/routers/bookmarks.ts
@@ -1,5 +1,6 @@
 import { z } from "zod";
 import { Context, authedProcedure, router } from "../trpc";
+import { getSearchIdxClient } from "@hoarder/shared/search";
 import {
   ZBookmark,
   ZBookmarkContent,
@@ -17,7 +18,11 @@ import {
   bookmarks,
   tagsOnBookmarks,
 } from "@hoarder/db/schema";
-import { LinkCrawlerQueue, OpenAIQueue } from "@hoarder/shared/queues";
+import {
+  LinkCrawlerQueue,
+  OpenAIQueue,
+  SearchIndexingQueue,
+} from "@hoarder/shared/queues";
import { TRPCError, experimental_trpcMiddleware } from "@trpc/server"; import { and, desc, eq, inArray } from "drizzle-orm"; import { ZBookmarkTags } from "@/lib/types/api/tags"; @@ -172,6 +177,10 @@ export const bookmarksAppRouter = router({ break; } } + SearchIndexingQueue.add("search_indexing", { + bookmarkId: bookmark.id, + type: "index", + }); return bookmark; }), @@ -224,6 +233,10 @@ export const bookmarksAppRouter = router({ message: "Bookmark not found", }); } + SearchIndexingQueue.add("search_indexing", { + bookmarkId: input.bookmarkId, + type: "index", + }); }), deleteBookmark: authedProcedure @@ -238,6 +251,10 @@ export const bookmarksAppRouter = router({ eq(bookmarks.id, input.bookmarkId), ), ); + SearchIndexingQueue.add("search_indexing", { + bookmarkId: input.bookmarkId, + type: "delete", + }); }), recrawlBookmark: authedProcedure .input(z.object({ bookmarkId: z.string() })) @@ -280,6 +297,49 @@ export const bookmarksAppRouter = router({ return toZodSchema(bookmark); }), + searchBookmarks: authedProcedure + .input( + z.object({ + text: z.string(), + }), + ) + .output(zGetBookmarksResponseSchema) + .query(async ({ input, ctx }) => { + const client = await getSearchIdxClient(); + if (!client) { + throw new TRPCError({ + code: "INTERNAL_SERVER_ERROR", + message: "Search functionality is not configured", + }); + } + const resp = await client.search(input.text, { + filter: [`userId = '${ctx.user.id}'`], + }); + + if (resp.hits.length == 0) { + return { bookmarks: [] }; + } + const results = await ctx.db.query.bookmarks.findMany({ + where: and( + eq(bookmarks.userId, ctx.user.id), + inArray( + bookmarks.id, + resp.hits.map((h) => h.id), + ), + ), + with: { + tagsOnBookmarks: { + with: { + tag: true, + }, + }, + link: true, + text: true, + }, + }); + + return { bookmarks: results.map(toZodSchema) }; + }), getBookmarks: authedProcedure .input(zGetBookmarksRequestSchema) .output(zGetBookmarksResponseSchema) diff --git a/packages/workers/crawler.ts 
b/packages/workers/crawler.ts
index bfb46218..7be014a7 100644
--- a/packages/workers/crawler.ts
+++ b/packages/workers/crawler.ts
@@ -2,6 +2,7 @@ import logger from "@hoarder/shared/logger";
 import {
   LinkCrawlerQueue,
   OpenAIQueue,
+  SearchIndexingQueue,
   ZCrawlLinkRequest,
   queueConnectionDetails,
   zCrawlLinkRequestSchema,
@@ -172,4 +173,10 @@ async function runCrawler(job: Job<ZCrawlLinkRequest, void>) {
   OpenAIQueue.add("openai", {
     bookmarkId,
   });
+
+  // Update the search index now that the crawled content is stored
+  await SearchIndexingQueue.add("search_indexing", {
+    bookmarkId,
+    type: "index",
+  });
 }
diff --git a/packages/workers/index.ts b/packages/workers/index.ts
index 67be7af2..295eeaef 100644
--- a/packages/workers/index.ts
+++ b/packages/workers/index.ts
@@ -1,14 +1,16 @@
 import "dotenv/config";
 import { CrawlerWorker } from "./crawler";
 import { OpenAiWorker } from "./openai";
+import { SearchIndexingWorker } from "./search";
 
 async function main() {
-  const [crawler, openai] = [
+  const [crawler, openai, search] = [
     await CrawlerWorker.build(),
     await OpenAiWorker.build(),
+    await SearchIndexingWorker.build(),
   ];
 
-  await Promise.all([crawler.run(), openai.run()]);
+  await Promise.all([crawler.run(), openai.run(), search.run()]);
 }
 
 main();
diff --git a/packages/workers/openai.ts b/packages/workers/openai.ts
index 8f85c4ec..cc456616 100644
--- a/packages/workers/openai.ts
+++ b/packages/workers/openai.ts
@@ -3,6 +3,7 @@ import logger from "@hoarder/shared/logger";
 import serverConfig from "@hoarder/shared/config";
 import {
   OpenAIQueue,
+  SearchIndexingQueue,
   ZOpenAIRequest,
   queueConnectionDetails,
   zOpenAIRequestSchema,
@@ -159,13 +160,16 @@ async function connectTags(bookmarkId: string, tagIds: string[]) {
if (tagIds.length == 0) { return; } - await db.insert(tagsOnBookmarks).values( - tagIds.map((tagId) => ({ - tagId, - bookmarkId, - attachedBy: "ai" as const, - })), - ); + await db + .insert(tagsOnBookmarks) + .values( + tagIds.map((tagId) => ({ + tagId, + bookmarkId, + attachedBy: "ai" as const, + })), + ) + .onConflictDoNothing(); } async function runOpenAI(job: Job<ZOpenAIRequest, void>) { @@ -203,4 +207,10 @@ async function runOpenAI(job: Job<ZOpenAIRequest, void>) { const tagIds = await createTags(tags, bookmark.userId); await connectTags(bookmarkId, tagIds); + + // Update the search index + SearchIndexingQueue.add("search_indexing", { + bookmarkId, + type: "index", + }); } diff --git a/packages/workers/search.ts b/packages/workers/search.ts new file mode 100644 index 00000000..a628b2ed --- /dev/null +++ b/packages/workers/search.ts @@ -0,0 +1,115 @@ +import { db } from "@hoarder/db"; +import logger from "@hoarder/shared/logger"; +import { getSearchIdxClient } from "@hoarder/shared/search"; +import { + SearchIndexingQueue, + ZSearchIndexingRequest, + queueConnectionDetails, + zSearchIndexingRequestSchema, +} from "@hoarder/shared/queues"; +import { Job } from "bullmq"; +import { Worker } from "bullmq"; +import { bookmarks } from "@hoarder/db/schema"; +import { eq } from "drizzle-orm"; + +export class SearchIndexingWorker { + static async build() { + logger.info("Starting search indexing worker ..."); + const worker = new Worker<ZSearchIndexingRequest, void>( + SearchIndexingQueue.name, + runSearchIndexing, + { + connection: queueConnectionDetails, + autorun: false, + }, + ); + + worker.on("completed", (job) => { + const jobId = job?.id || "unknown"; + logger.info(`[search][${jobId}] Completed successfully`); + }); + + worker.on("failed", (job, error) => { + const jobId = job?.id || "unknown"; + logger.error(`[search][${jobId}] openai job failed: ${error}`); + }); + + return worker; + } +} + +async function runIndex( + searchClient: 
NonNullable<Awaited<ReturnType<typeof getSearchIdxClient>>>, + bookmarkId: string, +) { + const bookmark = await db.query.bookmarks.findFirst({ + where: eq(bookmarks.id, bookmarkId), + with: { + link: true, + text: true, + tagsOnBookmarks: { + with: { + tag: true, + }, + }, + }, + }); + + if (!bookmark) { + throw new Error(`Bookmark ${bookmarkId} not found`); + } + + searchClient.addDocuments([ + { + id: bookmark.id, + userId: bookmark.userId, + ...(bookmark.link + ? { + url: bookmark.link.url, + title: bookmark.link.title, + description: bookmark.link.description, + } + : undefined), + ...(bookmark.text ? { content: bookmark.text.text } : undefined), + tags: bookmark.tagsOnBookmarks.map((t) => t.tag.name), + }, + ]); +} + +async function runDelete( + searchClient: NonNullable<Awaited<ReturnType<typeof getSearchIdxClient>>>, + bookmarkId: string, +) { + await searchClient.deleteDocument(bookmarkId); +} + +async function runSearchIndexing(job: Job<ZSearchIndexingRequest, void>) { + const jobId = job.id || "unknown"; + + const request = zSearchIndexingRequestSchema.safeParse(job.data); + if (!request.success) { + throw new Error( + `[search][${jobId}] Got malformed job request: ${request.error.toString()}`, + ); + } + + const searchClient = await getSearchIdxClient(); + if (!searchClient) { + logger.debug( + `[search][${jobId}] Search is not configured, nothing to do now`, + ); + return; + } + + const bookmarkId = request.data.bookmarkId; + switch (request.data.type) { + case "index": { + await runIndex(searchClient, bookmarkId); + break; + } + case "delete": { + await runDelete(searchClient, bookmarkId); + break; + } + } +} |
