aboutsummaryrefslogtreecommitdiffstats
path: root/packages/shared
diff options
context:
space:
mode:
authorMohamed Bassem <me@mbassem.com>2024-12-31 13:17:56 +0200
committerGitHub <noreply@github.com>2024-12-31 13:17:56 +0200
commitcbaf9e6034aa09911fca967b7af6cad11f154b3e (patch)
tree6995d9d60d9ae5181af78e6577f8d7b724d7a971 /packages/shared
parentf476fca758bb039f9605488b61ba35fc097d6cfc (diff)
downloadkarakeep-cbaf9e6034aa09911fca967b7af6cad11f154b3e.tar.zst
feat: Introduce advanced search capabilities (#753)
* feat: Implement search filtering in the backend * feat: Implement search language parser * rename matcher name * Add ability to interleve text * More fixes * be more tolerable to parsing errors * Add a search query explainer widget * Handle date parsing gracefully * Fix the lockfile * Encode query search param * Fix table body error * Fix error when writing quotes
Diffstat (limited to 'packages/shared')
-rw-r--r--packages/shared/package.json7
-rw-r--r--packages/shared/searchQueryParser.test.ts275
-rw-r--r--packages/shared/searchQueryParser.ts351
-rw-r--r--packages/shared/types/search.ts72
-rw-r--r--packages/shared/vitest.config.ts14
5 files changed, 717 insertions, 2 deletions
diff --git a/packages/shared/package.json b/packages/shared/package.json
index d741b70f..d412301a 100644
--- a/packages/shared/package.json
+++ b/packages/shared/package.json
@@ -10,18 +10,21 @@
"meilisearch": "^0.37.0",
"ollama": "^0.5.9",
"openai": "^4.67.1",
+ "typescript-parsec": "^0.3.4",
"winston": "^3.11.0",
"zod": "^3.22.4"
},
"devDependencies": {
"@hoarder/eslint-config": "workspace:^0.2.0",
"@hoarder/prettier-config": "workspace:^0.1.0",
- "@hoarder/tsconfig": "workspace:^0.1.0"
+ "@hoarder/tsconfig": "workspace:^0.1.0",
+ "vitest": "^1.3.1"
},
"scripts": {
"typecheck": "tsc --noEmit",
"format": "prettier . --ignore-path ../../.prettierignore",
- "lint": "eslint ."
+ "lint": "eslint .",
+ "test": "vitest"
},
"main": "index.ts",
"eslintConfig": {
diff --git a/packages/shared/searchQueryParser.test.ts b/packages/shared/searchQueryParser.test.ts
new file mode 100644
index 00000000..428d5929
--- /dev/null
+++ b/packages/shared/searchQueryParser.test.ts
@@ -0,0 +1,275 @@
+import { describe, expect, test } from "vitest";
+
+import { parseSearchQuery } from "./searchQueryParser";
+
+describe("Search Query Parser", () => {
+ test("simple is queries", () => {
+ expect(parseSearchQuery("is:archived")).toEqual({
+ result: "full",
+ text: "",
+ matcher: {
+ type: "archived",
+ archived: true,
+ },
+ });
+ expect(parseSearchQuery("is:not_archived")).toEqual({
+ result: "full",
+ text: "",
+ matcher: {
+ type: "archived",
+ archived: false,
+ },
+ });
+ expect(parseSearchQuery("is:fav")).toEqual({
+ result: "full",
+ text: "",
+ matcher: {
+ type: "favourited",
+ favourited: true,
+ },
+ });
+ expect(parseSearchQuery("is:not_fav")).toEqual({
+ result: "full",
+ text: "",
+ matcher: {
+ type: "favourited",
+ favourited: false,
+ },
+ });
+ });
+
+ test("simple string queries", () => {
+ expect(parseSearchQuery("url:https://example.com")).toEqual({
+ result: "full",
+ text: "",
+ matcher: {
+ type: "url",
+ url: "https://example.com",
+ },
+ });
+ expect(parseSearchQuery('url:"https://example.com"')).toEqual({
+ result: "full",
+ text: "",
+ matcher: {
+ type: "url",
+ url: "https://example.com",
+ },
+ });
+ expect(parseSearchQuery("#my-tag")).toEqual({
+ result: "full",
+ text: "",
+ matcher: {
+ type: "tagName",
+ tagName: "my-tag",
+ },
+ });
+ expect(parseSearchQuery('#"my tag"')).toEqual({
+ result: "full",
+ text: "",
+ matcher: {
+ type: "tagName",
+ tagName: "my tag",
+ },
+ });
+ expect(parseSearchQuery("list:my-list")).toEqual({
+ result: "full",
+ text: "",
+ matcher: {
+ type: "listName",
+ listName: "my-list",
+ },
+ });
+ expect(parseSearchQuery('list:"my list"')).toEqual({
+ result: "full",
+ text: "",
+ matcher: {
+ type: "listName",
+ listName: "my list",
+ },
+ });
+ });
+ test("date queries", () => {
+ expect(parseSearchQuery("after:2023-10-12")).toEqual({
+ result: "full",
+ text: "",
+ matcher: {
+ type: "dateAfter",
+ dateAfter: new Date("2023-10-12"),
+ },
+ });
+ expect(parseSearchQuery("before:2023-10-12")).toEqual({
+ result: "full",
+ text: "",
+ matcher: {
+ type: "dateBefore",
+ dateBefore: new Date("2023-10-12"),
+ },
+ });
+ });
+
+ test("complex queries", () => {
+ expect(parseSearchQuery("is:fav is:archived")).toEqual({
+ result: "full",
+ text: "",
+ matcher: {
+ type: "and",
+ matchers: [
+ {
+ type: "favourited",
+ favourited: true,
+ },
+ {
+ type: "archived",
+ archived: true,
+ },
+ ],
+ },
+ });
+
+ expect(parseSearchQuery("(is:fav is:archived) #my-tag")).toEqual({
+ result: "full",
+ text: "",
+ matcher: {
+ type: "and",
+ matchers: [
+ {
+ type: "favourited",
+ favourited: true,
+ },
+ {
+ type: "archived",
+ archived: true,
+ },
+ {
+ type: "tagName",
+ tagName: "my-tag",
+ },
+ ],
+ },
+ });
+
+ expect(parseSearchQuery("(is:fav is:archived) or (#my-tag)")).toEqual({
+ result: "full",
+ text: "",
+ matcher: {
+ type: "or",
+ matchers: [
+ {
+ type: "and",
+ matchers: [
+ {
+ type: "favourited",
+ favourited: true,
+ },
+ {
+ type: "archived",
+ archived: true,
+ },
+ ],
+ },
+ {
+ type: "tagName",
+ tagName: "my-tag",
+ },
+ ],
+ },
+ });
+
+ expect(parseSearchQuery("(is:fav or is:archived) and #my-tag")).toEqual({
+ result: "full",
+ text: "",
+ matcher: {
+ type: "and",
+ matchers: [
+ {
+ type: "or",
+ matchers: [
+ {
+ type: "favourited",
+ favourited: true,
+ },
+ {
+ type: "archived",
+ archived: true,
+ },
+ ],
+ },
+ {
+ type: "tagName",
+ tagName: "my-tag",
+ },
+ ],
+ },
+ });
+ });
+ test("pure text", () => {
+ expect(parseSearchQuery("hello")).toEqual({
+ result: "full",
+ text: "hello",
+ matcher: undefined,
+ });
+ expect(parseSearchQuery("hello world")).toEqual({
+ result: "full",
+ text: "hello world",
+ matcher: undefined,
+ });
+ });
+
+ test("text interlived with matchers", () => {
+ expect(
+ parseSearchQuery(
+ "hello is:fav world is:archived mixed world #my-tag test",
+ ),
+ ).toEqual({
+ result: "full",
+ text: "hello world mixed world test",
+ matcher: {
+ type: "and",
+ matchers: [
+ {
+ type: "favourited",
+ favourited: true,
+ },
+ {
+ type: "archived",
+ archived: true,
+ },
+ {
+ type: "tagName",
+ tagName: "my-tag",
+ },
+ ],
+ },
+ });
+ });
+
+ test("unknown qualifiers are emitted as pure text", () => {
+ expect(parseSearchQuery("is:fav is:helloworld")).toEqual({
+ result: "full",
+ text: "is:helloworld",
+ matcher: {
+ type: "favourited",
+ favourited: true,
+ },
+ });
+ });
+
+ test("partial results", () => {
+ expect(parseSearchQuery("(is:archived) or ")).toEqual({
+ result: "partial",
+ text: "or",
+ matcher: {
+ type: "archived",
+ archived: true,
+ },
+ });
+ expect(parseSearchQuery("is:fav is: ( random")).toEqual({
+ result: "partial",
+ text: "is: ( random",
+ matcher: {
+ type: "favourited",
+ favourited: true,
+ },
+ });
+ });
+});
diff --git a/packages/shared/searchQueryParser.ts b/packages/shared/searchQueryParser.ts
new file mode 100644
index 00000000..faf74d08
--- /dev/null
+++ b/packages/shared/searchQueryParser.ts
@@ -0,0 +1,351 @@
+import {
+ alt,
+ alt_sc,
+ apply,
+ kmid,
+ kright,
+ lrec_sc,
+ rule,
+ seq,
+ str,
+ tok,
+ Token,
+ TokenPosition,
+} from "typescript-parsec";
+import { z } from "zod";
+
+import { Matcher } from "./types/search";
+
+enum TokenType {
+ And = "AND",
+ Or = "OR",
+
+ Qualifier = "QUALIFIER",
+ Ident = "IDENT",
+ StringLiteral = "STRING_LITERAL",
+
+ LParen = "LPAREN",
+ RParen = "RPAREN",
+ Space = "SPACE",
+ Hash = "HASH",
+}
+
+// Rules are in order of priority
+const lexerRules: [RegExp, TokenType][] = [
+ [/^and/i, TokenType.And],
+ [/^or/i, TokenType.Or],
+
+ [/^#/, TokenType.Hash],
+ [/^(is|url|list|after|before):/, TokenType.Qualifier],
+
+ [/^"([^"]+)"/, TokenType.StringLiteral],
+
+ [/^\(/, TokenType.LParen],
+ [/^\)/, TokenType.RParen],
+ [/^\s+/, TokenType.Space],
+
+ // This needs to be last as it matches a lot of stuff
+ [/^[^ )(]+/, TokenType.Ident],
+] as const;
+
+class LexerToken implements Token<TokenType> {
+ private constructor(
+ private readonly input: string,
+ public kind: TokenType,
+ public text: string,
+ public pos: TokenPosition,
+ ) {}
+
+ public static from(input: string): Token<TokenType> | undefined {
+ const tok = new LexerToken(
+ input,
+ /* Doesn't matter */ TokenType.Ident,
+ "",
+ {
+ index: 0,
+ rowBegin: 1,
+ rowEnd: 1,
+ columnBegin: 0,
+ columnEnd: 0,
+ },
+ );
+ return tok.next;
+ }
+
+ public get next(): Token<TokenType> | undefined {
+ if (!this.input.length) {
+ return undefined;
+ }
+
+ for (const [regex, tokenType] of lexerRules) {
+ const matchRes = regex.exec(this.input);
+ if (!matchRes) {
+ continue;
+ }
+ const match = matchRes[0];
+ return new LexerToken(this.input.slice(match.length), tokenType, match, {
+ index: this.pos.index + match.length,
+ columnBegin: this.pos.index + 1,
+ columnEnd: this.pos.index + 1 + match.length,
+ // Our strings are always only one line
+ rowBegin: 1,
+ rowEnd: 1,
+ });
+ }
+ // No match
+ throw new Error(
+ `Failed to tokenize the token at position ${this.pos.index}: ${this.input[0]}`,
+ );
+ }
+}
+
+export interface TextAndMatcher {
+ text: string;
+ matcher?: Matcher;
+}
+
+const MATCHER = rule<TokenType, TextAndMatcher>();
+const EXP = rule<TokenType, TextAndMatcher>();
+
+MATCHER.setPattern(
+ alt_sc(
+ apply(kright(str("is:"), tok(TokenType.Ident)), (toks) => {
+ switch (toks.text) {
+ case "fav":
+ return {
+ text: "",
+ matcher: { type: "favourited", favourited: true },
+ };
+ case "not_fav":
+ return {
+ text: "",
+ matcher: { type: "favourited", favourited: false },
+ };
+ case "archived":
+ return {
+ text: "",
+ matcher: { type: "archived", archived: true },
+ };
+ case "not_archived":
+ return {
+ text: "",
+ matcher: { type: "archived", archived: false },
+ };
+ default:
+ // If the token is not known, emit it as pure text
+ return {
+ text: `is:${toks.text}`,
+ matcher: undefined,
+ };
+ }
+ }),
+ apply(
+ seq(
+ alt(tok(TokenType.Qualifier), tok(TokenType.Hash)),
+ alt(
+ apply(tok(TokenType.Ident), (tok) => {
+ return tok.text;
+ }),
+ apply(tok(TokenType.StringLiteral), (tok) => {
+ return tok.text.slice(1, -1);
+ }),
+ ),
+ ),
+ (toks) => {
+ switch (toks[0].text) {
+ case "url:":
+ return {
+ text: "",
+ matcher: { type: "url", url: toks[1] },
+ };
+ case "#":
+ return {
+ text: "",
+ matcher: { type: "tagName", tagName: toks[1] },
+ };
+ case "list:":
+ return {
+ text: "",
+ matcher: { type: "listName", listName: toks[1] },
+ };
+ case "after:":
+ try {
+ return {
+ text: "",
+ matcher: {
+ type: "dateAfter",
+ dateAfter: z.coerce.date().parse(toks[1]),
+ },
+ };
+ } catch (e) {
+ return {
+ // If parsing the date fails, emit it as pure text
+ text: toks[0].text + toks[1],
+ matcher: undefined,
+ };
+ }
+ case "before:":
+ try {
+ return {
+ text: "",
+ matcher: {
+ type: "dateBefore",
+ dateBefore: z.coerce.date().parse(toks[1]),
+ },
+ };
+ } catch (e) {
+ return {
+ // If parsing the date fails, emit it as pure text
+ text: toks[0].text + toks[1],
+ matcher: undefined,
+ };
+ }
+ default:
+ // If the token is not known, emit it as pure text
+ return {
+ text: toks[0].text + toks[1],
+ matcher: undefined,
+ };
+ }
+ },
+ ),
+  // Ident or an incomplete qualifier
+ apply(alt(tok(TokenType.Ident), tok(TokenType.Qualifier)), (toks) => {
+ return {
+ text: toks.text,
+ matcher: undefined,
+ };
+ }),
+ kmid(tok(TokenType.LParen), EXP, tok(TokenType.RParen)),
+ ),
+);
+
+EXP.setPattern(
+ lrec_sc(
+ MATCHER,
+ seq(
+ alt(
+ tok(TokenType.Space),
+ kmid(tok(TokenType.Space), tok(TokenType.And), tok(TokenType.Space)),
+ kmid(tok(TokenType.Space), tok(TokenType.Or), tok(TokenType.Space)),
+ ),
+ MATCHER,
+ ),
+ (toks, next) => {
+ switch (next[0].kind) {
+ case TokenType.Space:
+ case TokenType.And:
+ return {
+ text: [toks.text, next[1].text].join(" ").trim(),
+ matcher:
+ !!toks.matcher || !!next[1].matcher
+ ? {
+ type: "and",
+ matchers: [toks.matcher, next[1].matcher].filter(
+ (a) => !!a,
+ ) as Matcher[],
+ }
+ : undefined,
+ };
+ case TokenType.Or:
+ return {
+ text: [toks.text, next[1].text].join(" ").trim(),
+ matcher:
+ !!toks.matcher || !!next[1].matcher
+ ? {
+ type: "or",
+ matchers: [toks.matcher, next[1].matcher].filter(
+ (a) => !!a,
+ ) as Matcher[],
+ }
+ : undefined,
+ };
+ }
+ },
+ ),
+);
+
+function flattenAndsAndOrs(matcher: Matcher): Matcher {
+ switch (matcher.type) {
+ case "and":
+ case "or": {
+ if (matcher.matchers.length == 1) {
+ return flattenAndsAndOrs(matcher.matchers[0]);
+ }
+ const flattened: Matcher[] = [];
+ for (let m of matcher.matchers) {
+ // If inside the matcher is another matcher of the same type, flatten it
+ m = flattenAndsAndOrs(m);
+ if (m.type == matcher.type) {
+ flattened.push(...m.matchers);
+ } else {
+ flattened.push(m);
+ }
+ }
+ matcher.matchers = flattened;
+ return matcher;
+ }
+ default:
+ return matcher;
+ }
+}
+
+export function _parseAndPrintTokens(query: string) {
+ console.log(`PARSING: ${query}`);
+ let tok = LexerToken.from(query);
+ do {
+ console.log(tok?.kind, tok?.text);
+ tok = tok?.next;
+ } while (tok);
+ console.log("DONE");
+}
+
+function consumeTokenStream(token: Token<TokenType>) {
+ let str = "";
+ let tok: Token<TokenType> | undefined = token;
+ do {
+ str += tok.text;
+ tok = tok.next;
+ } while (tok);
+ return str;
+}
+
+export function parseSearchQuery(
+ query: string,
+): TextAndMatcher & { result: "full" | "partial" | "invalid" } {
+ // _parseAndPrintTokens(query); // Uncomment to debug tokenization
+ const parsed = EXP.parse(LexerToken.from(query.trim()));
+ if (!parsed.successful || parsed.candidates.length != 1) {
+ // If the query is not valid, return the whole query as pure text
+ return {
+ text: query,
+ result: "invalid",
+ };
+ }
+
+ const parseCandidate = parsed.candidates[0];
+ if (parseCandidate.result.matcher) {
+ parseCandidate.result.matcher = flattenAndsAndOrs(
+ parseCandidate.result.matcher,
+ );
+ }
+ if (parseCandidate.nextToken) {
+    // Parser failed to consume the whole query. This usually happens
+ // when the user is still typing the query. Return the partial
+ // result and the remaining query as pure text
+ return {
+ text: (
+ parseCandidate.result.text +
+ consumeTokenStream(parseCandidate.nextToken)
+ ).trim(),
+ matcher: parseCandidate.result.matcher,
+ result: "partial",
+ };
+ }
+
+ return {
+ text: parseCandidate.result.text,
+ matcher: parseCandidate.result.matcher,
+ result: "full",
+ };
+}
diff --git a/packages/shared/types/search.ts b/packages/shared/types/search.ts
new file mode 100644
index 00000000..d430dad5
--- /dev/null
+++ b/packages/shared/types/search.ts
@@ -0,0 +1,72 @@
+import { z } from "zod";
+
+const zTagNameMatcher = z.object({
+ type: z.literal("tagName"),
+ tagName: z.string(),
+});
+
+const zListNameMatcher = z.object({
+ type: z.literal("listName"),
+ listName: z.string(),
+});
+
+const zArchivedMatcher = z.object({
+ type: z.literal("archived"),
+ archived: z.boolean(),
+});
+
+const urlMatcher = z.object({
+ type: z.literal("url"),
+ url: z.string(),
+});
+
+const zFavouritedMatcher = z.object({
+ type: z.literal("favourited"),
+ favourited: z.boolean(),
+});
+
+const zDateAfterMatcher = z.object({
+ type: z.literal("dateAfter"),
+ dateAfter: z.date(),
+});
+
+const zDateBeforeMatcher = z.object({
+ type: z.literal("dateBefore"),
+ dateBefore: z.date(),
+});
+
+const zNonRecursiveMatcher = z.union([
+ zTagNameMatcher,
+ zListNameMatcher,
+ zArchivedMatcher,
+ urlMatcher,
+ zFavouritedMatcher,
+ zDateAfterMatcher,
+ zDateBeforeMatcher,
+]);
+
+type NonRecursiveMatcher = z.infer<typeof zNonRecursiveMatcher>;
+export type Matcher =
+ | NonRecursiveMatcher
+ | { type: "and"; matchers: Matcher[] }
+ | { type: "or"; matchers: Matcher[] };
+
+export const zMatcherSchema: z.ZodType<Matcher> = z.lazy(() => {
+ return z.discriminatedUnion("type", [
+ zTagNameMatcher,
+ zListNameMatcher,
+ zArchivedMatcher,
+ urlMatcher,
+ zFavouritedMatcher,
+ zDateAfterMatcher,
+ zDateBeforeMatcher,
+ z.object({
+ type: z.literal("and"),
+ matchers: z.array(zMatcherSchema),
+ }),
+ z.object({
+ type: z.literal("or"),
+ matchers: z.array(zMatcherSchema),
+ }),
+ ]);
+});
diff --git a/packages/shared/vitest.config.ts b/packages/shared/vitest.config.ts
new file mode 100644
index 00000000..41fd70c4
--- /dev/null
+++ b/packages/shared/vitest.config.ts
@@ -0,0 +1,14 @@
+/// <reference types="vitest" />
+
+import tsconfigPaths from "vite-tsconfig-paths";
+import { defineConfig } from "vitest/config";
+
+// https://vitejs.dev/config/
+export default defineConfig({
+ plugins: [tsconfigPaths()],
+ test: {
+ alias: {
+ "@/*": "./*",
+ },
+ },
+});