aboutsummaryrefslogtreecommitdiffstats
path: root/apps/workers/openaiWorker.ts
diff options
context:
space:
mode:
Diffstat (limited to 'apps/workers/openaiWorker.ts')
-rw-r--r--apps/workers/openaiWorker.ts84
1 files changed, 62 insertions, 22 deletions
diff --git a/apps/workers/openaiWorker.ts b/apps/workers/openaiWorker.ts
index b07e02fe..f0314884 100644
--- a/apps/workers/openaiWorker.ts
+++ b/apps/workers/openaiWorker.ts
@@ -1,6 +1,6 @@
import type { Job } from "bullmq";
import { Worker } from "bullmq";
-import { and, eq, inArray } from "drizzle-orm";
+import { and, Column, eq, inArray, sql } from "drizzle-orm";
import { z } from "zod";
import type { ZOpenAIRequest } from "@hoarder/shared/queues";
@@ -29,6 +29,17 @@ const openAIResponseSchema = z.object({
tags: z.array(z.string()),
});
+function tagNormalizer(col: Column) {
+ function normalizeTag(tag: string) {
+ return tag.toLowerCase().replace(/[ -_]/g, "");
+ }
+
+ return {
+ normalizeTag,
+ sql: sql`lower(replace(replace(replace(${col}, ' ', ''), '-', ''), '_', ''))`,
+ };
+}
+
async function attemptMarkTaggingStatus(
jobData: object | undefined,
status: "success" | "failure",
@@ -257,36 +268,63 @@ async function inferTags(
async function connectTags(
bookmarkId: string,
- newTags: string[],
+ inferredTags: string[],
userId: string,
) {
- if (newTags.length == 0) {
+ if (inferredTags.length == 0) {
return;
}
await db.transaction(async (tx) => {
- // Create tags that didn't exist previously
- await tx
- .insert(bookmarkTags)
- .values(
- newTags.map((t) => ({
- name: t,
- userId,
- })),
- )
- .onConflictDoNothing();
+ // Attempt to match exiting tags with the new ones
+ const { matchedTagIds, notFoundTagNames } = await (async () => {
+ const { normalizeTag, sql: normalizedTagSql } = tagNormalizer(
+ bookmarkTags.name,
+ );
+ const normalizedInferredTags = inferredTags.map((t) => ({
+ originalTag: t,
+ normalizedTag: normalizeTag(t),
+ }));
- const newTagIds = (
- await tx.query.bookmarkTags.findMany({
+ const matchedTags = await tx.query.bookmarkTags.findMany({
where: and(
eq(bookmarkTags.userId, userId),
- inArray(bookmarkTags.name, newTags),
+ inArray(
+ normalizedTagSql,
+ normalizedInferredTags.map((t) => t.normalizedTag),
+ ),
),
- columns: {
- id: true,
- },
- })
- ).map((r) => r.id);
+ });
+
+ const matchedTagIds = matchedTags.map((r) => r.id);
+ const notFoundTagNames = normalizedInferredTags
+ .filter(
+ (t) =>
+ !matchedTags.some(
+ (mt) => normalizeTag(mt.name) === t.normalizedTag,
+ ),
+ )
+ .map((t) => t.originalTag);
+
+ return { matchedTagIds, notFoundTagNames };
+ })();
+
+ // Create tags that didn't exist previously
+ let newTagIds: string[] = [];
+ if (notFoundTagNames.length > 0) {
+ newTagIds = (
+ await tx
+ .insert(bookmarkTags)
+ .values(
+ notFoundTagNames.map((t) => ({
+ name: t,
+ userId,
+ })),
+ )
+ .onConflictDoNothing()
+ .returning()
+ ).map((t) => t.id);
+ }
// Delete old AI tags
await tx
@@ -298,11 +336,13 @@ async function connectTags(
),
);
+ const allTagIds = new Set([...matchedTagIds, ...newTagIds]);
+
// Attach new ones
await tx
.insert(tagsOnBookmarks)
.values(
- newTagIds.map((tagId) => ({
+ [...allTagIds].map((tagId) => ({
tagId,
bookmarkId,
attachedBy: "ai" as const,