diff options
Diffstat (limited to 'apps/workers/workers/inference/summarize.ts')
| -rw-r--r-- | apps/workers/workers/inference/summarize.ts | 54 |
1 file changed, 50 insertions, 4 deletions
diff --git a/apps/workers/workers/inference/summarize.ts b/apps/workers/workers/inference/summarize.ts index 23636961..922eb5b7 100644 --- a/apps/workers/workers/inference/summarize.ts +++ b/apps/workers/workers/inference/summarize.ts @@ -1,12 +1,17 @@ import { and, eq } from "drizzle-orm"; +import { getBookmarkDomain } from "network"; import { db } from "@karakeep/db"; -import { bookmarks, customPrompts } from "@karakeep/db/schema"; -import { triggerSearchReindex, ZOpenAIRequest } from "@karakeep/shared-server"; +import { bookmarks, customPrompts, users } from "@karakeep/db/schema"; +import { + setSpanAttributes, + triggerSearchReindex, + ZOpenAIRequest, +} from "@karakeep/shared-server"; import serverConfig from "@karakeep/shared/config"; import { InferenceClient } from "@karakeep/shared/inference"; import logger from "@karakeep/shared/logger"; -import { buildSummaryPrompt } from "@karakeep/shared/prompts"; +import { buildSummaryPrompt } from "@karakeep/shared/prompts.server"; import { DequeuedJob } from "@karakeep/shared/queueing"; import { BookmarkTypes } from "@karakeep/shared/types/bookmarks"; import { Bookmark } from "@karakeep/trpc/models/bookmarks"; @@ -22,6 +27,7 @@ async function fetchBookmarkDetailsForSummary(bookmarkId: string) { description: true, htmlContent: true, contentAssetId: true, + crawlStatusCode: true, publisher: true, author: true, url: true, @@ -56,6 +62,33 @@ export async function runSummarization( const bookmarkData = await fetchBookmarkDetailsForSummary(bookmarkId); + // Check user-level preference + const userSettings = await db.query.users.findFirst({ + where: eq(users.id, bookmarkData.userId), + columns: { + autoSummarizationEnabled: true, + inferredTagLang: true, + }, + }); + + setSpanAttributes({ + "user.id": bookmarkData.userId, + "bookmark.id": bookmarkData.id, + "bookmark.url": bookmarkData.link?.url, + "bookmark.domain": getBookmarkDomain(bookmarkData.link?.url), + "bookmark.content.type": bookmarkData.type, + 
"crawler.statusCode": bookmarkData.link?.crawlStatusCode ?? undefined, + "inference.type": "summarization", + "inference.model": serverConfig.inference.textModel, + }); + + if (userSettings?.autoSummarizationEnabled === false) { + logger.debug( + `[inference][${jobId}] Skipping summarization job for bookmark with id "${bookmarkId}" because user has disabled auto-summarization.`, + ); + return; + } + let textToSummarize = ""; if (bookmarkData.type === BookmarkTypes.LINK && bookmarkData.link) { const link = bookmarkData.link; @@ -105,13 +138,21 @@ URL: ${link.url ?? ""} }, }); + setSpanAttributes({ + "inference.prompt.customCount": prompts.length, + }); + const summaryPrompt = await buildSummaryPrompt( - serverConfig.inference.inferredTagLang, + userSettings?.inferredTagLang ?? serverConfig.inference.inferredTagLang, prompts.map((p) => p.text), textToSummarize, serverConfig.inference.contextLength, ); + setSpanAttributes({ + "inference.prompt.size": Buffer.byteLength(summaryPrompt, "utf8"), + }); + const summaryResult = await inferenceClient.inferFromText(summaryPrompt, { schema: null, // Summaries are typically free-form text abortSignal: job.abortSignal, @@ -123,6 +164,11 @@ URL: ${link.url ?? ""} ); } + setSpanAttributes({ + "inference.summary.size": Buffer.byteLength(summaryResult.response, "utf8"), + "inference.totalTokens": summaryResult.totalTokens, + }); + logger.info( `[inference][${jobId}] Generated summary for bookmark "${bookmarkId}" using ${summaryResult.totalTokens} tokens.`, ); |
