4 files changed, 8 insertions, 28 deletions
diff --git a/.env.sample b/.env.sample
deleted file mode 100644
index df0c6ef3..00000000
--- a/.env.sample
+++ /dev/null
@@ -1,23 +0,0 @@
-############## Global ##############
-# DATA_DIR=
-# LOG_LEVEL=debug
-
-############## Crawler ##############
-# CRAWLER_HEADLESS_BROWSER=true
-
-############## Redis ##############
-# REDIS_HOST=
-# REDIS_PORT=
-
-############### OpenAI ##############
-
-# OPENAI_API_KEY=
-
-############### Search ##############
-
-# MEILI_ADDR=
-# MEILI_MASTER_KEY=
-
-############## Auth ##############
-
-# DISABLE_SIGNUPS=
diff --git a/apps/workers/openaiWorker.ts b/apps/workers/openaiWorker.ts
index bb8015a5..c7b519e2 100644
--- a/apps/workers/openaiWorker.ts
+++ b/apps/workers/openaiWorker.ts
@@ -7,6 +7,7 @@ import type { ZOpenAIRequest } from "@hoarder/shared/queues";
 import { db } from "@hoarder/db";
 import { bookmarks, bookmarkTags, tagsOnBookmarks } from "@hoarder/db/schema";
 import { readAsset } from "@hoarder/shared/assetdb";
+import serverConfig from "@hoarder/shared/config";
 import logger from "@hoarder/shared/logger";
 import {
   OpenAIQueue,
@@ -73,15 +74,14 @@ export class OpenAiWorker {
 const IMAGE_PROMPT_BASE = `
 I'm building a read-it-later app and I need your help with automatic tagging.
 Please analyze the attached image and suggest relevant tags that describe its key themes, topics, and main ideas.
-Aim for a variety of tags, including broad categories, specific keywords, and potential sub-genres. If it's a famous website
-you may also include a tag for the website. If the tag is not generic enough, don't include it. Aim for 10-15 tags.
-If there are no good tags, don't emit any. You must respond in valid JSON with the key "tags" and the value is list of tags.
-Don't wrap the response in a markdown code.`;
+Aim for a variety of tags, including broad categories, specific keywords, and potential sub-genres. The tags language must be ${serverConfig.inference.inferredTagLang}.
+If the tag is not generic enough, don't include it. Aim for 10-15 tags. If there are no good tags, don't emit any. You must respond in valid JSON
+with the key "tags" and the value is list of tags. Don't wrap the response in a markdown code.`;
 
 const TEXT_PROMPT_BASE = `
 I'm building a read-it-later app and I need your help with automatic tagging.
 Please analyze the text after the sentence "CONTENT START HERE:" and suggest relevant tags that describe its key themes, topics, and main ideas.
-Aim for a variety of tags, including broad categories, specific keywords, and potential sub-genres. If it's a famous website
+Aim for a variety of tags, including broad categories, specific keywords, and potential sub-genres. The tags language must be ${serverConfig.inference.inferredTagLang}. If it's a famous website
 you may also include a tag for the website. If the tag is not generic enough, don't include it. Aim for 3-5 tags. If there are no good tags, don't emit any.
 The content can include text for cookie consent and privacy policy, ignore those while tagging.
 You must respond in JSON with the key "tags" and the value is list of tags.
diff --git a/docs/docs/03-configuration.md b/docs/docs/03-configuration.md
index e888ed02..db660980 100644
--- a/docs/docs/03-configuration.md
+++ b/docs/docs/03-configuration.md
@@ -31,3 +31,4 @@ Either `OPENAI_API_KEY` or `OLLAMA_BASE_URL` need to be set for automatic taggin
 | OLLAMA_BASE_URL       | No       | Not set              | If you want to use ollama for local inference, set the address of ollama API here.                                                                                                              |
 | INFERENCE_TEXT_MODEL  | No       | gpt-3.5-turbo-0125   | The model to use for text inference. You'll need to change this to some other model if you're using ollama.                                                                                     |
 | INFERENCE_IMAGE_MODEL | No       | gpt-4-vision-preview | The model to use for image inference. You'll need to change this to some other model if you're using ollama and that model needs to support vision APIs (e.g. llava).                           |
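+| INFERENCE_LANG        | No       | english              | The language in which the tags will be generated. The value is inserted verbatim into the tagging prompt, so use a language name the model understands (e.g. english, french).                  |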
diff --git a/packages/shared/config.ts b/packages/shared/config.ts
index 21bcba5a..c1cc371a 100644
--- a/packages/shared/config.ts
+++ b/packages/shared/config.ts
@@ -28,6 +28,7 @@ const allEnv = z.object({
   DEMO_MODE_PASSWORD: z.string().optional(),
   DATA_DIR: z.string().default(""),
   MAX_ASSET_SIZE_MB: z.coerce.number().default(4),
+  INFERENCE_LANG: z.string().default("english"),
 });
 
 const serverConfigSchema = allEnv.transform((val) => {
@@ -42,6 +43,7 @@ const serverConfigSchema = allEnv.transform((val) => {
       ollamaBaseUrl: val.OLLAMA_BASE_URL,
       textModel: val.INFERENCE_TEXT_MODEL,
       imageModel: val.INFERENCE_IMAGE_MODEL,
+      inferredTagLang: val.INFERENCE_LANG,
     },
     bullMQ: {
       redisHost: val.REDIS_HOST,