diff --git a/backend/alembic/versions/a3f1d7c4e9b2_persona_multilingual_query_expansion.py b/backend/alembic/versions/a3f1d7c4e9b2_persona_multilingual_query_expansion.py
new file mode 100644
index 00000000000..ab615c76e6b
--- /dev/null
+++ b/backend/alembic/versions/a3f1d7c4e9b2_persona_multilingual_query_expansion.py
@@ -0,0 +1,32 @@
+"""persona multilingual_query_expansion flag
+
+Revision ID: a3f1d7c4e9b2
+Revises: c8a4e2f9d1b3
+Create Date: 2026-05-04 12:00:00.000000
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "a3f1d7c4e9b2"
+down_revision = "c8a4e2f9d1b3"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+ op.add_column(
+ "persona",
+ sa.Column(
+ "multilingual_query_expansion",
+ sa.Boolean(),
+ nullable=False,
+ server_default=sa.text("false"),
+ ),
+ )
+
+
+def downgrade() -> None:
+ op.drop_column("persona", "multilingual_query_expansion")
diff --git a/backend/danswer/chat/multilingual_translation.py b/backend/danswer/chat/multilingual_translation.py
new file mode 100644
index 00000000000..5976f6fcc89
--- /dev/null
+++ b/backend/danswer/chat/multilingual_translation.py
@@ -0,0 +1,151 @@
+"""Helpers for the per-persona multi-language post-processing pass.
+
+When a persona has `multilingual_query_expansion=True` and the user's
+query is non-English, the answering LLM still produces English most of
+the time (it tends to mirror the English context corpus regardless of
+the LANGUAGE_HINT directive). We compensate by post-translating the
+English answer back into the user's original language.
+
+Trade-off: in translate mode we buffer the streamed answer instead of
+showing it token-by-token. The user sees a brief delay (one extra LLM
+round-trip), but reliably gets a reply in their language. English
+queries are unaffected — they keep streaming normally.
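+
+Minimal usage sketch (mirrors the wiring in process_message.py;
+`user_query`, `english_answer`, and `llm` are stand-ins):
+
+    detected = detect_query_language(user_query)
+    if language_name(detected) is not None:
+        final_answer = translate_answer_to_language(
+            answer_text=english_answer,
+            target_language_code=detected,
+            llm=llm,
+        )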
+"""
+from __future__ import annotations
+
+import unicodedata
+
+from danswer.llm.interfaces import LLM
+from danswer.llm.utils import dict_based_prompt_to_langchain_prompt
+from danswer.llm.utils import message_to_string
+from danswer.utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+# Display name passed to the translation prompt. Keys are the language
+# codes detect_query_language returns. Anything not in this map is
+# treated as English (no translation needed).
+_LANGUAGE_NAMES: dict[str, str] = {
+ "ja": "Japanese",
+ "zh": "Chinese (Simplified)",
+ "ko": "Korean",
+}
+
+
+def detect_query_language(text: str) -> str:
+ """Cheap script-based language detector covering the languages we
+ explicitly support translation for. Returns one of: 'ja', 'zh',
+ 'ko', or 'en' (English/other — no translation needed).
+
+ Heuristic mirrors the script-presence test in
+ backend/scripts/test_multilanguage_e2e.py: a few percent of CJK /
+ Hangul / kana code points is enough to decide. We don't try to be
+ clever about mixed-language queries — the dominant non-English
+ script wins, and ties default to English.
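+
+    Illustrative examples:
+
+    >>> detect_query_language("How do I reset my password?")
+    'en'
+    >>> detect_query_language("パスワードをリセットするには")
+    'ja'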
+ """
+ if not text:
+ return "en"
+
+ counts = {"hiragana_katakana": 0, "hangul": 0, "cjk": 0, "ascii_letter": 0}
+ total_letters = 0
+ for ch in text:
+ cp = ord(ch)
+ if (0x3040 <= cp <= 0x309F) or (0x30A0 <= cp <= 0x30FF):
+ counts["hiragana_katakana"] += 1
+ total_letters += 1
+ elif 0xAC00 <= cp <= 0xD7AF:
+ counts["hangul"] += 1
+ total_letters += 1
+ elif (0x4E00 <= cp <= 0x9FFF) or (0x3400 <= cp <= 0x4DBF):
+ counts["cjk"] += 1
+ total_letters += 1
+ elif unicodedata.category(ch).startswith("L"):
+ counts["ascii_letter"] += 1
+ total_letters += 1
+
+ if total_letters == 0:
+ return "en"
+ threshold = max(1, total_letters // 20) # ~5%
+ if counts["hiragana_katakana"] >= threshold:
+ return "ja"
+ if counts["hangul"] >= threshold:
+ return "ko"
+ if counts["cjk"] >= threshold:
+ return "zh"
+ return "en"
+
+
+def language_name(code: str) -> str | None:
+ return _LANGUAGE_NAMES.get(code)
+
+
+# The prompt is intentionally directive about preserving citations and
+# not adding commentary. Citations are bracketed numerals like [1] /
+# [[1]](url); URLs and code blocks should also pass through unchanged.
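+# E.g. "See the VPN guide [[2]](https://docs.example.com/vpn)" must come
+# back with the [[2]](https://docs.example.com/vpn) marker intact.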
+_TRANSLATE_PROMPT = """\
+You are a precise translator.
+
+Translate the text below into {target_language}.
+
+CRITICAL RULES — follow exactly:
+- Preserve every citation marker exactly as-is. Citation markers look
+ like [1], [2], [[1]](https://example.com), etc. Do not translate
+ them, do not change the brackets, do not change the numbers.
+- Preserve every URL exactly.
+- Preserve every code block (text between triple backticks) exactly.
+- Preserve every inline code span (text between single backticks).
+- Do not add any commentary, preface, or trailing notes — output only
+ the translated text.
+- Keep numbers, proper nouns, and product names in their original
+ form unless the target language has a well-established equivalent.
+
+TEXT TO TRANSLATE:
+{text}
+"""
+
+
+def translate_answer_to_language(
+ answer_text: str,
+ target_language_code: str,
+ llm: LLM,
+) -> str:
+ """Translate `answer_text` into the language named by
+ `target_language_code` (a key of _LANGUAGE_NAMES). Returns the
+ English original on any failure — better to ship an English answer
+ than to drop the response entirely."""
+ target_name = _LANGUAGE_NAMES.get(target_language_code)
+ if target_name is None:
+ # Caller should have skipped, but be defensive.
+ return answer_text
+
+ if not answer_text.strip():
+ return answer_text
+
+ prompt_messages = [
+ {
+ "role": "user",
+ "content": _TRANSLATE_PROMPT.format(
+ target_language=target_name, text=answer_text
+ ),
+ }
+ ]
+
+ try:
+ filled = dict_based_prompt_to_langchain_prompt(prompt_messages)
+ translated = message_to_string(llm.invoke(filled))
+ except Exception:
+ logger.exception(
+ "Failed to translate answer to %s; falling back to English",
+ target_name,
+ )
+ return answer_text
+
+ translated = translated.strip()
+ if not translated:
+ logger.warning(
+ "Translation to %s came back empty; falling back to English",
+ target_name,
+ )
+ return answer_text
+ return translated
diff --git a/backend/danswer/chat/process_message.py b/backend/danswer/chat/process_message.py
index 64f6bab822d..4a11aa24ed5 100644
--- a/backend/danswer/chat/process_message.py
+++ b/backend/danswer/chat/process_message.py
@@ -14,6 +14,9 @@
from danswer.chat.models import LLMRelevanceFilterResponse
from danswer.chat.models import QADocsResponse
from danswer.chat.models import StreamingError
+from danswer.chat.multilingual_translation import detect_query_language
+from danswer.chat.multilingual_translation import language_name
+from danswer.chat.multilingual_translation import translate_answer_to_language
from danswer.configs.chat_configs import CHAT_TARGET_CHUNK_PERCENTAGE
from danswer.configs.chat_configs import DISABLE_LLM_CHOOSE_SEARCH
from danswer.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT
@@ -92,11 +95,11 @@ def translate_citations(
for db_doc in db_docs:
if db_doc.document_id not in doc_id_to_saved_doc_id_map:
doc_id_to_saved_doc_id_map[db_doc.document_id] = db_doc.id
- #print(f'found doc id: {db_doc.id}')
+ # print(f'found doc id: {db_doc.id}')
citation_to_saved_doc_id_map: dict[int, int] = {}
for citation in citations_list:
- #print(f'citation id {citation.document_id} for doc num {citation.citation_num}')
+ # print(f'citation id {citation.document_id} for doc num {citation.citation_num}')
if citation.citation_num not in citation_to_saved_doc_id_map:
citation_to_saved_doc_id_map[
citation.citation_num
@@ -404,15 +407,25 @@ def stream_chat_message_objects(
if not final_msg.prompt:
raise RuntimeError("No Prompt found")
+ # Persona may be None for legacy flows; treat the flag as off in
+ # that case. When persona exists, thread its flag through so the
+ # answer-side prompt builders add the LANGUAGE_HINT.
+ persona_multilingual = (
+ persona.multilingual_query_expansion if persona is not None else False
+ )
prompt_config = (
PromptConfig.from_model(
final_msg.prompt,
prompt_override=(
new_msg_req.prompt_override or chat_session.prompt_override
),
+ multilingual_query_expansion=persona_multilingual,
)
if not persona
- else PromptConfig.from_model(persona.prompts[0])
+ else PromptConfig.from_model(
+ persona.prompts[0],
+ multilingual_query_expansion=persona_multilingual,
+ )
)
# find out what tools to use
@@ -539,6 +552,22 @@ def stream_chat_message_objects(
ai_message_files = None # any files to associate with the AI message e.g. dall-e generated images
dropped_indices = None
tool_result = None
+
+            # Multi-language post-processing pass (option C in the design):
+            # when the persona has multilingual_query_expansion=True and the
+            # user's question is in a non-English language, the LLM tends
+            # to answer in English regardless of the LANGUAGE_HINT
+            # directive. We compensate by holding DanswerAnswerPiece
+            # tokens back during the stream and emitting a single
+            # translated piece at the end. Other packet types (citations,
+            # tool responses, image generation, etc.) still flow in real
+            # time.
+            translate_target: str | None = None
+            if persona_multilingual:
+                detected = detect_query_language(message_text)
+                if language_name(detected) is not None:
+                    translate_target = detected
+
for packet in answer.processed_streamed_output:
if isinstance(packet, ToolResponse):
if packet.id == SEARCH_RESPONSE_SUMMARY_ID:
@@ -594,8 +623,35 @@ def stream_chat_message_objects(
else:
if isinstance(packet, ToolCallFinalResult):
tool_result = packet
+                if (
+                    translate_target is not None
+                    and isinstance(packet, DanswerAnswerPiece)
+                    and packet.answer_piece
+                ):
+                    # Hold the English answer tokens back; the full
+                    # answer is translated and emitted once the stream
+                    # ends.
+                    continue
yield cast(ChatPacket, packet)
+            # End of stream. If we held tokens back for translation, do
+            # the second LLM pass now and emit the translated answer as
+            # one piece. `answer.llm_answer` accumulates the same
+            # processed stream, so it already contains the full English
+            # text and serves as the source of truth.
+ translated_answer_text: str | None = None
+ if translate_target is not None:
+ english_answer = answer.llm_answer
+ translated_answer_text = translate_answer_to_language(
+ answer_text=english_answer,
+ target_language_code=translate_target,
+ llm=llm,
+ )
+ yield DanswerAnswerPiece(answer_piece=translated_answer_text)
+ yield DanswerAnswerPiece(answer_piece=None)
+
except Exception as e:
logger.exception("Failed to process chat message")
@@ -627,14 +683,24 @@ def stream_chat_message_objects(
for tool in tool_list:
tool_name_to_tool_id[tool.name()] = tool_id
+ # If we translated, persist the user-facing translated text
+ # rather than the English intermediate. Citations are computed
+ # from the LLM's English output (where the [1]/[2] markers
+ # were emitted relative to retrieved docs); the translation
+ # prompt preserves those markers verbatim.
+ final_answer_text = (
+ translated_answer_text
+ if translated_answer_text is not None
+ else answer.llm_answer
+ )
gen_ai_response_message = partial_response(
- message=answer.llm_answer,
+ message=final_answer_text,
rephrased_query=(
qa_docs_response.rephrased_query if qa_docs_response else None
),
reference_docs=reference_db_search_docs,
files=ai_message_files,
- token_count=len(llm_tokenizer_encode_func(answer.llm_answer)),
+ token_count=len(llm_tokenizer_encode_func(final_answer_text)),
citations=db_citations,
error=None,
tool_calls=[
diff --git a/backend/danswer/db/models.py b/backend/danswer/db/models.py
index 58a8f32a8e9..e2bc854d09e 100644
--- a/backend/danswer/db/models.py
+++ b/backend/danswer/db/models.py
@@ -1003,6 +1003,15 @@ class Persona(Base):
# Enables using LLM to extract time and source type filters
# Can also be admin disabled globally
llm_filter_extraction: Mapped[bool] = mapped_column(Boolean)
+ # When true, non-English queries on this persona are translated to
+ # English before retrieval and the LLM is instructed to answer in
+ # the user's original language. Off by default since most traffic
+ # is English and turning it on incurs an extra LLM call per query.
+ # Behaves as an override of the global MULTILINGUAL_QUERY_EXPANSION
+ # env var: persona flag wins; if false, falls back to env var.
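+    # E.g. flag True with the env var unset still translates; flag
+    # False with the env var set keeps the global behavior.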
+ multilingual_query_expansion: Mapped[bool] = mapped_column(
+ Boolean, nullable=False, default=False, server_default="false"
+ )
recency_bias: Mapped[RecencyBiasSetting] = mapped_column(
Enum(RecencyBiasSetting, native_enum=False)
)
diff --git a/backend/danswer/db/persona.py b/backend/danswer/db/persona.py
index 26292fc9264..313192f30e6 100644
--- a/backend/danswer/db/persona.py
+++ b/backend/danswer/db/persona.py
@@ -79,6 +79,7 @@ def create_update_persona(
llm_model_version_override=create_persona_request.llm_model_version_override,
starter_messages=create_persona_request.starter_messages,
is_public=create_persona_request.is_public,
+ multilingual_query_expansion=create_persona_request.multilingual_query_expansion,
db_session=db_session,
)
@@ -327,6 +328,7 @@ def upsert_persona(
tool_ids: list[int] | None = None,
persona_id: int | None = None,
default_persona: bool = False,
+ multilingual_query_expansion: bool = False,
commit: bool = True,
) -> Persona:
if persona_id is not None:
@@ -379,6 +381,7 @@ def upsert_persona(
persona.starter_messages = starter_messages
persona.deleted = False # Un-delete if previously deleted
persona.is_public = is_public
+ persona.multilingual_query_expansion = multilingual_query_expansion
# Do not delete any associations manually added unless
# a new updated list is provided
@@ -411,6 +414,7 @@ def upsert_persona(
llm_model_version_override=llm_model_version_override,
starter_messages=starter_messages,
tools=tools or [],
+ multilingual_query_expansion=multilingual_query_expansion,
)
db_session.add(persona)
diff --git a/backend/danswer/llm/answering/models.py b/backend/danswer/llm/answering/models.py
index a5248fac27a..8301c3a8420 100644
--- a/backend/danswer/llm/answering/models.py
+++ b/backend/danswer/llm/answering/models.py
@@ -121,10 +121,17 @@ class PromptConfig(BaseModel):
task_prompt: str
datetime_aware: bool
include_citations: bool
+ # When true, the answer-side prompts add the LANGUAGE_HINT directive
+ # so the LLM responds in the user's original language. Sourced from
+ # the persona's multilingual_query_expansion flag at construction.
+ multilingual_query_expansion: bool = False
@classmethod
def from_model(
- cls, model: "Prompt", prompt_override: PromptOverride | None = None
+ cls,
+ model: "Prompt",
+ prompt_override: PromptOverride | None = None,
+ multilingual_query_expansion: bool = False,
) -> "PromptConfig":
override_system_prompt = (
prompt_override.system_prompt if prompt_override else None
@@ -136,6 +143,7 @@ def from_model(
task_prompt=override_task_prompt or model.task_prompt,
datetime_aware=model.datetime_aware,
include_citations=model.include_citations,
+ multilingual_query_expansion=multilingual_query_expansion,
)
# needed so that this can be passed into lru_cache funcs
diff --git a/backend/danswer/llm/answering/prompts/citations_prompt.py b/backend/danswer/llm/answering/prompts/citations_prompt.py
index 69f727318d0..fbebe679f24 100644
--- a/backend/danswer/llm/answering/prompts/citations_prompt.py
+++ b/backend/danswer/llm/answering/prompts/citations_prompt.py
@@ -33,13 +33,16 @@
def get_prompt_tokens(prompt_config: PromptConfig) -> int:
# Note: currently custom prompts do not allow datetime aware, only default prompts
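+    # The persona-level flag OR the global env var turns the
+    # LANGUAGE_HINT on (same resolution as build_citations_user_message
+    # below).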
+ use_language_hint = prompt_config.multilingual_query_expansion or bool(
+ MULTILINGUAL_QUERY_EXPANSION
+ )
return (
check_number_of_tokens(prompt_config.system_prompt)
+ check_number_of_tokens(prompt_config.task_prompt)
+ CHAT_USER_PROMPT_WITH_CONTEXT_OVERHEAD_TOKEN_CNT
+ CITATION_STATEMENT_TOKEN_CNT
+ CITATION_REMINDER_TOKEN_CNT
- + (LANGUAGE_HINT_TOKEN_CNT if bool(MULTILINGUAL_QUERY_EXPANSION) else 0)
+ + (LANGUAGE_HINT_TOKEN_CNT if use_language_hint else 0)
+ (ADDITIONAL_INFO_TOKEN_CNT if prompt_config.datetime_aware else 0)
)
@@ -135,7 +138,11 @@ def build_citations_user_message(
all_doc_useful: bool,
history_message: str = "",
) -> HumanMessage:
- task_prompt_with_reminder = build_task_prompt_reminders(prompt_config)
+ task_prompt_with_reminder = build_task_prompt_reminders(
+ prompt_config,
+ use_language_hint=prompt_config.multilingual_query_expansion
+ or bool(MULTILINGUAL_QUERY_EXPANSION),
+ )
if context_docs:
context_docs_str = build_complete_context_str(context_docs)
diff --git a/backend/danswer/llm/answering/prompts/quotes_prompt.py b/backend/danswer/llm/answering/prompts/quotes_prompt.py
index b2b67c65b37..a39a4f8bf0e 100644
--- a/backend/danswer/llm/answering/prompts/quotes_prompt.py
+++ b/backend/danswer/llm/answering/prompts/quotes_prompt.py
@@ -74,12 +74,18 @@ def _build_strong_llm_quotes_prompt(
return HumanMessage(content=full_prompt)
+def _resolve_use_language_hint(prompt: PromptConfig, override: bool | None) -> bool:
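+    # An explicit caller override wins; otherwise the persona flag,
+    # then the global env var. None means "not specified", so callers
+    # that never pass the argument keep the env-var behavior.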
+ if override is not None:
+ return override
+ return prompt.multilingual_query_expansion or bool(MULTILINGUAL_QUERY_EXPANSION)
+
+
def build_quotes_user_message(
question: str,
context_docs: list[LlmDoc] | list[InferenceChunk],
history_str: str,
prompt: PromptConfig,
- use_language_hint: bool = bool(MULTILINGUAL_QUERY_EXPANSION),
+ use_language_hint: bool | None = None,
) -> HumanMessage:
prompt_builder = (
_build_weak_llm_quotes_prompt
@@ -92,7 +98,7 @@ def build_quotes_user_message(
context_docs=context_docs,
history_str=history_str,
prompt=prompt,
- use_language_hint=use_language_hint,
+ use_language_hint=_resolve_use_language_hint(prompt, use_language_hint),
)
@@ -101,7 +107,7 @@ def build_quotes_prompt(
context_docs: list[LlmDoc] | list[InferenceChunk],
history_str: str,
prompt: PromptConfig,
- use_language_hint: bool = bool(MULTILINGUAL_QUERY_EXPANSION),
+ use_language_hint: bool | None = None,
) -> HumanMessage:
prompt_builder = (
_build_weak_llm_quotes_prompt
@@ -114,5 +120,5 @@ def build_quotes_prompt(
context_docs=context_docs,
history_str=history_str,
prompt=prompt,
- use_language_hint=use_language_hint,
+ use_language_hint=_resolve_use_language_hint(prompt, use_language_hint),
)
diff --git a/backend/danswer/one_shot_answer/answer_question.py b/backend/danswer/one_shot_answer/answer_question.py
index 3131406cab5..49da002d170 100644
--- a/backend/danswer/one_shot_answer/answer_question.py
+++ b/backend/danswer/one_shot_answer/answer_question.py
@@ -7,6 +7,9 @@
from danswer.chat.chat_utils import reorganize_citations
from danswer.chat.models import CitationInfo
 from danswer.chat.models import DanswerAnswerPiece
 from danswer.chat.models import DanswerContexts
 from danswer.chat.models import DanswerQuotes
 from danswer.chat.models import LLMRelevanceFilterResponse
+from danswer.chat.multilingual_translation import detect_query_language
+from danswer.chat.multilingual_translation import language_name
+from danswer.chat.multilingual_translation import translate_answer_to_language
@@ -158,7 +161,14 @@ def stream_answer_objects(
)
llm, fast_llm = get_llms_for_persona(persona=chat_session.persona)
- prompt_config = PromptConfig.from_model(prompt)
+ persona_multilingual = (
+ chat_session.persona.multilingual_query_expansion
+ if chat_session.persona is not None
+ else False
+ )
+ prompt_config = PromptConfig.from_model(
+ prompt, multilingual_query_expansion=persona_multilingual
+ )
document_pruning_config = DocumentPruningConfig(
max_chunks=int(
chat_session.persona.num_chunks
@@ -188,7 +198,9 @@ def stream_answer_objects(
answer = Answer(
question=query_msg.message,
answer_style_config=answer_config,
-        prompt_config=PromptConfig.from_model(prompt),
+        prompt_config=prompt_config,
llm=get_main_llm_from_tuple(get_llms_for_persona(persona=chat_session.persona)),
single_message_history=history_str,
tools=[search_tool],
@@ -203,6 +215,22 @@ def stream_answer_objects(
)
# won't be any ImageGenerationDisplay responses since that tool is never passed in
dropped_inds: list[int] = []
+
+    # Multi-language post-processing pass for the one-shot path
+    # (mirrors process_message.py). When the persona has the flag on
+    # and the user's question is non-English, hold DanswerAnswerPiece
+    # tokens back during the stream and emit a single translated piece
+    # at the end. CitationInfo packets still flow in real time so the
+    # slackbot's citation-required retry loop sees them. The translate
+    # prompt preserves [1]/[2] markers verbatim, so citations remain
+    # accurate after translation.
+    translate_target: str | None = None
+    if persona_multilingual:
+        detected = detect_query_language(query_msg.message)
+        if language_name(detected) is not None:
+            translate_target = detected
+
for packet in cast(AnswerObjectIterator, answer.processed_streamed_output):
# for one-shot flow, don't currently do anything with these
if isinstance(packet, ToolResponse):
@@ -252,15 +280,46 @@ def stream_answer_objects(
elif packet.id == SEARCH_DOC_CONTENT_ID:
yield packet.response
else:
+            if (
+                translate_target is not None
+                and isinstance(packet, DanswerAnswerPiece)
+                and packet.answer_piece
+            ):
+                # Hold the English answer tokens back; the full answer
+                # is translated and emitted once the stream ends.
+                continue
yield packet
+    # End of stream. If we held tokens back for translation, do the
+    # second LLM pass now and emit the translated answer as one piece.
+    # Use answer.llm_answer as the source of truth for the English
+    # text; the processed stream is already cached on the Answer object.
+ translated_answer_text: str | None = None
+ if translate_target is not None:
+ english_answer = answer.llm_answer
+ translated_answer_text = translate_answer_to_language(
+ answer_text=english_answer,
+ target_language_code=translate_target,
+ llm=llm,
+ )
+ yield DanswerAnswerPiece(answer_piece=translated_answer_text)
+ yield DanswerAnswerPiece(answer_piece=None)
+
+ # If we translated, persist the user-facing translated text.
+ final_answer_text = (
+ translated_answer_text
+ if translated_answer_text is not None
+ else answer.llm_answer
+ )
+
# Saving Gen AI answer and responding with message info
gen_ai_response_message = create_new_chat_message(
chat_session_id=chat_session.id,
parent_message=new_user_message,
prompt_id=query_req.prompt_id,
- message=answer.llm_answer,
- token_count=len(llm_tokenizer(answer.llm_answer)),
+ message=final_answer_text,
+ token_count=len(llm_tokenizer(final_answer_text)),
message_type=MessageType.ASSISTANT,
error=None,
reference_docs=reference_db_search_docs,
diff --git a/backend/danswer/search/pipeline.py b/backend/danswer/search/pipeline.py
index 98b1a87161d..7b2a31db7e2 100644
--- a/backend/danswer/search/pipeline.py
+++ b/backend/danswer/search/pipeline.py
@@ -273,12 +273,24 @@ def retrieved_chunks(self) -> list[InferenceChunk]:
if self._retrieved_chunks is not None:
return self._retrieved_chunks
+ # Resolve multilingual expansion: persona flag takes precedence,
+ # then global env var. Persona flag means "translate non-English
+ # queries to English for retrieval"; expressed as the string
+ # "English" so the existing expansion plumbing fans out the
+ # query to that language.
+ persona = self.search_request.persona
+ multilingual_expansion_str: str | None
+ if persona is not None and persona.multilingual_query_expansion:
+ multilingual_expansion_str = "English"
+ else:
+ multilingual_expansion_str = MULTILINGUAL_QUERY_EXPANSION
+
self._retrieved_chunks = retrieve_chunks(
query=self.search_query,
document_index=self.document_index,
db_session=self.db_session,
hybrid_alpha=self.search_request.hybrid_alpha,
- multilingual_expansion_str=MULTILINGUAL_QUERY_EXPANSION,
+ multilingual_expansion_str=multilingual_expansion_str,
retrieval_metrics_callback=self.retrieval_metrics_callback,
)
diff --git a/backend/danswer/secondary_llm_flows/chat_session_naming.py b/backend/danswer/secondary_llm_flows/chat_session_naming.py
index 9449eaded7a..9b02bbb5e27 100644
--- a/backend/danswer/secondary_llm_flows/chat_session_naming.py
+++ b/backend/danswer/secondary_llm_flows/chat_session_naming.py
@@ -15,15 +15,18 @@
def get_renamed_conversation_name(
full_history: list[ChatMessage],
llm: LLM,
+ use_language_hint: bool | None = None,
) -> str:
history_str = combine_message_chain(
messages=full_history, token_limit=GEN_AI_HISTORY_CUTOFF
)
+ # Persona flag wins; otherwise fall back to the global env var.
+ if use_language_hint is None:
+ use_language_hint = bool(MULTILINGUAL_QUERY_EXPANSION)
+
language_hint = (
- f"\n{LANGUAGE_CHAT_NAMING_HINT.strip()}"
- if bool(MULTILINGUAL_QUERY_EXPANSION)
- else ""
+ f"\n{LANGUAGE_CHAT_NAMING_HINT.strip()}" if use_language_hint else ""
)
prompt_msgs = [
diff --git a/backend/danswer/server/features/persona/models.py b/backend/danswer/server/features/persona/models.py
index aee39e72af0..0b7111dc64b 100644
--- a/backend/danswer/server/features/persona/models.py
+++ b/backend/danswer/server/features/persona/models.py
@@ -33,6 +33,9 @@ class CreatePersonaRequest(BaseModel):
# For Private Personas, who should be able to access these
users: list[UUID] | None = None
groups: list[int] | None = None
+ # Translate non-English queries to English for retrieval and
+ # answer in the user's original language. Off by default.
+ multilingual_query_expansion: bool = False
class PersonaSnapshot(BaseModel):
@@ -55,6 +58,7 @@ class PersonaSnapshot(BaseModel):
document_sets: list[DocumentSet]
users: list[MinimalUserSnapshot]
groups: list[int]
+ multilingual_query_expansion: bool
@classmethod
def from_model(
@@ -97,6 +101,7 @@ def from_model(
for user in persona.users
],
groups=[user_group.id for user_group in persona.groups],
+ multilingual_query_expansion=persona.multilingual_query_expansion,
)
diff --git a/backend/danswer/server/query_and_chat/chat_backend.py b/backend/danswer/server/query_and_chat/chat_backend.py
index 4e5a1bb2138..8236375c19a 100644
--- a/backend/danswer/server/query_and_chat/chat_backend.py
+++ b/backend/danswer/server/query_and_chat/chat_backend.py
@@ -234,7 +234,19 @@ def rename_chat_session(
# clear thing we can do
return RenameChatSessionResponse(new_name=full_history[0].message)
- new_name = get_renamed_conversation_name(full_history=full_history, llm=llm)
+ # Honor the persona's multilingual flag so the title is named in the
+ # user's language when the persona is configured for multi-language.
+ chat_session = get_chat_session_by_id(
+ chat_session_id=chat_session_id, user_id=user_id, db_session=db_session
+ )
+    # Pass True only when the persona forces the hint; otherwise pass
+    # None so the naming flow falls back to the global env var (a flag
+    # of False must not suppress an env-var-enabled hint).
+    use_language_hint = (
+        True
+        if chat_session.persona is not None
+        and chat_session.persona.multilingual_query_expansion
+        else None
+    )
+ new_name = get_renamed_conversation_name(
+ full_history=full_history, llm=llm, use_language_hint=use_language_hint
+ )
update_chat_session(
db_session=db_session,
diff --git a/backend/scripts/test_multilanguage_e2e.py b/backend/scripts/test_multilanguage_e2e.py
new file mode 100644
index 00000000000..4fa2a8db2ea
--- /dev/null
+++ b/backend/scripts/test_multilanguage_e2e.py
@@ -0,0 +1,961 @@
+"""End-to-end integration test for the per-persona multi-language flag.
+
+Drives the real stack (Postgres + Vespa + your configured GenAI provider)
+and verifies that:
+
+  Phase 1  Test fixtures: seeded English docs land in Vespa (BM25 +
+           embedding hits) and the two test Personas are created
+  Phase 2  English baseline: the flagged Persona retrieves the seeded
+           docs for English queries (sanity check)
+  Phase 3  With `multilingual_query_expansion=True`, non-English queries
+           retrieve the seeded docs and the streamed answer is in the
+           user's original language (script-detection heuristic on
+           Unicode ranges)
+  Phase 4  A control Persona with the flag OFF behaves differently
+           (logged, not asserted — flagged behavior is the contract)
+  Phase 5  The Slack one-shot path (get_search_answer) meets the same
+           hard contract as Phase 3
+
+Designed for a developer running the local stack. Uses the existing
+ingestion + chat code paths directly (no HTTP) so it doubles as a
+fast smoke test of the wiring we just added.
+
+DESTRUCTIVE: writes (and on --clean removes) rows + Vespa documents
+prefixed with `__test_multilang__`. Run only against a dev DB.
+
+Usage:
+ cd backend
+ PYTHONPATH=$(pwd) python scripts/test_multilanguage_e2e.py [--yes] [--clean] [--keep-data]
+
+Exits 0 when every hard assertion passes, 2 if fixture setup fails,
+and 1 if any later hard assertion fails (remaining phases still run).
+Phase 4 is informational only and does not gate the exit code.
+"""
+from __future__ import annotations
+
+import argparse
+import logging
+import sys
+import unicodedata
+from dataclasses import dataclass
+
+from sqlalchemy import select
+from sqlalchemy import text
+from sqlalchemy.orm import Session
+
+from danswer.chat.models import DanswerAnswerPiece
+from danswer.chat.models import QADocsResponse
+from danswer.chat.models import StreamingError
+from danswer.chat.process_message import stream_chat_message_objects
+from danswer.configs.constants import DocumentSource
+from danswer.connectors.models import Document
+from danswer.connectors.models import IndexAttemptMetadata
+from danswer.connectors.models import InputType
+from danswer.connectors.models import Section
+from danswer.db.chat import create_chat_session
+from danswer.db.chat import get_or_create_root_message
+from danswer.db.embedding_model import get_current_db_embedding_model
+from danswer.db.engine import get_session_context_manager
+from danswer.db.engine import get_sqlalchemy_engine
+from danswer.db.models import Connector
+from danswer.db.models import ConnectorCredentialPair
+from danswer.db.models import Credential
+from danswer.db.models import Document as DbDocument
+from danswer.db.models import DocumentByConnectorCredentialPair
+from danswer.db.models import Persona
+from danswer.db.models import Tool as ToolDBModel
+from danswer.db.persona import get_default_prompt
+from danswer.db.persona import upsert_persona
+from danswer.tools.search.search_tool import SearchTool
+from danswer.document_index.factory import get_default_document_index
+from danswer.indexing.embedder import DefaultIndexingEmbedder
+from danswer.indexing.indexing_pipeline import build_indexing_pipeline
+from danswer.one_shot_answer.answer_question import get_search_answer
+from danswer.one_shot_answer.models import DirectQARequest
+from danswer.one_shot_answer.models import ThreadMessage
+from danswer.search.enums import OptionalSearchSetting
+from danswer.search.enums import RecencyBiasSetting
+from danswer.search.models import RetrievalDetails
+from danswer.server.query_and_chat.models import CreateChatMessageRequest
+
+
+# Keep the global logger quiet so test output is readable.
+logging.getLogger().setLevel(logging.WARNING)
+
+
+SEED_PREFIX = "__test_multilang__"
+PERSONA_ML_NAME = f"{SEED_PREFIX}persona-multilingual"
+PERSONA_CONTROL_NAME = f"{SEED_PREFIX}persona-control"
+
+
+# ---------------------------------------------------------------------------
+# Seed corpus — three facts, each in a distinct doc, all in English.
+# Designed so retrieval recall is unambiguous: each query maps cleanly
+# to exactly one doc.
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class SeedDoc:
+ doc_id: str
+ title: str
+ body: str
+ # The entity the query asks about (kept stable across translations
+ # so we can verify the right doc was retrieved by checking the
+ # answer's content for this string).
+ expected_entity: str
+
+
+# NOTE on entity naming: we deliberately use a fictitious-but-unique
+# brand ("Zorblax") in seed docs so the queries do not collide with any
+# real entity in the host's existing corpus (Salesforce accounts, Slack
+# threads, etc.). When a generic name like "Acme Corp" is used, the
+# retriever's history rephrase + multilingual translation can produce
+# ambiguous fragments that match unrelated docs, and the answer LLM
+# hedges. The unique brand keeps the right doc dominant.
+SEED_CORPUS: list[SeedDoc] = [
+ SeedDoc(
+ doc_id=f"{SEED_PREFIX}doc-vacation-policy",
+ title="Zorblax Vacation Policy",
+ body=(
+ "All Zorblax full-time employees are entitled to 25 paid "
+ "vacation days per calendar year. Vacation days do not roll "
+ "over to the following year. Requests must be submitted at "
+ "least two weeks in advance through the Zorblax HR portal."
+ ),
+ expected_entity="25",
+ ),
+ SeedDoc(
+ doc_id=f"{SEED_PREFIX}doc-vpn-setup",
+ title="Zorblax VPN Setup Guide",
+ body=(
+ "To connect to the Zorblax VPN, install the GlobalProtect "
+ "client from the IT self-service portal. Use your corporate "
+ "email as the username and your single sign-on password. "
+ "The Zorblax gateway URL is vpn.zorblax.example.com."
+ ),
+ expected_entity="GlobalProtect",
+ ),
+ SeedDoc(
+ doc_id=f"{SEED_PREFIX}doc-printer-help",
+ title="Zorblax Office Printer Troubleshooting",
+ body=(
+ "If the Zorblax office printer is not responding, first "
+ "check the network cable and power. The Zorblax printer's "
+ "IP address is 10.20.30.40. To reset the print queue, open "
+ "the Printers control panel and select 'Cancel All "
+ "Documents'."
+ ),
+ expected_entity="10.20.30.40",
+ ),
+]
+
+
+# ---------------------------------------------------------------------------
+# Test queries — each language asks the same questions about the seeded
+# English docs. The translations are deliberately straightforward so the
+# LLM rephrase has a fair chance.
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class LanguageCase:
+ code: str # ISO-ish for display
+ label: str
+ queries: list[tuple[str, SeedDoc]] # (query_text, expected_doc)
+
+
+CASES: list[LanguageCase] = [
+ LanguageCase(
+ code="en",
+ label="English",
+ queries=[
+ ("How many vacation days do Zorblax employees get?", SEED_CORPUS[0]),
+ ("How do I connect to the Zorblax VPN?", SEED_CORPUS[1]),
+ ("What is the IP address of the Zorblax office printer?", SEED_CORPUS[2]),
+ ],
+ ),
+ LanguageCase(
+ code="ja",
+ label="Japanese",
+ queries=[
+ ("Zorblaxの従業員は何日の有給休暇が取れますか?", SEED_CORPUS[0]),
+ ("ZorblaxのVPNに接続するにはどうすればいいですか?", SEED_CORPUS[1]),
+ ("Zorblaxのオフィスプリンタの IPアドレスは何ですか?", SEED_CORPUS[2]),
+ ],
+ ),
+ LanguageCase(
+ code="zh",
+ label="Chinese",
+ queries=[
+ ("Zorblax 公司的员工每年有多少天带薪休假?", SEED_CORPUS[0]),
+ ("如何连接 Zorblax 公司的 VPN?", SEED_CORPUS[1]),
+ ("Zorblax 办公室打印机的 IP 地址是多少?", SEED_CORPUS[2]),
+ ],
+ ),
+ LanguageCase(
+ code="ko",
+ label="Korean",
+ queries=[
+ ("Zorblax 직원은 연간 며칠의 유급 휴가를 받을 수 있나요?", SEED_CORPUS[0]),
+ ("Zorblax의 VPN에 어떻게 접속하나요?", SEED_CORPUS[1]),
+ ("Zorblax 사무실 프린터의 IP 주소는 무엇인가요?", SEED_CORPUS[2]),
+ ],
+ ),
+]
+
+
+# ---------------------------------------------------------------------------
+# Output helpers
+# ---------------------------------------------------------------------------
+
+
+_PASS = "\033[32mPASS\033[0m"
+_FAIL = "\033[31mFAIL\033[0m"
+_INFO = "\033[33mINFO\033[0m"
+
+
+def section(title: str) -> None:
+ print(f"\n=== {title} ===")
+
+
+def ok(msg: str) -> None:
+ print(f" [{_PASS}] {msg}")
+
+
+def fail(msg: str) -> None:
+ print(f" [{_FAIL}] {msg}")
+
+
+def info(msg: str) -> None:
+ print(f" [{_INFO}] {msg}")
+
+
+# ---------------------------------------------------------------------------
+# Language detection — heuristic, by Unicode block dominance.
+# ---------------------------------------------------------------------------
+
+
+def detect_language(text: str) -> str:
+ """Returns one of: 'ja', 'zh', 'ko', 'en', 'mixed/other'.
+
+ Heuristic: count code points by script. If >= 5% Hiragana/Katakana,
+ call it Japanese (kanji alone could be Japanese or Chinese, so
+ presence of kana disambiguates). Else if >= 5% Hangul → Korean.
+ Else if >= 5% CJK ideographs → Chinese. Else if mostly ASCII letters
+ → English. Else 'mixed/other'.
+ """
+ if not text:
+ return "mixed/other"
+ counts = {"hiragana_katakana": 0, "hangul": 0, "cjk": 0, "ascii_letter": 0}
+ total_letters = 0
+ for ch in text:
+ cp = ord(ch)
+ if (0x3040 <= cp <= 0x309F) or (0x30A0 <= cp <= 0x30FF):
+ counts["hiragana_katakana"] += 1
+ total_letters += 1
+ elif 0xAC00 <= cp <= 0xD7AF:
+ counts["hangul"] += 1
+ total_letters += 1
+ elif (0x4E00 <= cp <= 0x9FFF) or (0x3400 <= cp <= 0x4DBF):
+ counts["cjk"] += 1
+ total_letters += 1
+ elif unicodedata.category(ch).startswith("L"):
+ # Latin-script letter (etc.)
+ counts["ascii_letter"] += 1
+ total_letters += 1
+ if total_letters == 0:
+ return "mixed/other"
+ threshold = max(1, total_letters // 20) # 5%
+ if counts["hiragana_katakana"] >= threshold:
+ return "ja"
+ if counts["hangul"] >= threshold:
+ return "ko"
+ if counts["cjk"] >= threshold:
+ return "zh"
+ if counts["ascii_letter"] >= total_letters * 0.7:
+ return "en"
+ return "mixed/other"
+
+
+# ---------------------------------------------------------------------------
+# Setup: connector / credential / cc-pair / docs
+# ---------------------------------------------------------------------------
+
+
+def confirm_destructive(skip: bool) -> None:
+ engine = get_sqlalchemy_engine()
+ url = engine.url
+ safe_url = f"{url.drivername}://{url.username}@{url.host}:{url.port}/{url.database}"
+ if skip:
+ print(f"[--yes] Proceeding against {safe_url}")
+ return
+ print(f"This script writes/deletes tagged ({SEED_PREFIX!r}) data in:")
+ print(f" {safe_url}")
+ print("It also indexes a small set of test docs into Vespa.")
+ answer = input("Type 'yes' to continue: ")
+ if answer.strip().lower() != "yes":
+ print("Aborted.")
+ sys.exit(1)
+
+
+def get_or_create_test_cc_pair(db: Session) -> ConnectorCredentialPair:
+ existing = db.execute(
+ select(ConnectorCredentialPair).where(
+ ConnectorCredentialPair.name == f"{SEED_PREFIX}ccp"
+ )
+ ).scalar_one_or_none()
+ if existing is not None:
+ return existing
+
+ connector = Connector(
+ name=f"{SEED_PREFIX}connector",
+ source=DocumentSource.INGESTION_API,
+ input_type=InputType.LOAD_STATE,
+ connector_specific_config={"_test_multilang": True},
+ refresh_freq=None,
+ disabled=False,
+ )
+ credential = Credential(admin_public=True, credential_json={})
+ db.add_all([connector, credential])
+ db.flush()
+ ccp = ConnectorCredentialPair(
+ connector_id=connector.id,
+ credential_id=credential.id,
+ name=f"{SEED_PREFIX}ccp",
+ is_public=True,
+ total_docs_indexed=0,
+ )
+ db.add(ccp)
+ db.commit()
+ return ccp
+
+
+def seed_vespa_docs(db: Session, ccp: ConnectorCredentialPair) -> int:
+ """Push the SEED_CORPUS through the real indexing pipeline so they
+    land in Vespa with embeddings + BM25. Returns the count of newly
+    indexed docs (re-runs reindex existing docs in place and report 0)."""
+ embedding_model = get_current_db_embedding_model(db)
+ document_index = get_default_document_index(
+ primary_index_name=embedding_model.index_name,
+ secondary_index_name=None,
+ )
+
+ embedder = DefaultIndexingEmbedder(
+ model_name=embedding_model.model_name,
+ normalize=embedding_model.normalize,
+ query_prefix=embedding_model.query_prefix,
+ passage_prefix=embedding_model.passage_prefix,
+ )
+
+ pipeline = build_indexing_pipeline(
+ embedder=embedder,
+ document_index=document_index,
+ ignore_time_skip=True,
+ db_session=db,
+ )
+
+ docs = [
+ Document(
+ id=sd.doc_id,
+ sections=[Section(text=f"{sd.title}\n\n{sd.body}", link=None)],
+ source=DocumentSource.INGESTION_API,
+ semantic_identifier=sd.title,
+ metadata={"_test_multilang": "true"},
+ from_ingestion_api=True,
+ )
+ for sd in SEED_CORPUS
+ ]
+
+    new_docs_cnt, _chunk_cnt = pipeline(
+        documents=docs,
+        index_attempt_metadata=IndexAttemptMetadata(
+            connector_id=ccp.connector_id,
+            credential_id=ccp.credential_id,
+        ),
+    )
+    return new_docs_cnt
+
+
+# ---------------------------------------------------------------------------
+# Persona helpers
+# ---------------------------------------------------------------------------
+
+
+def upsert_test_persona(db: Session, name: str, multilingual: bool) -> Persona:
+ default_prompt = get_default_prompt(db)
+ # Without the SearchTool attached, the chat flow has nothing to
+ # retrieve with — the LLM falls back to its training knowledge and
+ # never sees our seeded docs. Look it up by in_code_tool_id so the
+ # test isn't tied to a hardcoded id.
+ search_tool_row = db.execute(
+ select(ToolDBModel).where(ToolDBModel.in_code_tool_id == SearchTool.__name__)
+ ).scalar_one_or_none()
+ if search_tool_row is None:
+ raise RuntimeError(
+ "Built-in SearchTool not found in DB; ensure api-server has "
+ "started at least once so it can seed in-code tools."
+ )
+ persona = upsert_persona(
+ user=None,
+ name=name,
+ description=f"{SEED_PREFIX} persona for multilingual e2e test",
+ num_chunks=10,
+ llm_relevance_filter=False,
+ llm_filter_extraction=False,
+ recency_bias=RecencyBiasSetting.BASE_DECAY,
+ llm_model_provider_override=None,
+ llm_model_version_override=None,
+ starter_messages=None,
+ is_public=True,
+ prompt_ids=[default_prompt.id],
+ document_set_ids=[],
+ tool_ids=[search_tool_row.id],
+ multilingual_query_expansion=multilingual,
+ db_session=db,
+ )
+ return persona
+
+
+# ---------------------------------------------------------------------------
+# Drive a single chat-message call and collect what we need.
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class ChatProbeResult:
+ answer_text: str
+ retrieved_doc_ids: list[str]
+ retrieved_titles: list[str]
+ error: str | None
+
+
+def probe_chat(persona: Persona, query: str) -> ChatProbeResult:
+ """One-shot: create chat session, send message, drain the stream."""
+ with get_session_context_manager() as db_session:
+ chat_session = create_chat_session(
+ db_session=db_session,
+ description=f"{SEED_PREFIX}probe",
+ user_id=None,
+ persona_id=persona.id,
+ )
+ root = get_or_create_root_message(
+ chat_session_id=chat_session.id, db_session=db_session
+ )
+
+ req = CreateChatMessageRequest(
+ chat_session_id=chat_session.id,
+ parent_message_id=root.id,
+ message=query,
+ file_descriptors=[],
+ prompt_id=None,
+ search_doc_ids=None,
+ retrieval_options=RetrievalDetails(
+ run_search=OptionalSearchSetting.ALWAYS, real_time=True
+ ),
+ )
+
+ answer_pieces: list[str] = []
+ retrieved_doc_ids: list[str] = []
+ retrieved_titles: list[str] = []
+ error: str | None = None
+
+ try:
+ for obj in stream_chat_message_objects(
+ new_msg_req=req,
+ user=None,
+ db_session=db_session,
+ ):
+ if isinstance(obj, DanswerAnswerPiece):
+ if obj.answer_piece:
+ answer_pieces.append(obj.answer_piece)
+ elif isinstance(obj, QADocsResponse):
+ for d in obj.top_documents or []:
+ retrieved_doc_ids.append(d.document_id)
+ retrieved_titles.append(d.semantic_identifier or "")
+ elif isinstance(obj, StreamingError):
+ error = obj.error
+ except Exception as exc:
+ error = f"{type(exc).__name__}: {exc}"
+
+ return ChatProbeResult(
+ answer_text="".join(answer_pieces).strip(),
+ retrieved_doc_ids=retrieved_doc_ids,
+ retrieved_titles=retrieved_titles,
+ error=error,
+ )
+
+
+# ---------------------------------------------------------------------------
+# Cleanup
+# ---------------------------------------------------------------------------
+
+
+def cleanup(db: Session) -> None:
+ section("Cleanup")
+ # FK dependency graph (collected via pg_constraint):
+ # chat_message__search_doc -> chat_message
+ # tool_call -> chat_message
+ # chat_feedback -> chat_message
+ # document_retrieval_feedback -> chat_message
+ # chat_message -> chat_session
+ # chat_session -> persona
+ # We must drop dependents before parents. Use raw SQL — much
+ # cleaner than walking the ORM for a destructive teardown.
+ #
+ # Match scope: any chat session whose description carries our
+ # SEED_PREFIX *or* whose persona is one of our test personas.
+ # That covers prior aborted runs, runs that crashed mid-test, and
+ # the case where the chat UI was used to talk to our test persona.
+ db.execute(
+ text(
+ """
+ CREATE TEMP TABLE _ml_test_sessions ON COMMIT DROP AS
+ SELECT cs.id
+ FROM chat_session cs
+ WHERE cs.description LIKE :prefix
+ OR cs.persona_id IN (
+ SELECT id FROM persona WHERE name LIKE :prefix
+ );
+ """
+ ),
+ {"prefix": f"{SEED_PREFIX}%"},
+ )
+ db.execute(
+ text(
+ """
+ CREATE TEMP TABLE _ml_test_messages ON COMMIT DROP AS
+ SELECT id FROM chat_message
+ WHERE chat_session_id IN (SELECT id FROM _ml_test_sessions);
+ """
+ )
+ )
+
+ deleted_msdoc = db.execute(
+ text(
+ """
+ DELETE FROM chat_message__search_doc
+ WHERE chat_message_id IN (SELECT id FROM _ml_test_messages);
+ """
+ )
+ ).rowcount
+ deleted_toolcall = db.execute(
+ text(
+ """
+ DELETE FROM tool_call
+ WHERE message_id IN (SELECT id FROM _ml_test_messages);
+ """
+ )
+ ).rowcount
+ deleted_cfeedback = db.execute(
+ text(
+ """
+ DELETE FROM chat_feedback
+ WHERE chat_message_id IN (SELECT id FROM _ml_test_messages);
+ """
+ )
+ ).rowcount
+ deleted_drfeedback = db.execute(
+ text(
+ """
+ DELETE FROM document_retrieval_feedback
+ WHERE chat_message_id IN (SELECT id FROM _ml_test_messages);
+ """
+ )
+ ).rowcount
+ deleted_msgs = db.execute(
+ text(
+ """
+ DELETE FROM chat_message
+ WHERE id IN (SELECT id FROM _ml_test_messages);
+ """
+ )
+ ).rowcount
+ deleted_sessions = db.execute(
+ text(
+ """
+ DELETE FROM chat_session
+ WHERE id IN (SELECT id FROM _ml_test_sessions);
+ """
+ )
+ ).rowcount
+
+ info(
+ f"deleted {deleted_sessions} chat session(s), {deleted_msgs} "
+ f"message(s); cascaded: msg__search_doc={deleted_msdoc}, "
+ f"tool_call={deleted_toolcall}, chat_feedback={deleted_cfeedback}, "
+ f"document_retrieval_feedback={deleted_drfeedback}"
+ )
+
+ # Personas (now safe to drop — no chat session points at them).
+ personas = (
+ db.execute(select(Persona).where(Persona.name.like(f"{SEED_PREFIX}%")))
+ .scalars()
+ .all()
+ )
+ for p in personas:
+ db.delete(p)
+ info(f"deleted {len(personas)} test persona(s)")
+
+ # Documents (Postgres rows; Vespa cleanup is best-effort below)
+ db_docs = (
+ db.execute(select(DbDocument).where(DbDocument.id.like(f"{SEED_PREFIX}%")))
+ .scalars()
+ .all()
+ )
+ for d in db_docs:
+ db.execute(
+ DocumentByConnectorCredentialPair.__table__.delete().where(
+ DocumentByConnectorCredentialPair.id == d.id
+ )
+ )
+ db.delete(d)
+ info(f"deleted {len(db_docs)} document row(s)")
+
+ # cc-pair, connector, credential
+ ccp = db.execute(
+ select(ConnectorCredentialPair).where(
+ ConnectorCredentialPair.name == f"{SEED_PREFIX}ccp"
+ )
+ ).scalar_one_or_none()
+ if ccp is not None:
+ connector_id = ccp.connector_id
+ credential_id = ccp.credential_id
+ db.delete(ccp)
+ connector = db.get(Connector, connector_id)
+ if connector is not None:
+ db.delete(connector)
+ credential = db.get(Credential, credential_id)
+ if credential is not None:
+ db.delete(credential)
+ info("deleted test cc-pair / connector / credential")
+
+ db.commit()
+ info(
+ "Vespa: tagged test docs intentionally left in the index "
+ "(deletion goes through the connector framework). Re-running "
+ "this test reindexes them in place."
+ )
+
+
+# ---------------------------------------------------------------------------
+# Phases
+# ---------------------------------------------------------------------------
+
+
+def phase_setup(db: Session) -> tuple[ConnectorCredentialPair, Persona, Persona]:
+ section("Phase 1 — setup test fixtures")
+ ccp = get_or_create_test_cc_pair(db)
+ ok(f"cc-pair {ccp.id} ({ccp.name}) ready")
+ n_indexed = seed_vespa_docs(db, ccp)
+ if n_indexed != len(SEED_CORPUS):
+ # n_indexed is the count of *new* docs, so a re-run yields 0.
+ info(
+ f"indexing pipeline reported {n_indexed} new docs "
+ f"(re-runs reindex existing docs in place)"
+ )
+ ok(f"seeded {len(SEED_CORPUS)} English doc(s) into Vespa")
+
+ persona_ml = upsert_test_persona(db, PERSONA_ML_NAME, multilingual=True)
+ persona_ctrl = upsert_test_persona(db, PERSONA_CONTROL_NAME, multilingual=False)
+ ok(
+ f"persona [{persona_ml.name}] id={persona_ml.id}, "
+ f"multilingual_query_expansion={persona_ml.multilingual_query_expansion}"
+ )
+ ok(
+ f"persona [{persona_ctrl.name}] id={persona_ctrl.id}, "
+ f"multilingual_query_expansion={persona_ctrl.multilingual_query_expansion}"
+ )
+ return ccp, persona_ml, persona_ctrl
+
+
+def phase_english_baseline(persona_ml: Persona) -> bool:
+ section("Phase 2 — English baseline (sanity check)")
+ case = next(c for c in CASES if c.code == "en")
+ failures = 0
+ for query, expected_doc in case.queries:
+ result = probe_chat(persona_ml, query)
+ if result.error:
+ fail(f"[en] '{query[:60]}' streaming error: {result.error}")
+ failures += 1
+ continue
+ if expected_doc.doc_id in result.retrieved_doc_ids:
+ ok(f"[en] retrieval hit expected doc for: '{query[:60]}'")
+ else:
+ fail(f"[en] expected doc NOT in top docs for: '{query[:60]}'")
+ info(f" retrieved: {result.retrieved_titles[:3]}")
+ failures += 1
+ # Sanity: did the expected entity appear in the answer?
+ if expected_doc.expected_entity.lower() in result.answer_text.lower():
+ ok(
+ f"[en] answer contains expected entity "
+ f"'{expected_doc.expected_entity}'"
+ )
+ else:
+ info(
+ f"[en] answer does NOT contain '{expected_doc.expected_entity}' "
+ f"(LLM may have paraphrased; check manually). "
+ f"Answer head: {result.answer_text[:120]!r}"
+ )
+ return failures == 0
+
+
+def phase_non_english(persona_ml: Persona) -> bool:
+ """Hard contract for the persona flag: when on, non-English queries
+ must (a) translate-for-retrieval so the right English doc is found,
+ (b) the answer must contain the factual entity from that doc
+ (numeric / proper-noun entities survive translation), AND (c) the
+ final answer text is in the user's language. (c) is enforced by the
+ post-translation pass in process_message.py — the answering LLM
+ might still produce English internally, but the second pass
+ translates that to the user's language before we yield it."""
+ section("Phase 3 — non-English queries with multilingual flag ON")
+ failures = 0
+ lang_match = 0
+ lang_total = 0
+ for case in CASES:
+ if case.code == "en":
+ continue
+ for query, expected_doc in case.queries:
+ result = probe_chat(persona_ml, query)
+ if result.error:
+ fail(
+ f"[{case.code}] '{query[:60]}' streaming error: " f"{result.error}"
+ )
+ failures += 1
+ continue
+
+ # 3a — retrieval brought back the right English doc.
+ # This proves the persona flag wired translate-to-English
+ # into retrieval.
+ if expected_doc.doc_id in result.retrieved_doc_ids:
+ ok(f"[{case.code}] retrieval hit expected doc for: " f"'{query[:60]}'")
+ else:
+ fail(
+ f"[{case.code}] expected doc NOT in top docs for: "
+ f"'{query[:60]}'"
+ )
+ info(f" retrieved: {result.retrieved_titles[:3]}")
+ failures += 1
+
+ # 3b — answer contains the expected entity. Entities are
+ # numerals / proper nouns that survive translation, so the
+ # LLM should keep them verbatim regardless of output
+ # language. This is the strongest correctness signal.
+ if expected_doc.expected_entity.lower() in result.answer_text.lower():
+ ok(
+ f"[{case.code}] answer contains expected entity "
+ f"'{expected_doc.expected_entity}'"
+ )
+ else:
+ fail(
+ f"[{case.code}] answer missing entity "
+ f"'{expected_doc.expected_entity}'. Answer head: "
+ f"{result.answer_text[:120]!r}"
+ )
+ failures += 1
+
+ # 3c — answer is in the user's language. Now a hard
+ # assertion because the post-translation pass guarantees
+ # this regardless of the answering LLM's behavior. If the
+ # detected language doesn't match, either the post-pass
+ # was not invoked (wiring bug) or it returned the English
+ # fallback (translate LLM call failed).
+ detected = detect_language(result.answer_text)
+ lang_total += 1
+ if detected == case.code:
+ lang_match += 1
+ ok(f"[{case.code}] answer language: {detected}")
+ else:
+ fail(
+ f"[{case.code}] expected {case.code} answer, detected "
+ f"{detected}. Answer head: {result.answer_text[:200]!r}"
+ )
+ failures += 1
+ info(
+ f"language-match summary: {lang_match}/{lang_total} non-English "
+ f"answers came back in the user's language"
+ )
+ return failures == 0
+
+
+def probe_slack(persona: Persona, query: str) -> ChatProbeResult:
+ """Drive the one-shot answer path that the Slack listener uses.
+ `get_search_answer` runs the same Answer pipeline as chat but with
+ its own retry loop and citation enforcement."""
+ with get_session_context_manager() as db_session:
+ req = DirectQARequest(
+ messages=[ThreadMessage(message=query, sender=None)],
+ prompt_id=None,
+ persona_id=persona.id,
+ retrieval_options=RetrievalDetails(
+ run_search=OptionalSearchSetting.ALWAYS, real_time=True
+ ),
+ )
+ try:
+ response = get_search_answer(
+ query_req=req,
+ user=None,
+ max_document_tokens=None,
+ max_history_tokens=None,
+ db_session=db_session,
+ use_citations=True,
+ danswerbot_flow=True,
+ )
+ except Exception as exc:
+ return ChatProbeResult(
+ answer_text="",
+ retrieved_doc_ids=[],
+ retrieved_titles=[],
+ error=f"{type(exc).__name__}: {exc}",
+ )
+
+ retrieved_doc_ids: list[str] = []
+ retrieved_titles: list[str] = []
+ if response.docs and response.docs.top_documents:
+ for d in response.docs.top_documents:
+ retrieved_doc_ids.append(d.document_id)
+ retrieved_titles.append(d.semantic_identifier or "")
+ return ChatProbeResult(
+ answer_text=(response.answer or "").strip(),
+ retrieved_doc_ids=retrieved_doc_ids,
+ retrieved_titles=retrieved_titles,
+ error=response.error_msg,
+ )
+
+
+def phase_slack(persona_ml: Persona) -> bool:
+ """Smoke test for the Slack one-shot path. Same hard contract as
+ Phase 3 (retrieval hit + entity in answer + answer in user's
+ language), but driven through `get_search_answer` — the function
+ the slack listener calls."""
+ section("Phase 5 — Slack one-shot path with multilingual flag ON")
+ failures = 0
+ lang_match = 0
+ lang_total = 0
+ for case in CASES:
+ if case.code == "en":
+ continue
+ # One query per language is plenty for a smoke test (each
+ # query takes 2× LLM round-trips: answer + translate).
+ query, expected_doc = case.queries[0]
+ result = probe_slack(persona_ml, query)
+ if result.error:
+ fail(f"[slack {case.code}] '{query[:60]}' error: {result.error}")
+ failures += 1
+ continue
+
+ if expected_doc.doc_id in result.retrieved_doc_ids:
+ ok(f"[slack {case.code}] retrieval hit expected doc")
+ else:
+ fail(
+ f"[slack {case.code}] expected doc NOT in top docs. "
+ f"retrieved: {result.retrieved_titles[:3]}"
+ )
+ failures += 1
+
+ if expected_doc.expected_entity.lower() in result.answer_text.lower():
+ ok(
+ f"[slack {case.code}] answer contains entity "
+ f"'{expected_doc.expected_entity}'"
+ )
+ else:
+ fail(
+ f"[slack {case.code}] answer missing entity "
+ f"'{expected_doc.expected_entity}'. Answer head: "
+ f"{result.answer_text[:120]!r}"
+ )
+ failures += 1
+
+ detected = detect_language(result.answer_text)
+ lang_total += 1
+ if detected == case.code:
+ lang_match += 1
+ ok(f"[slack {case.code}] answer language: {detected}")
+ else:
+ fail(
+ f"[slack {case.code}] expected {case.code}, detected "
+ f"{detected}. Answer head: {result.answer_text[:200]!r}"
+ )
+ failures += 1
+ info(
+ f"slack-path language-match summary: {lang_match}/{lang_total} "
+ f"non-English answers came back in the user's language"
+ )
+ return failures == 0
+
+
+def phase_control(persona_ctrl: Persona) -> None:
+ section("Phase 4 — control: same queries with flag OFF (informational)")
+ for case in CASES:
+ if case.code == "en":
+ continue
+ # Just one query per language is enough to see the contrast.
+ query, expected_doc = case.queries[0]
+ result = probe_chat(persona_ctrl, query)
+ if result.error:
+ info(f"[{case.code}] streaming error: {result.error}")
+ continue
+ retrieved = expected_doc.doc_id in result.retrieved_doc_ids
+ detected = detect_language(result.answer_text)
+ info(
+ f"[{case.code}] flag-OFF persona | retrieval-hit={retrieved} | "
+ f"answer-lang={detected}"
+ )
+
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+
+
+def main() -> int:
+ parser = argparse.ArgumentParser(description=__doc__)
+ parser.add_argument(
+ "--yes",
+ action="store_true",
+ help="Skip the destructive-action confirmation prompt",
+ )
+ parser.add_argument(
+ "--clean", action="store_true", help="Remove tagged test data and exit"
+ )
+ parser.add_argument(
+ "--keep-data",
+ action="store_true",
+ help="Skip cleanup at the end of a successful run",
+ )
+ args = parser.parse_args()
+
+ confirm_destructive(args.yes)
+
+ if args.clean:
+ with get_session_context_manager() as db:
+ cleanup(db)
+ return 0
+
+ overall_ok = True
+ with get_session_context_manager() as db:
+ try:
+ ccp, persona_ml, persona_ctrl = phase_setup(db)
+ except Exception as exc:
+ fail(f"setup failed: {type(exc).__name__}: {exc}")
+ return 2
+
+ if not phase_english_baseline(persona_ml):
+ overall_ok = False
+
+ if not phase_non_english(persona_ml):
+ overall_ok = False
+
+        phase_control(persona_ctrl)
+
+        if not phase_slack(persona_ml):
+            overall_ok = False
+
+ if not args.keep_data:
+ with get_session_context_manager() as db:
+ cleanup(db)
+
+ print()
+ if overall_ok:
+ print(f"[{_PASS}] multi-language e2e: all hard assertions passed")
+ return 0
+ print(f"[{_FAIL}] multi-language e2e: see failures above")
+ return 1
+
+
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/web/src/app/admin/assistants/AssistantEditor.tsx b/web/src/app/admin/assistants/AssistantEditor.tsx
index c58cdcdadf9..4c4e7152bb8 100644
--- a/web/src/app/admin/assistants/AssistantEditor.tsx
+++ b/web/src/app/admin/assistants/AssistantEditor.tsx
@@ -178,6 +178,8 @@ export function AssistantEditor({
num_chunks: existingPersona?.num_chunks ?? null,
include_citations: existingPersona?.prompts[0]?.include_citations ?? true,
llm_relevance_filter: existingPersona?.llm_relevance_filter ?? false,
+ multilingual_query_expansion:
+ existingPersona?.multilingual_query_expansion ?? false,
llm_model_provider_override:
existingPersona?.llm_model_provider_override ?? null,
llm_model_version_override:
@@ -213,6 +215,7 @@ export function AssistantEditor({
num_chunks: Yup.number().nullable(),
include_citations: Yup.boolean().required(),
llm_relevance_filter: Yup.boolean().required(),
+ multilingual_query_expansion: Yup.boolean().required(),
llm_model_version_override: Yup.string().nullable(),
llm_model_provider_override: Yup.string().nullable(),
starter_messages: Yup.array().of(
@@ -580,6 +583,14 @@ export function AssistantEditor({
}
/>
+
+          <BooleanFormField
+            name="multilingual_query_expansion"
+            label="Multilingual Query Expansion"
+            subtext="If enabled, non-English queries are translated to English for retrieval and the final answer is returned in the user's original language. Adds one extra LLM call per non-English query."
+          />
+
);
-}
\ No newline at end of file
+}
diff --git a/web/src/app/admin/assistants/interfaces.ts b/web/src/app/admin/assistants/interfaces.ts
index 0a06ac4cc82..5c9869f78d7 100644
--- a/web/src/app/admin/assistants/interfaces.ts
+++ b/web/src/app/admin/assistants/interfaces.ts
@@ -38,4 +38,5 @@ export interface Persona {
default_persona: boolean;
users: MinimalUserSnapshot[];
groups: number[];
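+  // Optional so persona payloads fetched before this field shipped
+  // still type-check; consumers treat a missing value as false.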
+ multilingual_query_expansion?: boolean;
}
diff --git a/web/src/app/admin/assistants/lib.ts b/web/src/app/admin/assistants/lib.ts
index 4d42789d810..4eadb98f243 100644
--- a/web/src/app/admin/assistants/lib.ts
+++ b/web/src/app/admin/assistants/lib.ts
@@ -16,6 +16,7 @@ interface PersonaCreationRequest {
users?: string[];
groups: number[];
tool_ids: number[]; // Added tool_ids to the interface
+ multilingual_query_expansion: boolean;
}
interface PersonaUpdateRequest {
@@ -36,6 +37,7 @@ interface PersonaUpdateRequest {
users?: string[];
groups: number[];
tool_ids: number[]; // Added tool_ids to the interface
+ multilingual_query_expansion: boolean;
}
function promptNameFromPersonaName(personaName: string) {
@@ -110,6 +112,7 @@ function buildPersonaAPIBody(
groups,
users,
tool_ids, // Added tool_ids to the destructuring
+ multilingual_query_expansion,
} = creationRequest;
return {
@@ -128,6 +131,7 @@ function buildPersonaAPIBody(
users,
groups,
tool_ids, // Added tool_ids to the return object
+ multilingual_query_expansion,
};
}
diff --git a/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx b/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx
index 55497149ea0..032e47aad00 100644
--- a/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx
+++ b/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx
@@ -167,8 +167,21 @@ export const SlackBotCreationForm = ({
}),
curated_response_config: Yup.object().shape({
enable_curated_response_integration: Yup.boolean().required(),
- response_message: Yup.string().required(
- "Response message is required when curated response integration is enabled"
+ // Mirror jira_config: only require this when the integration
+ // is enabled. Without the .when() guard the field is required
+ // unconditionally, but the UI hides the input when the toggle
+ // is off — Formik silently rejects submit and no error is
+ // visible since the (errored) field isn't on screen.
+ response_message: Yup.string().when(
+ "enable_curated_response_integration",
+ {
+ is: true,
+ then: (schema) =>
+ schema.required(
+ "Response message is required when curated response integration is enabled"
+ ),
+ otherwise: (schema) => schema.notRequired(),
+ }
),
}),
jira_title_filter: Yup.array()
diff --git a/web/src/components/table/DragHandle.tsx b/web/src/components/table/DragHandle.tsx
index a288d58b46c..3bebed22822 100644
--- a/web/src/components/table/DragHandle.tsx
+++ b/web/src/components/table/DragHandle.tsx
@@ -1,13 +1,14 @@
import React from "react";
import { MdDragIndicator } from "react-icons/md";
-export const DragHandle = (props: any) => {
+export const DragHandle = ({ isDragging, ...rest }: any) => {
+ // `isDragging` is a logical prop from @dnd-kit/sortable; pull it
+ // out before spreading so React doesn't warn about an unknown DOM
+ // attribute on the div.
return (