diff --git a/backend/alembic/versions/a3f1d7c4e9b2_persona_multilingual_query_expansion.py b/backend/alembic/versions/a3f1d7c4e9b2_persona_multilingual_query_expansion.py new file mode 100644 index 00000000000..ab615c76e6b --- /dev/null +++ b/backend/alembic/versions/a3f1d7c4e9b2_persona_multilingual_query_expansion.py @@ -0,0 +1,32 @@ +"""persona multilingual_query_expansion flag + +Revision ID: a3f1d7c4e9b2 +Revises: c8a4e2f9d1b3 +Create Date: 2026-05-04 12:00:00.000000 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "a3f1d7c4e9b2" +down_revision = "c8a4e2f9d1b3" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.add_column( + "persona", + sa.Column( + "multilingual_query_expansion", + sa.Boolean(), + nullable=False, + server_default=sa.text("false"), + ), + ) + + +def downgrade() -> None: + op.drop_column("persona", "multilingual_query_expansion") diff --git a/backend/danswer/chat/multilingual_translation.py b/backend/danswer/chat/multilingual_translation.py new file mode 100644 index 00000000000..5976f6fcc89 --- /dev/null +++ b/backend/danswer/chat/multilingual_translation.py @@ -0,0 +1,151 @@ +"""Helpers for the per-persona multi-language post-processing pass. + +When a persona has `multilingual_query_expansion=True` and the user's +query is non-English, the answering LLM still produces English most of +the time (it tends to mirror the English context corpus regardless of +the LANGUAGE_HINT directive). We compensate by post-translating the +English answer back into the user's original language. + +Trade-off: in translate mode we buffer the streamed answer instead of +showing it token-by-token. The user sees a brief delay (one extra LLM +round-trip), but reliably gets a reply in their language. English +queries are unaffected — they keep streaming normally. +""" +from __future__ import annotations + +import unicodedata + +from danswer.llm.interfaces import LLM +from danswer.llm.utils import dict_based_prompt_to_langchain_prompt +from danswer.llm.utils import message_to_string +from danswer.utils.logger import setup_logger + +logger = setup_logger() + + +# Display name passed to the translation prompt. Keys are the language +# codes detect_query_language returns. Anything not in this map is +# treated as English (no translation needed). +_LANGUAGE_NAMES: dict[str, str] = { + "ja": "Japanese", + "zh": "Chinese (Simplified)", + "ko": "Korean", +} + + +def detect_query_language(text: str) -> str: + """Cheap script-based language detector covering the languages we + explicitly support translation for. Returns one of: 'ja', 'zh', + 'ko', or 'en' (English/other — no translation needed). + + Heuristic mirrors the script-presence test in + backend/scripts/test_multilanguage_e2e.py: a few percent of CJK / + Hangul / kana code points is enough to decide. We don't try to be + clever about mixed-language queries — the dominant non-English + script wins, and ties default to English. 
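+
+    Example: detect_query_language("VPNに接続できない") returns 'ja'
+    (kana present); detect_query_language("reset my password")
+    returns 'en' (ASCII letters only).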
+ """ + if not text: + return "en" + + counts = {"hiragana_katakana": 0, "hangul": 0, "cjk": 0, "ascii_letter": 0} + total_letters = 0 + for ch in text: + cp = ord(ch) + if (0x3040 <= cp <= 0x309F) or (0x30A0 <= cp <= 0x30FF): + counts["hiragana_katakana"] += 1 + total_letters += 1 + elif 0xAC00 <= cp <= 0xD7AF: + counts["hangul"] += 1 + total_letters += 1 + elif (0x4E00 <= cp <= 0x9FFF) or (0x3400 <= cp <= 0x4DBF): + counts["cjk"] += 1 + total_letters += 1 + elif unicodedata.category(ch).startswith("L"): + counts["ascii_letter"] += 1 + total_letters += 1 + + if total_letters == 0: + return "en" + threshold = max(1, total_letters // 20) # ~5% + if counts["hiragana_katakana"] >= threshold: + return "ja" + if counts["hangul"] >= threshold: + return "ko" + if counts["cjk"] >= threshold: + return "zh" + return "en" + + +def language_name(code: str) -> str | None: + return _LANGUAGE_NAMES.get(code) + + +# The prompt is intentionally directive about preserving citations and +# not adding commentary. Citations are bracketed numerals like [1] / +# [[1]](url); URLs and code blocks should also pass through unchanged. +_TRANSLATE_PROMPT = """\ +You are a precise translator. + +Translate the text below into {target_language}. + +CRITICAL RULES — follow exactly: +- Preserve every citation marker exactly as-is. Citation markers look + like [1], [2], [[1]](https://example.com), etc. Do not translate + them, do not change the brackets, do not change the numbers. +- Preserve every URL exactly. +- Preserve every code block (text between triple backticks) exactly. +- Preserve every inline code span (text between single backticks). +- Do not add any commentary, preface, or trailing notes — output only + the translated text. +- Keep numbers, proper nouns, and product names in their original + form unless the target language has a well-established equivalent. + +TEXT TO TRANSLATE: +{text} +""" + + +def translate_answer_to_language( + answer_text: str, + target_language_code: str, + llm: LLM, +) -> str: + """Translate `answer_text` into the language named by + `target_language_code` (a key of _LANGUAGE_NAMES). Returns the + English original on any failure — better to ship an English answer + than to drop the response entirely.""" + target_name = _LANGUAGE_NAMES.get(target_language_code) + if target_name is None: + # Caller should have skipped, but be defensive. 
+ return answer_text + + if not answer_text.strip(): + return answer_text + + prompt_messages = [ + { + "role": "user", + "content": _TRANSLATE_PROMPT.format( + target_language=target_name, text=answer_text + ), + } + ] + + try: + filled = dict_based_prompt_to_langchain_prompt(prompt_messages) + translated = message_to_string(llm.invoke(filled)) + except Exception: + logger.exception( + "Failed to translate answer to %s; falling back to English", + target_name, + ) + return answer_text + + translated = translated.strip() + if not translated: + logger.warning( + "Translation to %s came back empty; falling back to English", + target_name, + ) + return answer_text + return translated diff --git a/backend/danswer/chat/process_message.py b/backend/danswer/chat/process_message.py index 64f6bab822d..4a11aa24ed5 100644 --- a/backend/danswer/chat/process_message.py +++ b/backend/danswer/chat/process_message.py @@ -14,6 +14,9 @@ from danswer.chat.models import LLMRelevanceFilterResponse from danswer.chat.models import QADocsResponse from danswer.chat.models import StreamingError +from danswer.chat.multilingual_translation import detect_query_language +from danswer.chat.multilingual_translation import language_name +from danswer.chat.multilingual_translation import translate_answer_to_language from danswer.configs.chat_configs import CHAT_TARGET_CHUNK_PERCENTAGE from danswer.configs.chat_configs import DISABLE_LLM_CHOOSE_SEARCH from danswer.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT @@ -92,11 +95,11 @@ def translate_citations( for db_doc in db_docs: if db_doc.document_id not in doc_id_to_saved_doc_id_map: doc_id_to_saved_doc_id_map[db_doc.document_id] = db_doc.id - #print(f'found doc id: {db_doc.id}') + # print(f'found doc id: {db_doc.id}') citation_to_saved_doc_id_map: dict[int, int] = {} for citation in citations_list: - #print(f'citation id {citation.document_id} for doc num {citation.citation_num}') + # print(f'citation id {citation.document_id} for doc num {citation.citation_num}') if citation.citation_num not in citation_to_saved_doc_id_map: citation_to_saved_doc_id_map[ citation.citation_num @@ -404,15 +407,25 @@ def stream_chat_message_objects( if not final_msg.prompt: raise RuntimeError("No Prompt found") + # Persona may be None for legacy flows; treat the flag as off in + # that case. When persona exists, thread its flag through so the + # answer-side prompt builders add the LANGUAGE_HINT. + persona_multilingual = ( + persona.multilingual_query_expansion if persona is not None else False + ) prompt_config = ( PromptConfig.from_model( final_msg.prompt, prompt_override=( new_msg_req.prompt_override or chat_session.prompt_override ), + multilingual_query_expansion=persona_multilingual, ) if not persona - else PromptConfig.from_model(persona.prompts[0]) + else PromptConfig.from_model( + persona.prompts[0], + multilingual_query_expansion=persona_multilingual, + ) ) # find out what tools to use @@ -539,6 +552,22 @@ def stream_chat_message_objects( ai_message_files = None # any files to associate with the AI message e.g. dall-e generated images dropped_indices = None tool_result = None + + # Multi-language post-processing pass (option C in the design): + # when the persona has multilingual_query_expansion=True and the + # user's question is in a non-English language, the LLM tends + # to answer in English regardless of the LANGUAGE_HINT + # directive. We compensate by buffering DanswerAnswerPiece + # tokens during the stream and emitting a single translated + # piece at the end. 
Other packet types (citations, tool
+        # responses, image generation, etc.) still flow in real time.
+        translate_target = None
+        if persona_multilingual:
+            detected = detect_query_language(message_text)
+            if language_name(detected) is not None:
+                translate_target = detected
+        buffered_answer_pieces: list[str] = []
+
         for packet in answer.processed_streamed_output:
             if isinstance(packet, ToolResponse):
                 if packet.id == SEARCH_RESPONSE_SUMMARY_ID:
@@ -594,8 +623,35 @@ def stream_chat_message_objects(
             else:
                 if isinstance(packet, ToolCallFinalResult):
                     tool_result = packet
+                if (
+                    translate_target is not None
+                    and isinstance(packet, DanswerAnswerPiece)
+                    and packet.answer_piece
+                ):
+                    # Hold answer tokens back; we'll translate the full
+                    # answer at the end of the stream.
+                    buffered_answer_pieces.append(packet.answer_piece)
+                    continue
                 yield cast(ChatPacket, packet)
 
+        # End of stream. If we buffered for translation, do the second
+        # LLM pass now and emit the translated answer as one piece.
+        # `answer.llm_answer` reads from the same processed stream, so
+        # it already holds the full English text; use it as the source
+        # of truth rather than rejoining buffered_answer_pieces, whose
+        # only job is to keep the English intermediate from streaming
+        # to the client.
+        translated_answer_text: str | None = None
+        if translate_target is not None:
+            english_answer = answer.llm_answer
+            translated_answer_text = translate_answer_to_language(
+                answer_text=english_answer,
+                target_language_code=translate_target,
+                llm=llm,
+            )
+            yield DanswerAnswerPiece(answer_piece=translated_answer_text)
+            yield DanswerAnswerPiece(answer_piece=None)
+
     except Exception as e:
         logger.exception("Failed to process chat message")
 
@@ -627,14 +683,24 @@
         for tool in tool_list:
             tool_name_to_tool_id[tool.name()] = tool_id
 
+        # If we translated, persist the user-facing translated text
+        # rather than the English intermediate. Citations are computed
+        # from the LLM's English output (where the [1]/[2] markers
+        # were emitted relative to retrieved docs); the translation
+        # prompt preserves those markers verbatim.
+        final_answer_text = (
+            translated_answer_text
+            if translated_answer_text is not None
+            else answer.llm_answer
+        )
         gen_ai_response_message = partial_response(
-            message=answer.llm_answer,
+            message=final_answer_text,
             rephrased_query=(
                 qa_docs_response.rephrased_query if qa_docs_response else None
             ),
             reference_docs=reference_db_search_docs,
             files=ai_message_files,
-            token_count=len(llm_tokenizer_encode_func(answer.llm_answer)),
+            token_count=len(llm_tokenizer_encode_func(final_answer_text)),
             citations=db_citations,
             error=None,
             tool_calls=[
diff --git a/backend/danswer/db/models.py b/backend/danswer/db/models.py
index 58a8f32a8e9..e2bc854d09e 100644
--- a/backend/danswer/db/models.py
+++ b/backend/danswer/db/models.py
@@ -1003,6 +1003,15 @@ class Persona(Base):
     # Enables using LLM to extract time and source type filters
     # Can also be admin disabled globally
     llm_filter_extraction: Mapped[bool] = mapped_column(Boolean)
+    # When true, non-English queries on this persona are translated to
+    # English before retrieval and the LLM is instructed to answer in
+    # the user's original language. Off by default since most traffic
+    # is English and turning it on incurs an extra LLM call per query.
+    # Layered over the global MULTILINGUAL_QUERY_EXPANSION env var: a
+    # true flag forces it on; a false flag defers to the env var.
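+    # Consumers: the prompt builders OR this flag with the env var;
+    # retrieval maps a true flag to an "English" expansion target
+    # (see search/pipeline.py).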
+ multilingual_query_expansion: Mapped[bool] = mapped_column( + Boolean, nullable=False, default=False, server_default="false" + ) recency_bias: Mapped[RecencyBiasSetting] = mapped_column( Enum(RecencyBiasSetting, native_enum=False) ) diff --git a/backend/danswer/db/persona.py b/backend/danswer/db/persona.py index 26292fc9264..313192f30e6 100644 --- a/backend/danswer/db/persona.py +++ b/backend/danswer/db/persona.py @@ -79,6 +79,7 @@ def create_update_persona( llm_model_version_override=create_persona_request.llm_model_version_override, starter_messages=create_persona_request.starter_messages, is_public=create_persona_request.is_public, + multilingual_query_expansion=create_persona_request.multilingual_query_expansion, db_session=db_session, ) @@ -327,6 +328,7 @@ def upsert_persona( tool_ids: list[int] | None = None, persona_id: int | None = None, default_persona: bool = False, + multilingual_query_expansion: bool = False, commit: bool = True, ) -> Persona: if persona_id is not None: @@ -379,6 +381,7 @@ def upsert_persona( persona.starter_messages = starter_messages persona.deleted = False # Un-delete if previously deleted persona.is_public = is_public + persona.multilingual_query_expansion = multilingual_query_expansion # Do not delete any associations manually added unless # a new updated list is provided @@ -411,6 +414,7 @@ def upsert_persona( llm_model_version_override=llm_model_version_override, starter_messages=starter_messages, tools=tools or [], + multilingual_query_expansion=multilingual_query_expansion, ) db_session.add(persona) diff --git a/backend/danswer/llm/answering/models.py b/backend/danswer/llm/answering/models.py index a5248fac27a..8301c3a8420 100644 --- a/backend/danswer/llm/answering/models.py +++ b/backend/danswer/llm/answering/models.py @@ -121,10 +121,17 @@ class PromptConfig(BaseModel): task_prompt: str datetime_aware: bool include_citations: bool + # When true, the answer-side prompts add the LANGUAGE_HINT directive + # so the LLM responds in the user's original language. Sourced from + # the persona's multilingual_query_expansion flag at construction. 
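+    # Prompt builders OR this with the global MULTILINGUAL_QUERY_EXPANSION
+    # env var, so env-var-only deployments behave exactly as before.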
+ multilingual_query_expansion: bool = False @classmethod def from_model( - cls, model: "Prompt", prompt_override: PromptOverride | None = None + cls, + model: "Prompt", + prompt_override: PromptOverride | None = None, + multilingual_query_expansion: bool = False, ) -> "PromptConfig": override_system_prompt = ( prompt_override.system_prompt if prompt_override else None @@ -136,6 +143,7 @@ def from_model( task_prompt=override_task_prompt or model.task_prompt, datetime_aware=model.datetime_aware, include_citations=model.include_citations, + multilingual_query_expansion=multilingual_query_expansion, ) # needed so that this can be passed into lru_cache funcs diff --git a/backend/danswer/llm/answering/prompts/citations_prompt.py b/backend/danswer/llm/answering/prompts/citations_prompt.py index 69f727318d0..fbebe679f24 100644 --- a/backend/danswer/llm/answering/prompts/citations_prompt.py +++ b/backend/danswer/llm/answering/prompts/citations_prompt.py @@ -33,13 +33,16 @@ def get_prompt_tokens(prompt_config: PromptConfig) -> int: # Note: currently custom prompts do not allow datetime aware, only default prompts + use_language_hint = prompt_config.multilingual_query_expansion or bool( + MULTILINGUAL_QUERY_EXPANSION + ) return ( check_number_of_tokens(prompt_config.system_prompt) + check_number_of_tokens(prompt_config.task_prompt) + CHAT_USER_PROMPT_WITH_CONTEXT_OVERHEAD_TOKEN_CNT + CITATION_STATEMENT_TOKEN_CNT + CITATION_REMINDER_TOKEN_CNT - + (LANGUAGE_HINT_TOKEN_CNT if bool(MULTILINGUAL_QUERY_EXPANSION) else 0) + + (LANGUAGE_HINT_TOKEN_CNT if use_language_hint else 0) + (ADDITIONAL_INFO_TOKEN_CNT if prompt_config.datetime_aware else 0) ) @@ -135,7 +138,11 @@ def build_citations_user_message( all_doc_useful: bool, history_message: str = "", ) -> HumanMessage: - task_prompt_with_reminder = build_task_prompt_reminders(prompt_config) + task_prompt_with_reminder = build_task_prompt_reminders( + prompt_config, + use_language_hint=prompt_config.multilingual_query_expansion + or bool(MULTILINGUAL_QUERY_EXPANSION), + ) if context_docs: context_docs_str = build_complete_context_str(context_docs) diff --git a/backend/danswer/llm/answering/prompts/quotes_prompt.py b/backend/danswer/llm/answering/prompts/quotes_prompt.py index b2b67c65b37..a39a4f8bf0e 100644 --- a/backend/danswer/llm/answering/prompts/quotes_prompt.py +++ b/backend/danswer/llm/answering/prompts/quotes_prompt.py @@ -74,12 +74,18 @@ def _build_strong_llm_quotes_prompt( return HumanMessage(content=full_prompt) +def _resolve_use_language_hint(prompt: PromptConfig, override: bool | None) -> bool: + if override is not None: + return override + return prompt.multilingual_query_expansion or bool(MULTILINGUAL_QUERY_EXPANSION) + + def build_quotes_user_message( question: str, context_docs: list[LlmDoc] | list[InferenceChunk], history_str: str, prompt: PromptConfig, - use_language_hint: bool = bool(MULTILINGUAL_QUERY_EXPANSION), + use_language_hint: bool | None = None, ) -> HumanMessage: prompt_builder = ( _build_weak_llm_quotes_prompt @@ -92,7 +98,7 @@ def build_quotes_user_message( context_docs=context_docs, history_str=history_str, prompt=prompt, - use_language_hint=use_language_hint, + use_language_hint=_resolve_use_language_hint(prompt, use_language_hint), ) @@ -101,7 +107,7 @@ def build_quotes_prompt( context_docs: list[LlmDoc] | list[InferenceChunk], history_str: str, prompt: PromptConfig, - use_language_hint: bool = bool(MULTILINGUAL_QUERY_EXPANSION), + use_language_hint: bool | None = None, ) -> HumanMessage: prompt_builder = ( 
_build_weak_llm_quotes_prompt @@ -114,5 +120,5 @@ def build_quotes_prompt( context_docs=context_docs, history_str=history_str, prompt=prompt, - use_language_hint=use_language_hint, + use_language_hint=_resolve_use_language_hint(prompt, use_language_hint), ) diff --git a/backend/danswer/one_shot_answer/answer_question.py b/backend/danswer/one_shot_answer/answer_question.py index 3131406cab5..49da002d170 100644 --- a/backend/danswer/one_shot_answer/answer_question.py +++ b/backend/danswer/one_shot_answer/answer_question.py @@ -7,6 +7,9 @@ from danswer.chat.chat_utils import reorganize_citations from danswer.chat.models import CitationInfo from danswer.chat.models import DanswerAnswerPiece +from danswer.chat.multilingual_translation import detect_query_language +from danswer.chat.multilingual_translation import language_name +from danswer.chat.multilingual_translation import translate_answer_to_language from danswer.chat.models import DanswerContexts from danswer.chat.models import DanswerQuotes from danswer.chat.models import LLMRelevanceFilterResponse @@ -158,7 +161,14 @@ def stream_answer_objects( ) llm, fast_llm = get_llms_for_persona(persona=chat_session.persona) - prompt_config = PromptConfig.from_model(prompt) + persona_multilingual = ( + chat_session.persona.multilingual_query_expansion + if chat_session.persona is not None + else False + ) + prompt_config = PromptConfig.from_model( + prompt, multilingual_query_expansion=persona_multilingual + ) document_pruning_config = DocumentPruningConfig( max_chunks=int( chat_session.persona.num_chunks @@ -188,7 +198,9 @@ def stream_answer_objects( answer = Answer( question=query_msg.message, answer_style_config=answer_config, - prompt_config=PromptConfig.from_model(prompt), + prompt_config=PromptConfig.from_model( + prompt, multilingual_query_expansion=persona_multilingual + ), llm=get_main_llm_from_tuple(get_llms_for_persona(persona=chat_session.persona)), single_message_history=history_str, tools=[search_tool], @@ -203,6 +215,22 @@ def stream_answer_objects( ) # won't be any ImageGenerationDisplay responses since that tool is never passed in dropped_inds: list[int] = [] + + # Multi-language post-processing pass for the one-shot path + # (mirrors process_message.py). When the persona has the flag on + # and the user's question is non-English, buffer DanswerAnswerPiece + # tokens during the stream and emit a single translated piece at + # the end. CitationInfo packets still flow in real time so the + # slackbot's citation-required retry loop sees them. The translate + # prompt preserves [1]/[2] markers verbatim, so citations remain + # accurate after translation. + translate_target = None + if persona_multilingual: + detected = detect_query_language(query_msg.message) + if language_name(detected) is not None: + translate_target = detected + buffered_answer_pieces: list[str] = [] + for packet in cast(AnswerObjectIterator, answer.processed_streamed_output): # for one-shot flow, don't currently do anything with these if isinstance(packet, ToolResponse): @@ -252,15 +280,46 @@ def stream_answer_objects( elif packet.id == SEARCH_DOC_CONTENT_ID: yield packet.response else: + if ( + translate_target is not None + and isinstance(packet, DanswerAnswerPiece) + and packet.answer_piece + ): + # Hold answer tokens; we'll translate the full answer + # at the end of the stream and yield it as one piece. + buffered_answer_pieces.append(packet.answer_piece) + continue yield packet + # End of stream. 
If we buffered for translation, do the second LLM + # pass now and emit the translated answer as one piece. Use + # answer.llm_answer as source-of-truth for the English text — the + # processed stream is already cached on the Answer object. + translated_answer_text: str | None = None + if translate_target is not None: + english_answer = answer.llm_answer + translated_answer_text = translate_answer_to_language( + answer_text=english_answer, + target_language_code=translate_target, + llm=llm, + ) + yield DanswerAnswerPiece(answer_piece=translated_answer_text) + yield DanswerAnswerPiece(answer_piece=None) + + # If we translated, persist the user-facing translated text. + final_answer_text = ( + translated_answer_text + if translated_answer_text is not None + else answer.llm_answer + ) + # Saving Gen AI answer and responding with message info gen_ai_response_message = create_new_chat_message( chat_session_id=chat_session.id, parent_message=new_user_message, prompt_id=query_req.prompt_id, - message=answer.llm_answer, - token_count=len(llm_tokenizer(answer.llm_answer)), + message=final_answer_text, + token_count=len(llm_tokenizer(final_answer_text)), message_type=MessageType.ASSISTANT, error=None, reference_docs=reference_db_search_docs, diff --git a/backend/danswer/search/pipeline.py b/backend/danswer/search/pipeline.py index 98b1a87161d..7b2a31db7e2 100644 --- a/backend/danswer/search/pipeline.py +++ b/backend/danswer/search/pipeline.py @@ -273,12 +273,24 @@ def retrieved_chunks(self) -> list[InferenceChunk]: if self._retrieved_chunks is not None: return self._retrieved_chunks + # Resolve multilingual expansion: persona flag takes precedence, + # then global env var. Persona flag means "translate non-English + # queries to English for retrieval"; expressed as the string + # "English" so the existing expansion plumbing fans out the + # query to that language. + persona = self.search_request.persona + multilingual_expansion_str: str | None + if persona is not None and persona.multilingual_query_expansion: + multilingual_expansion_str = "English" + else: + multilingual_expansion_str = MULTILINGUAL_QUERY_EXPANSION + self._retrieved_chunks = retrieve_chunks( query=self.search_query, document_index=self.document_index, db_session=self.db_session, hybrid_alpha=self.search_request.hybrid_alpha, - multilingual_expansion_str=MULTILINGUAL_QUERY_EXPANSION, + multilingual_expansion_str=multilingual_expansion_str, retrieval_metrics_callback=self.retrieval_metrics_callback, ) diff --git a/backend/danswer/secondary_llm_flows/chat_session_naming.py b/backend/danswer/secondary_llm_flows/chat_session_naming.py index 9449eaded7a..9b02bbb5e27 100644 --- a/backend/danswer/secondary_llm_flows/chat_session_naming.py +++ b/backend/danswer/secondary_llm_flows/chat_session_naming.py @@ -15,15 +15,18 @@ def get_renamed_conversation_name( full_history: list[ChatMessage], llm: LLM, + use_language_hint: bool | None = None, ) -> str: history_str = combine_message_chain( messages=full_history, token_limit=GEN_AI_HISTORY_CUTOFF ) + # Persona flag wins; otherwise fall back to the global env var. 
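+    # None means the caller expressed no preference; an explicit False
+    # keeps the hint off even when the env var is set.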
+    if use_language_hint is None:
+        use_language_hint = bool(MULTILINGUAL_QUERY_EXPANSION)
+
     language_hint = (
-        f"\n{LANGUAGE_CHAT_NAMING_HINT.strip()}"
-        if bool(MULTILINGUAL_QUERY_EXPANSION)
-        else ""
+        f"\n{LANGUAGE_CHAT_NAMING_HINT.strip()}" if use_language_hint else ""
     )
 
     prompt_msgs = [
diff --git a/backend/danswer/server/features/persona/models.py b/backend/danswer/server/features/persona/models.py
index aee39e72af0..0b7111dc64b 100644
--- a/backend/danswer/server/features/persona/models.py
+++ b/backend/danswer/server/features/persona/models.py
@@ -33,6 +33,9 @@ class CreatePersonaRequest(BaseModel):
     # For Private Personas, who should be able to access these
     users: list[UUID] | None = None
     groups: list[int] | None = None
+    # Translate non-English queries to English for retrieval and
+    # answer in the user's original language. Off by default.
+    multilingual_query_expansion: bool = False
 
 
 class PersonaSnapshot(BaseModel):
@@ -55,6 +58,7 @@ class PersonaSnapshot(BaseModel):
     document_sets: list[DocumentSet]
     users: list[MinimalUserSnapshot]
     groups: list[int]
+    multilingual_query_expansion: bool
 
     @classmethod
     def from_model(
@@ -97,6 +101,7 @@ def from_model(
                 for user in persona.users
             ],
             groups=[user_group.id for user_group in persona.groups],
+            multilingual_query_expansion=persona.multilingual_query_expansion,
         )
diff --git a/backend/danswer/server/query_and_chat/chat_backend.py b/backend/danswer/server/query_and_chat/chat_backend.py
index 4e5a1bb2138..8236375c19a 100644
--- a/backend/danswer/server/query_and_chat/chat_backend.py
+++ b/backend/danswer/server/query_and_chat/chat_backend.py
@@ -234,7 +234,23 @@ def rename_chat_session(
         # clear thing we can do
         return RenameChatSessionResponse(new_name=full_history[0].message)
 
-    new_name = get_renamed_conversation_name(full_history=full_history, llm=llm)
+    # Honor the persona's multilingual flag so the title is named in the
+    # user's language when the persona is configured for multi-language.
+    # Pass None (not False) when there is no persona or the flag is off,
+    # so the naming helper can still fall back to the global env var,
+    # matching the flag's documented semantics everywhere else.
+    chat_session = get_chat_session_by_id(
+        chat_session_id=chat_session_id, user_id=user_id, db_session=db_session
+    )
+    use_language_hint = (
+        True
+        if chat_session.persona is not None
+        and chat_session.persona.multilingual_query_expansion
+        else None
+    )
+    new_name = get_renamed_conversation_name(
+        full_history=full_history, llm=llm, use_language_hint=use_language_hint
+    )
 
     update_chat_session(
         db_session=db_session,
diff --git a/backend/scripts/test_multilanguage_e2e.py b/backend/scripts/test_multilanguage_e2e.py
new file mode 100644
index 00000000000..4fa2a8db2ea
--- /dev/null
+++ b/backend/scripts/test_multilanguage_e2e.py
@@ -0,0 +1,961 @@
+"""End-to-end integration test for the per-persona multi-language flag.
+
+Drives the real stack (Postgres + Vespa + your configured GenAI provider)
+and verifies that:
+
+  Phase 1   Test fixtures are seeded: English docs land in Vespa
+            (BM25 + embeddings) via the real indexing pipeline
+  Phase 2   English baseline: the seeded docs are retrieved and the
+            answers contain the expected facts
+  Phase 3   With `multilingual_query_expansion=True`, non-English
+            queries retrieve the seeded docs and the streamed answer
+            comes back in the user's original language
+            (script-detection heuristic on Unicode ranges)
+  Phase 4   A control Persona with the flag OFF behaves differently
+            (logged, not asserted — flagged behavior is the contract)
+  Phase 5   The Slack one-shot path (`get_search_answer`) honors the
+            same contract as Phase 3
+
+Designed for a developer running the local stack. Uses the existing
+ingestion + chat code paths directly (no HTTP) so it doubles as a
+fast smoke test of the wiring we just added.
+
+DESTRUCTIVE: writes (and on --clean removes) rows + Vespa documents
+prefixed with `__test_multilang__`. Run only against a dev DB.
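+
+Assumes the dev stack's usual configuration is already in place
+(Postgres and Vespa reachable, a GenAI provider configured); nothing
+is mocked, so a failure here usually points at real wiring problems.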
+ +Usage: + cd backend + PYTHONPATH=$(pwd) python scripts/test_multilanguage_e2e.py [--yes] [--clean] [--keep-data] + +Exits 0 on success, non-zero on the first hard failure. Phase 4 is +informational only and does not gate exit code. +""" +from __future__ import annotations + +import argparse +import logging +import sys +import unicodedata +from dataclasses import dataclass + +from sqlalchemy import select +from sqlalchemy import text +from sqlalchemy.orm import Session + +from danswer.chat.models import DanswerAnswerPiece +from danswer.chat.models import QADocsResponse +from danswer.chat.models import StreamingError +from danswer.chat.process_message import stream_chat_message_objects +from danswer.configs.constants import DocumentSource +from danswer.connectors.models import Document +from danswer.connectors.models import IndexAttemptMetadata +from danswer.connectors.models import InputType +from danswer.connectors.models import Section +from danswer.db.chat import create_chat_session +from danswer.db.chat import get_or_create_root_message +from danswer.db.embedding_model import get_current_db_embedding_model +from danswer.db.engine import get_session_context_manager +from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.models import Connector +from danswer.db.models import ConnectorCredentialPair +from danswer.db.models import Credential +from danswer.db.models import Document as DbDocument +from danswer.db.models import DocumentByConnectorCredentialPair +from danswer.db.models import Persona +from danswer.db.models import Tool as ToolDBModel +from danswer.db.persona import get_default_prompt +from danswer.db.persona import upsert_persona +from danswer.tools.search.search_tool import SearchTool +from danswer.document_index.factory import get_default_document_index +from danswer.indexing.embedder import DefaultIndexingEmbedder +from danswer.indexing.indexing_pipeline import build_indexing_pipeline +from danswer.one_shot_answer.answer_question import get_search_answer +from danswer.one_shot_answer.models import DirectQARequest +from danswer.one_shot_answer.models import ThreadMessage +from danswer.search.enums import OptionalSearchSetting +from danswer.search.enums import RecencyBiasSetting +from danswer.search.models import RetrievalDetails +from danswer.server.query_and_chat.models import CreateChatMessageRequest + + +# Keep the global logger quiet so test output is readable. +logging.getLogger().setLevel(logging.WARNING) + + +SEED_PREFIX = "__test_multilang__" +PERSONA_ML_NAME = f"{SEED_PREFIX}persona-multilingual" +PERSONA_CONTROL_NAME = f"{SEED_PREFIX}persona-control" + + +# --------------------------------------------------------------------------- +# Seed corpus — three facts, each in a distinct doc, all in English. +# Designed so retrieval recall is unambiguous: each query maps cleanly +# to exactly one doc. +# --------------------------------------------------------------------------- + + +@dataclass +class SeedDoc: + doc_id: str + title: str + body: str + # The entity the query asks about (kept stable across translations + # so we can verify the right doc was retrieved by checking the + # answer's content for this string). + expected_entity: str + + +# NOTE on entity naming: we deliberately use a fictitious-but-unique +# brand ("Zorblax") in seed docs so the queries do not collide with any +# real entity in the host's existing corpus (Salesforce accounts, Slack +# threads, etc.). 
When a generic name like "Acme Corp" is used, the +# retriever's history rephrase + multilingual translation can produce +# ambiguous fragments that match unrelated docs, and the answer LLM +# hedges. The unique brand keeps the right doc dominant. +SEED_CORPUS: list[SeedDoc] = [ + SeedDoc( + doc_id=f"{SEED_PREFIX}doc-vacation-policy", + title="Zorblax Vacation Policy", + body=( + "All Zorblax full-time employees are entitled to 25 paid " + "vacation days per calendar year. Vacation days do not roll " + "over to the following year. Requests must be submitted at " + "least two weeks in advance through the Zorblax HR portal." + ), + expected_entity="25", + ), + SeedDoc( + doc_id=f"{SEED_PREFIX}doc-vpn-setup", + title="Zorblax VPN Setup Guide", + body=( + "To connect to the Zorblax VPN, install the GlobalProtect " + "client from the IT self-service portal. Use your corporate " + "email as the username and your single sign-on password. " + "The Zorblax gateway URL is vpn.zorblax.example.com." + ), + expected_entity="GlobalProtect", + ), + SeedDoc( + doc_id=f"{SEED_PREFIX}doc-printer-help", + title="Zorblax Office Printer Troubleshooting", + body=( + "If the Zorblax office printer is not responding, first " + "check the network cable and power. The Zorblax printer's " + "IP address is 10.20.30.40. To reset the print queue, open " + "the Printers control panel and select 'Cancel All " + "Documents'." + ), + expected_entity="10.20.30.40", + ), +] + + +# --------------------------------------------------------------------------- +# Test queries — each language asks the same questions about the seeded +# English docs. The translations are deliberately straightforward so the +# LLM rephrase has a fair chance. +# --------------------------------------------------------------------------- + + +@dataclass +class LanguageCase: + code: str # ISO-ish for display + label: str + queries: list[tuple[str, SeedDoc]] # (query_text, expected_doc) + + +CASES: list[LanguageCase] = [ + LanguageCase( + code="en", + label="English", + queries=[ + ("How many vacation days do Zorblax employees get?", SEED_CORPUS[0]), + ("How do I connect to the Zorblax VPN?", SEED_CORPUS[1]), + ("What is the IP address of the Zorblax office printer?", SEED_CORPUS[2]), + ], + ), + LanguageCase( + code="ja", + label="Japanese", + queries=[ + ("Zorblaxの従業員は何日の有給休暇が取れますか?", SEED_CORPUS[0]), + ("ZorblaxのVPNに接続するにはどうすればいいですか?", SEED_CORPUS[1]), + ("Zorblaxのオフィスプリンタの IPアドレスは何ですか?", SEED_CORPUS[2]), + ], + ), + LanguageCase( + code="zh", + label="Chinese", + queries=[ + ("Zorblax 公司的员工每年有多少天带薪休假?", SEED_CORPUS[0]), + ("如何连接 Zorblax 公司的 VPN?", SEED_CORPUS[1]), + ("Zorblax 办公室打印机的 IP 地址是多少?", SEED_CORPUS[2]), + ], + ), + LanguageCase( + code="ko", + label="Korean", + queries=[ + ("Zorblax 직원은 연간 며칠의 유급 휴가를 받을 수 있나요?", SEED_CORPUS[0]), + ("Zorblax의 VPN에 어떻게 접속하나요?", SEED_CORPUS[1]), + ("Zorblax 사무실 프린터의 IP 주소는 무엇인가요?", SEED_CORPUS[2]), + ], + ), +] + + +# --------------------------------------------------------------------------- +# Output helpers +# --------------------------------------------------------------------------- + + +_PASS = "\033[32mPASS\033[0m" +_FAIL = "\033[31mFAIL\033[0m" +_INFO = "\033[33mINFO\033[0m" + + +def section(title: str) -> None: + print(f"\n=== {title} ===") + + +def ok(msg: str) -> None: + print(f" [{_PASS}] {msg}") + + +def fail(msg: str) -> None: + print(f" [{_FAIL}] {msg}") + + +def info(msg: str) -> None: + print(f" [{_INFO}] {msg}") + + +# 
--------------------------------------------------------------------------- +# Language detection — heuristic, by Unicode block dominance. +# --------------------------------------------------------------------------- + + +def detect_language(text: str) -> str: + """Returns one of: 'ja', 'zh', 'ko', 'en', 'mixed/other'. + + Heuristic: count code points by script. If >= 5% Hiragana/Katakana, + call it Japanese (kanji alone could be Japanese or Chinese, so + presence of kana disambiguates). Else if >= 5% Hangul → Korean. + Else if >= 5% CJK ideographs → Chinese. Else if mostly ASCII letters + → English. Else 'mixed/other'. + """ + if not text: + return "mixed/other" + counts = {"hiragana_katakana": 0, "hangul": 0, "cjk": 0, "ascii_letter": 0} + total_letters = 0 + for ch in text: + cp = ord(ch) + if (0x3040 <= cp <= 0x309F) or (0x30A0 <= cp <= 0x30FF): + counts["hiragana_katakana"] += 1 + total_letters += 1 + elif 0xAC00 <= cp <= 0xD7AF: + counts["hangul"] += 1 + total_letters += 1 + elif (0x4E00 <= cp <= 0x9FFF) or (0x3400 <= cp <= 0x4DBF): + counts["cjk"] += 1 + total_letters += 1 + elif unicodedata.category(ch).startswith("L"): + # Latin-script letter (etc.) + counts["ascii_letter"] += 1 + total_letters += 1 + if total_letters == 0: + return "mixed/other" + threshold = max(1, total_letters // 20) # 5% + if counts["hiragana_katakana"] >= threshold: + return "ja" + if counts["hangul"] >= threshold: + return "ko" + if counts["cjk"] >= threshold: + return "zh" + if counts["ascii_letter"] >= total_letters * 0.7: + return "en" + return "mixed/other" + + +# --------------------------------------------------------------------------- +# Setup: connector / credential / cc-pair / docs +# --------------------------------------------------------------------------- + + +def confirm_destructive(skip: bool) -> None: + engine = get_sqlalchemy_engine() + url = engine.url + safe_url = f"{url.drivername}://{url.username}@{url.host}:{url.port}/{url.database}" + if skip: + print(f"[--yes] Proceeding against {safe_url}") + return + print(f"This script writes/deletes tagged ({SEED_PREFIX!r}) data in:") + print(f" {safe_url}") + print("It also indexes a small set of test docs into Vespa.") + answer = input("Type 'yes' to continue: ") + if answer.strip().lower() != "yes": + print("Aborted.") + sys.exit(1) + + +def get_or_create_test_cc_pair(db: Session) -> ConnectorCredentialPair: + existing = db.execute( + select(ConnectorCredentialPair).where( + ConnectorCredentialPair.name == f"{SEED_PREFIX}ccp" + ) + ).scalar_one_or_none() + if existing is not None: + return existing + + connector = Connector( + name=f"{SEED_PREFIX}connector", + source=DocumentSource.INGESTION_API, + input_type=InputType.LOAD_STATE, + connector_specific_config={"_test_multilang": True}, + refresh_freq=None, + disabled=False, + ) + credential = Credential(admin_public=True, credential_json={}) + db.add_all([connector, credential]) + db.flush() + ccp = ConnectorCredentialPair( + connector_id=connector.id, + credential_id=credential.id, + name=f"{SEED_PREFIX}ccp", + is_public=True, + total_docs_indexed=0, + ) + db.add(ccp) + db.commit() + return ccp + + +def seed_vespa_docs(db: Session, ccp: ConnectorCredentialPair) -> int: + """Push the SEED_CORPUS through the real indexing pipeline so they + land in Vespa with embeddings + BM25. 
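+    Uses the live embedding model and indexing pipeline, so this
+    exercises the same write path as a real connector run.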
Returns the number indexed.""" + embedding_model = get_current_db_embedding_model(db) + document_index = get_default_document_index( + primary_index_name=embedding_model.index_name, + secondary_index_name=None, + ) + + embedder = DefaultIndexingEmbedder( + model_name=embedding_model.model_name, + normalize=embedding_model.normalize, + query_prefix=embedding_model.query_prefix, + passage_prefix=embedding_model.passage_prefix, + ) + + pipeline = build_indexing_pipeline( + embedder=embedder, + document_index=document_index, + ignore_time_skip=True, + db_session=db, + ) + + docs = [ + Document( + id=sd.doc_id, + sections=[Section(text=f"{sd.title}\n\n{sd.body}", link=None)], + source=DocumentSource.INGESTION_API, + semantic_identifier=sd.title, + metadata={"_test_multilang": "true"}, + from_ingestion_api=True, + ) + for sd in SEED_CORPUS + ] + + new_doc, chunks = pipeline( + documents=docs, + index_attempt_metadata=IndexAttemptMetadata( + connector_id=ccp.connector_id, + credential_id=ccp.credential_id, + ), + ) + return new_doc + + +# --------------------------------------------------------------------------- +# Persona helpers +# --------------------------------------------------------------------------- + + +def upsert_test_persona(db: Session, name: str, multilingual: bool) -> Persona: + default_prompt = get_default_prompt(db) + # Without the SearchTool attached, the chat flow has nothing to + # retrieve with — the LLM falls back to its training knowledge and + # never sees our seeded docs. Look it up by in_code_tool_id so the + # test isn't tied to a hardcoded id. + search_tool_row = db.execute( + select(ToolDBModel).where(ToolDBModel.in_code_tool_id == SearchTool.__name__) + ).scalar_one_or_none() + if search_tool_row is None: + raise RuntimeError( + "Built-in SearchTool not found in DB; ensure api-server has " + "started at least once so it can seed in-code tools." + ) + persona = upsert_persona( + user=None, + name=name, + description=f"{SEED_PREFIX} persona for multilingual e2e test", + num_chunks=10, + llm_relevance_filter=False, + llm_filter_extraction=False, + recency_bias=RecencyBiasSetting.BASE_DECAY, + llm_model_provider_override=None, + llm_model_version_override=None, + starter_messages=None, + is_public=True, + prompt_ids=[default_prompt.id], + document_set_ids=[], + tool_ids=[search_tool_row.id], + multilingual_query_expansion=multilingual, + db_session=db, + ) + return persona + + +# --------------------------------------------------------------------------- +# Drive a single chat-message call and collect what we need. 
+# --------------------------------------------------------------------------- + + +@dataclass +class ChatProbeResult: + answer_text: str + retrieved_doc_ids: list[str] + retrieved_titles: list[str] + error: str | None + + +def probe_chat(persona: Persona, query: str) -> ChatProbeResult: + """One-shot: create chat session, send message, drain the stream.""" + with get_session_context_manager() as db_session: + chat_session = create_chat_session( + db_session=db_session, + description=f"{SEED_PREFIX}probe", + user_id=None, + persona_id=persona.id, + ) + root = get_or_create_root_message( + chat_session_id=chat_session.id, db_session=db_session + ) + + req = CreateChatMessageRequest( + chat_session_id=chat_session.id, + parent_message_id=root.id, + message=query, + file_descriptors=[], + prompt_id=None, + search_doc_ids=None, + retrieval_options=RetrievalDetails( + run_search=OptionalSearchSetting.ALWAYS, real_time=True + ), + ) + + answer_pieces: list[str] = [] + retrieved_doc_ids: list[str] = [] + retrieved_titles: list[str] = [] + error: str | None = None + + try: + for obj in stream_chat_message_objects( + new_msg_req=req, + user=None, + db_session=db_session, + ): + if isinstance(obj, DanswerAnswerPiece): + if obj.answer_piece: + answer_pieces.append(obj.answer_piece) + elif isinstance(obj, QADocsResponse): + for d in obj.top_documents or []: + retrieved_doc_ids.append(d.document_id) + retrieved_titles.append(d.semantic_identifier or "") + elif isinstance(obj, StreamingError): + error = obj.error + except Exception as exc: + error = f"{type(exc).__name__}: {exc}" + + return ChatProbeResult( + answer_text="".join(answer_pieces).strip(), + retrieved_doc_ids=retrieved_doc_ids, + retrieved_titles=retrieved_titles, + error=error, + ) + + +# --------------------------------------------------------------------------- +# Cleanup +# --------------------------------------------------------------------------- + + +def cleanup(db: Session) -> None: + section("Cleanup") + # FK dependency graph (collected via pg_constraint): + # chat_message__search_doc -> chat_message + # tool_call -> chat_message + # chat_feedback -> chat_message + # document_retrieval_feedback -> chat_message + # chat_message -> chat_session + # chat_session -> persona + # We must drop dependents before parents. Use raw SQL — much + # cleaner than walking the ORM for a destructive teardown. + # + # Match scope: any chat session whose description carries our + # SEED_PREFIX *or* whose persona is one of our test personas. + # That covers prior aborted runs, runs that crashed mid-test, and + # the case where the chat UI was used to talk to our test persona. 
+ db.execute( + text( + """ + CREATE TEMP TABLE _ml_test_sessions ON COMMIT DROP AS + SELECT cs.id + FROM chat_session cs + WHERE cs.description LIKE :prefix + OR cs.persona_id IN ( + SELECT id FROM persona WHERE name LIKE :prefix + ); + """ + ), + {"prefix": f"{SEED_PREFIX}%"}, + ) + db.execute( + text( + """ + CREATE TEMP TABLE _ml_test_messages ON COMMIT DROP AS + SELECT id FROM chat_message + WHERE chat_session_id IN (SELECT id FROM _ml_test_sessions); + """ + ) + ) + + deleted_msdoc = db.execute( + text( + """ + DELETE FROM chat_message__search_doc + WHERE chat_message_id IN (SELECT id FROM _ml_test_messages); + """ + ) + ).rowcount + deleted_toolcall = db.execute( + text( + """ + DELETE FROM tool_call + WHERE message_id IN (SELECT id FROM _ml_test_messages); + """ + ) + ).rowcount + deleted_cfeedback = db.execute( + text( + """ + DELETE FROM chat_feedback + WHERE chat_message_id IN (SELECT id FROM _ml_test_messages); + """ + ) + ).rowcount + deleted_drfeedback = db.execute( + text( + """ + DELETE FROM document_retrieval_feedback + WHERE chat_message_id IN (SELECT id FROM _ml_test_messages); + """ + ) + ).rowcount + deleted_msgs = db.execute( + text( + """ + DELETE FROM chat_message + WHERE id IN (SELECT id FROM _ml_test_messages); + """ + ) + ).rowcount + deleted_sessions = db.execute( + text( + """ + DELETE FROM chat_session + WHERE id IN (SELECT id FROM _ml_test_sessions); + """ + ) + ).rowcount + + info( + f"deleted {deleted_sessions} chat session(s), {deleted_msgs} " + f"message(s); cascaded: msg__search_doc={deleted_msdoc}, " + f"tool_call={deleted_toolcall}, chat_feedback={deleted_cfeedback}, " + f"document_retrieval_feedback={deleted_drfeedback}" + ) + + # Personas (now safe to drop — no chat session points at them). + personas = ( + db.execute(select(Persona).where(Persona.name.like(f"{SEED_PREFIX}%"))) + .scalars() + .all() + ) + for p in personas: + db.delete(p) + info(f"deleted {len(personas)} test persona(s)") + + # Documents (Postgres rows; Vespa cleanup is best-effort below) + db_docs = ( + db.execute(select(DbDocument).where(DbDocument.id.like(f"{SEED_PREFIX}%"))) + .scalars() + .all() + ) + for d in db_docs: + db.execute( + DocumentByConnectorCredentialPair.__table__.delete().where( + DocumentByConnectorCredentialPair.id == d.id + ) + ) + db.delete(d) + info(f"deleted {len(db_docs)} document row(s)") + + # cc-pair, connector, credential + ccp = db.execute( + select(ConnectorCredentialPair).where( + ConnectorCredentialPair.name == f"{SEED_PREFIX}ccp" + ) + ).scalar_one_or_none() + if ccp is not None: + connector_id = ccp.connector_id + credential_id = ccp.credential_id + db.delete(ccp) + connector = db.get(Connector, connector_id) + if connector is not None: + db.delete(connector) + credential = db.get(Credential, credential_id) + if credential is not None: + db.delete(credential) + info("deleted test cc-pair / connector / credential") + + db.commit() + info( + "Vespa: tagged test docs intentionally left in the index " + "(deletion goes through the connector framework). Re-running " + "this test reindexes them in place." 
+ ) + + +# --------------------------------------------------------------------------- +# Phases +# --------------------------------------------------------------------------- + + +def phase_setup(db: Session) -> tuple[ConnectorCredentialPair, Persona, Persona]: + section("Phase 1 — setup test fixtures") + ccp = get_or_create_test_cc_pair(db) + ok(f"cc-pair {ccp.id} ({ccp.name}) ready") + n_indexed = seed_vespa_docs(db, ccp) + if n_indexed != len(SEED_CORPUS): + # n_indexed is the count of *new* docs, so a re-run yields 0. + info( + f"indexing pipeline reported {n_indexed} new docs " + f"(re-runs reindex existing docs in place)" + ) + ok(f"seeded {len(SEED_CORPUS)} English doc(s) into Vespa") + + persona_ml = upsert_test_persona(db, PERSONA_ML_NAME, multilingual=True) + persona_ctrl = upsert_test_persona(db, PERSONA_CONTROL_NAME, multilingual=False) + ok( + f"persona [{persona_ml.name}] id={persona_ml.id}, " + f"multilingual_query_expansion={persona_ml.multilingual_query_expansion}" + ) + ok( + f"persona [{persona_ctrl.name}] id={persona_ctrl.id}, " + f"multilingual_query_expansion={persona_ctrl.multilingual_query_expansion}" + ) + return ccp, persona_ml, persona_ctrl + + +def phase_english_baseline(persona_ml: Persona) -> bool: + section("Phase 2 — English baseline (sanity check)") + case = next(c for c in CASES if c.code == "en") + failures = 0 + for query, expected_doc in case.queries: + result = probe_chat(persona_ml, query) + if result.error: + fail(f"[en] '{query[:60]}' streaming error: {result.error}") + failures += 1 + continue + if expected_doc.doc_id in result.retrieved_doc_ids: + ok(f"[en] retrieval hit expected doc for: '{query[:60]}'") + else: + fail(f"[en] expected doc NOT in top docs for: '{query[:60]}'") + info(f" retrieved: {result.retrieved_titles[:3]}") + failures += 1 + # Sanity: did the expected entity appear in the answer? + if expected_doc.expected_entity.lower() in result.answer_text.lower(): + ok( + f"[en] answer contains expected entity " + f"'{expected_doc.expected_entity}'" + ) + else: + info( + f"[en] answer does NOT contain '{expected_doc.expected_entity}' " + f"(LLM may have paraphrased; check manually). " + f"Answer head: {result.answer_text[:120]!r}" + ) + return failures == 0 + + +def phase_non_english(persona_ml: Persona) -> bool: + """Hard contract for the persona flag: when on, non-English queries + must (a) translate-for-retrieval so the right English doc is found, + (b) the answer must contain the factual entity from that doc + (numeric / proper-noun entities survive translation), AND (c) the + final answer text is in the user's language. (c) is enforced by the + post-translation pass in process_message.py — the answering LLM + might still produce English internally, but the second pass + translates that to the user's language before we yield it.""" + section("Phase 3 — non-English queries with multilingual flag ON") + failures = 0 + lang_match = 0 + lang_total = 0 + for case in CASES: + if case.code == "en": + continue + for query, expected_doc in case.queries: + result = probe_chat(persona_ml, query) + if result.error: + fail( + f"[{case.code}] '{query[:60]}' streaming error: " f"{result.error}" + ) + failures += 1 + continue + + # 3a — retrieval brought back the right English doc. + # This proves the persona flag wired translate-to-English + # into retrieval. 
+ if expected_doc.doc_id in result.retrieved_doc_ids: + ok(f"[{case.code}] retrieval hit expected doc for: " f"'{query[:60]}'") + else: + fail( + f"[{case.code}] expected doc NOT in top docs for: " + f"'{query[:60]}'" + ) + info(f" retrieved: {result.retrieved_titles[:3]}") + failures += 1 + + # 3b — answer contains the expected entity. Entities are + # numerals / proper nouns that survive translation, so the + # LLM should keep them verbatim regardless of output + # language. This is the strongest correctness signal. + if expected_doc.expected_entity.lower() in result.answer_text.lower(): + ok( + f"[{case.code}] answer contains expected entity " + f"'{expected_doc.expected_entity}'" + ) + else: + fail( + f"[{case.code}] answer missing entity " + f"'{expected_doc.expected_entity}'. Answer head: " + f"{result.answer_text[:120]!r}" + ) + failures += 1 + + # 3c — answer is in the user's language. Now a hard + # assertion because the post-translation pass guarantees + # this regardless of the answering LLM's behavior. If the + # detected language doesn't match, either the post-pass + # was not invoked (wiring bug) or it returned the English + # fallback (translate LLM call failed). + detected = detect_language(result.answer_text) + lang_total += 1 + if detected == case.code: + lang_match += 1 + ok(f"[{case.code}] answer language: {detected}") + else: + fail( + f"[{case.code}] expected {case.code} answer, detected " + f"{detected}. Answer head: {result.answer_text[:200]!r}" + ) + failures += 1 + info( + f"language-match summary: {lang_match}/{lang_total} non-English " + f"answers came back in the user's language" + ) + return failures == 0 + + +def probe_slack(persona: Persona, query: str) -> ChatProbeResult: + """Drive the one-shot answer path that the Slack listener uses. + `get_search_answer` runs the same Answer pipeline as chat but with + its own retry loop and citation enforcement.""" + with get_session_context_manager() as db_session: + req = DirectQARequest( + messages=[ThreadMessage(message=query, sender=None)], + prompt_id=None, + persona_id=persona.id, + retrieval_options=RetrievalDetails( + run_search=OptionalSearchSetting.ALWAYS, real_time=True + ), + ) + try: + response = get_search_answer( + query_req=req, + user=None, + max_document_tokens=None, + max_history_tokens=None, + db_session=db_session, + use_citations=True, + danswerbot_flow=True, + ) + except Exception as exc: + return ChatProbeResult( + answer_text="", + retrieved_doc_ids=[], + retrieved_titles=[], + error=f"{type(exc).__name__}: {exc}", + ) + + retrieved_doc_ids: list[str] = [] + retrieved_titles: list[str] = [] + if response.docs and response.docs.top_documents: + for d in response.docs.top_documents: + retrieved_doc_ids.append(d.document_id) + retrieved_titles.append(d.semantic_identifier or "") + return ChatProbeResult( + answer_text=(response.answer or "").strip(), + retrieved_doc_ids=retrieved_doc_ids, + retrieved_titles=retrieved_titles, + error=response.error_msg, + ) + + +def phase_slack(persona_ml: Persona) -> bool: + """Smoke test for the Slack one-shot path. 
Same hard contract as + Phase 3 (retrieval hit + entity in answer + answer in user's + language), but driven through `get_search_answer` — the function + the slack listener calls.""" + section("Phase 5 — Slack one-shot path with multilingual flag ON") + failures = 0 + lang_match = 0 + lang_total = 0 + for case in CASES: + if case.code == "en": + continue + # One query per language is plenty for a smoke test (each + # query takes 2× LLM round-trips: answer + translate). + query, expected_doc = case.queries[0] + result = probe_slack(persona_ml, query) + if result.error: + fail(f"[slack {case.code}] '{query[:60]}' error: {result.error}") + failures += 1 + continue + + if expected_doc.doc_id in result.retrieved_doc_ids: + ok(f"[slack {case.code}] retrieval hit expected doc") + else: + fail( + f"[slack {case.code}] expected doc NOT in top docs. " + f"retrieved: {result.retrieved_titles[:3]}" + ) + failures += 1 + + if expected_doc.expected_entity.lower() in result.answer_text.lower(): + ok( + f"[slack {case.code}] answer contains entity " + f"'{expected_doc.expected_entity}'" + ) + else: + fail( + f"[slack {case.code}] answer missing entity " + f"'{expected_doc.expected_entity}'. Answer head: " + f"{result.answer_text[:120]!r}" + ) + failures += 1 + + detected = detect_language(result.answer_text) + lang_total += 1 + if detected == case.code: + lang_match += 1 + ok(f"[slack {case.code}] answer language: {detected}") + else: + fail( + f"[slack {case.code}] expected {case.code}, detected " + f"{detected}. Answer head: {result.answer_text[:200]!r}" + ) + failures += 1 + info( + f"slack-path language-match summary: {lang_match}/{lang_total} " + f"non-English answers came back in the user's language" + ) + return failures == 0 + + +def phase_control(persona_ctrl: Persona) -> None: + section("Phase 4 — control: same queries with flag OFF (informational)") + for case in CASES: + if case.code == "en": + continue + # Just one query per language is enough to see the contrast. 
+ query, expected_doc = case.queries[0] + result = probe_chat(persona_ctrl, query) + if result.error: + info(f"[{case.code}] streaming error: {result.error}") + continue + retrieved = expected_doc.doc_id in result.retrieved_doc_ids + detected = detect_language(result.answer_text) + info( + f"[{case.code}] flag-OFF persona | retrieval-hit={retrieved} | " + f"answer-lang={detected}" + ) + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + + +def main() -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--yes", + action="store_true", + help="Skip the destructive-action confirmation prompt", + ) + parser.add_argument( + "--clean", action="store_true", help="Remove tagged test data and exit" + ) + parser.add_argument( + "--keep-data", + action="store_true", + help="Skip cleanup at the end of a successful run", + ) + args = parser.parse_args() + + confirm_destructive(args.yes) + + if args.clean: + with get_session_context_manager() as db: + cleanup(db) + return 0 + + overall_ok = True + with get_session_context_manager() as db: + try: + ccp, persona_ml, persona_ctrl = phase_setup(db) + except Exception as exc: + fail(f"setup failed: {type(exc).__name__}: {exc}") + return 2 + + if not phase_english_baseline(persona_ml): + overall_ok = False + + if not phase_non_english(persona_ml): + overall_ok = False + + if not phase_slack(persona_ml): + overall_ok = False + + phase_control(persona_ctrl) + + if not args.keep_data: + with get_session_context_manager() as db: + cleanup(db) + + print() + if overall_ok: + print(f"[{_PASS}] multi-language e2e: all hard assertions passed") + return 0 + print(f"[{_FAIL}] multi-language e2e: see failures above") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/web/src/app/admin/assistants/AssistantEditor.tsx b/web/src/app/admin/assistants/AssistantEditor.tsx index c58cdcdadf9..4c4e7152bb8 100644 --- a/web/src/app/admin/assistants/AssistantEditor.tsx +++ b/web/src/app/admin/assistants/AssistantEditor.tsx @@ -178,6 +178,8 @@ export function AssistantEditor({ num_chunks: existingPersona?.num_chunks ?? null, include_citations: existingPersona?.prompts[0]?.include_citations ?? true, llm_relevance_filter: existingPersona?.llm_relevance_filter ?? false, + multilingual_query_expansion: + existingPersona?.multilingual_query_expansion ?? false, llm_model_provider_override: existingPersona?.llm_model_provider_override ?? 
null,
     llm_model_version_override:
@@ -213,6 +215,7 @@
       num_chunks: Yup.number().nullable(),
       include_citations: Yup.boolean().required(),
       llm_relevance_filter: Yup.boolean().required(),
+      multilingual_query_expansion: Yup.boolean().required(),
       llm_model_version_override: Yup.string().nullable(),
       llm_model_provider_override: Yup.string().nullable(),
       starter_messages: Yup.array().of(
@@ -580,6 +583,14 @@
           }
         />
+
+        <BooleanFormField
+          name="multilingual_query_expansion"
+          label="Multilingual Support"
+          subtext="Translate non-English questions into English for
+          retrieval and answer in the user's original language."
+        />
+
       );
-}
\ No newline at end of file
+}
diff --git a/web/src/app/admin/assistants/interfaces.ts b/web/src/app/admin/assistants/interfaces.ts
index 0a06ac4cc82..5c9869f78d7 100644
--- a/web/src/app/admin/assistants/interfaces.ts
+++ b/web/src/app/admin/assistants/interfaces.ts
@@ -38,4 +38,5 @@ export interface Persona {
   default_persona: boolean;
   users: MinimalUserSnapshot[];
   groups: number[];
+  multilingual_query_expansion?: boolean;
 }
diff --git a/web/src/app/admin/assistants/lib.ts b/web/src/app/admin/assistants/lib.ts
index 4d42789d810..4eadb98f243 100644
--- a/web/src/app/admin/assistants/lib.ts
+++ b/web/src/app/admin/assistants/lib.ts
@@ -16,6 +16,7 @@ interface PersonaCreationRequest {
   users?: string[];
   groups: number[];
   tool_ids: number[]; // Added tool_ids to the interface
+  multilingual_query_expansion: boolean;
 }
 
 interface PersonaUpdateRequest {
@@ -36,6 +37,7 @@ interface PersonaUpdateRequest {
   users?: string[];
   groups: number[];
   tool_ids: number[]; // Added tool_ids to the interface
+  multilingual_query_expansion: boolean;
 }
 
 function promptNameFromPersonaName(personaName: string) {
@@ -110,6 +112,7 @@ function buildPersonaAPIBody(
     groups,
     users,
     tool_ids, // Added tool_ids to the destructuring
+    multilingual_query_expansion,
   } = creationRequest;
 
   return {
@@ -128,6 +131,7 @@ function buildPersonaAPIBody(
     users,
     groups,
     tool_ids, // Added tool_ids to the return object
+    multilingual_query_expansion,
   };
 }
diff --git a/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx b/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx
index 55497149ea0..032e47aad00 100644
--- a/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx
+++ b/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx
@@ -167,8 +167,21 @@ export const SlackBotCreationForm = ({
         }),
         curated_response_config: Yup.object().shape({
           enable_curated_response_integration: Yup.boolean().required(),
-          response_message: Yup.string().required(
-            "Response message is required when curated response integration is enabled"
+          // Mirror jira_config: only require this when the integration
+          // is enabled. Without the .when() guard the field is required
+          // unconditionally, but the UI hides the input when the toggle
+          // is off — Formik silently rejects submit and no error is
+          // visible since the (errored) field isn't on screen.
+ response_message: Yup.string().when( + "enable_curated_response_integration", + { + is: true, + then: (schema) => + schema.required( + "Response message is required when curated response integration is enabled" + ), + otherwise: (schema) => schema.notRequired(), + } ), }), jira_title_filter: Yup.array() diff --git a/web/src/components/table/DragHandle.tsx b/web/src/components/table/DragHandle.tsx index a288d58b46c..3bebed22822 100644 --- a/web/src/components/table/DragHandle.tsx +++ b/web/src/components/table/DragHandle.tsx @@ -1,13 +1,14 @@ import React from "react"; import { MdDragIndicator } from "react-icons/md"; -export const DragHandle = (props: any) => { +export const DragHandle = ({ isDragging, ...rest }: any) => { + // `isDragging` is a logical prop from @dnd-kit/sortable; pull it + // out before spreading so React doesn't warn about an unknown DOM + // attribute on the div. return (