diff --git a/backend/alembic/versions/a3f1d7c4e9b2_persona_multilingual_query_expansion.py b/backend/alembic/versions/a3f1d7c4e9b2_persona_multilingual_query_expansion.py new file mode 100644 index 00000000000..ab615c76e6b --- /dev/null +++ b/backend/alembic/versions/a3f1d7c4e9b2_persona_multilingual_query_expansion.py @@ -0,0 +1,32 @@ +"""persona multilingual_query_expansion flag + +Revision ID: a3f1d7c4e9b2 +Revises: c8a4e2f9d1b3 +Create Date: 2026-05-04 12:00:00.000000 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "a3f1d7c4e9b2" +down_revision = "c8a4e2f9d1b3" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.add_column( + "persona", + sa.Column( + "multilingual_query_expansion", + sa.Boolean(), + nullable=False, + server_default=sa.text("false"), + ), + ) + + +def downgrade() -> None: + op.drop_column("persona", "multilingual_query_expansion") diff --git a/backend/danswer/chat/multilingual_translation.py b/backend/danswer/chat/multilingual_translation.py new file mode 100644 index 00000000000..5976f6fcc89 --- /dev/null +++ b/backend/danswer/chat/multilingual_translation.py @@ -0,0 +1,151 @@ +"""Helpers for the per-persona multi-language post-processing pass. + +When a persona has `multilingual_query_expansion=True` and the user's +query is non-English, the answering LLM still produces English most of +the time (it tends to mirror the English context corpus regardless of +the LANGUAGE_HINT directive). We compensate by post-translating the +English answer back into the user's original language. + +Trade-off: in translate mode we buffer the streamed answer instead of +showing it token-by-token. The user sees a brief delay (one extra LLM +round-trip), but reliably gets a reply in their language. English +queries are unaffected — they keep streaming normally. +""" +from __future__ import annotations + +import unicodedata + +from danswer.llm.interfaces import LLM +from danswer.llm.utils import dict_based_prompt_to_langchain_prompt +from danswer.llm.utils import message_to_string +from danswer.utils.logger import setup_logger + +logger = setup_logger() + + +# Display name passed to the translation prompt. Keys are the language +# codes detect_query_language returns. Anything not in this map is +# treated as English (no translation needed). +_LANGUAGE_NAMES: dict[str, str] = { + "ja": "Japanese", + "zh": "Chinese (Simplified)", + "ko": "Korean", +} + + +def detect_query_language(text: str) -> str: + """Cheap script-based language detector covering the languages we + explicitly support translation for. Returns one of: 'ja', 'zh', + 'ko', or 'en' (English/other — no translation needed). + + Heuristic mirrors the script-presence test in + backend/scripts/test_multilanguage_e2e.py: a few percent of CJK / + Hangul / kana code points is enough to decide. We don't try to be + clever about mixed-language queries — the dominant non-English + script wins, and ties default to English. 
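+
+    Example: detect_query_language("VPNに接続できない") returns 'ja'
+    (kana present); detect_query_language("reset my password")
+    returns 'en' (ASCII letters only).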
+ """ + if not text: + return "en" + + counts = {"hiragana_katakana": 0, "hangul": 0, "cjk": 0, "ascii_letter": 0} + total_letters = 0 + for ch in text: + cp = ord(ch) + if (0x3040 <= cp <= 0x309F) or (0x30A0 <= cp <= 0x30FF): + counts["hiragana_katakana"] += 1 + total_letters += 1 + elif 0xAC00 <= cp <= 0xD7AF: + counts["hangul"] += 1 + total_letters += 1 + elif (0x4E00 <= cp <= 0x9FFF) or (0x3400 <= cp <= 0x4DBF): + counts["cjk"] += 1 + total_letters += 1 + elif unicodedata.category(ch).startswith("L"): + counts["ascii_letter"] += 1 + total_letters += 1 + + if total_letters == 0: + return "en" + threshold = max(1, total_letters // 20) # ~5% + if counts["hiragana_katakana"] >= threshold: + return "ja" + if counts["hangul"] >= threshold: + return "ko" + if counts["cjk"] >= threshold: + return "zh" + return "en" + + +def language_name(code: str) -> str | None: + return _LANGUAGE_NAMES.get(code) + + +# The prompt is intentionally directive about preserving citations and +# not adding commentary. Citations are bracketed numerals like [1] / +# [[1]](url); URLs and code blocks should also pass through unchanged. +_TRANSLATE_PROMPT = """\ +You are a precise translator. + +Translate the text below into {target_language}. + +CRITICAL RULES — follow exactly: +- Preserve every citation marker exactly as-is. Citation markers look + like [1], [2], [[1]](https://example.com), etc. Do not translate + them, do not change the brackets, do not change the numbers. +- Preserve every URL exactly. +- Preserve every code block (text between triple backticks) exactly. +- Preserve every inline code span (text between single backticks). +- Do not add any commentary, preface, or trailing notes — output only + the translated text. +- Keep numbers, proper nouns, and product names in their original + form unless the target language has a well-established equivalent. + +TEXT TO TRANSLATE: +{text} +""" + + +def translate_answer_to_language( + answer_text: str, + target_language_code: str, + llm: LLM, +) -> str: + """Translate `answer_text` into the language named by + `target_language_code` (a key of _LANGUAGE_NAMES). Returns the + English original on any failure — better to ship an English answer + than to drop the response entirely.""" + target_name = _LANGUAGE_NAMES.get(target_language_code) + if target_name is None: + # Caller should have skipped, but be defensive. 
+ return answer_text + + if not answer_text.strip(): + return answer_text + + prompt_messages = [ + { + "role": "user", + "content": _TRANSLATE_PROMPT.format( + target_language=target_name, text=answer_text + ), + } + ] + + try: + filled = dict_based_prompt_to_langchain_prompt(prompt_messages) + translated = message_to_string(llm.invoke(filled)) + except Exception: + logger.exception( + "Failed to translate answer to %s; falling back to English", + target_name, + ) + return answer_text + + translated = translated.strip() + if not translated: + logger.warning( + "Translation to %s came back empty; falling back to English", + target_name, + ) + return answer_text + return translated diff --git a/backend/danswer/chat/process_message.py b/backend/danswer/chat/process_message.py index 64f6bab822d..4a11aa24ed5 100644 --- a/backend/danswer/chat/process_message.py +++ b/backend/danswer/chat/process_message.py @@ -14,6 +14,9 @@ from danswer.chat.models import LLMRelevanceFilterResponse from danswer.chat.models import QADocsResponse from danswer.chat.models import StreamingError +from danswer.chat.multilingual_translation import detect_query_language +from danswer.chat.multilingual_translation import language_name +from danswer.chat.multilingual_translation import translate_answer_to_language from danswer.configs.chat_configs import CHAT_TARGET_CHUNK_PERCENTAGE from danswer.configs.chat_configs import DISABLE_LLM_CHOOSE_SEARCH from danswer.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT @@ -92,11 +95,11 @@ def translate_citations( for db_doc in db_docs: if db_doc.document_id not in doc_id_to_saved_doc_id_map: doc_id_to_saved_doc_id_map[db_doc.document_id] = db_doc.id - #print(f'found doc id: {db_doc.id}') + # print(f'found doc id: {db_doc.id}') citation_to_saved_doc_id_map: dict[int, int] = {} for citation in citations_list: - #print(f'citation id {citation.document_id} for doc num {citation.citation_num}') + # print(f'citation id {citation.document_id} for doc num {citation.citation_num}') if citation.citation_num not in citation_to_saved_doc_id_map: citation_to_saved_doc_id_map[ citation.citation_num @@ -404,15 +407,25 @@ def stream_chat_message_objects( if not final_msg.prompt: raise RuntimeError("No Prompt found") + # Persona may be None for legacy flows; treat the flag as off in + # that case. When persona exists, thread its flag through so the + # answer-side prompt builders add the LANGUAGE_HINT. + persona_multilingual = ( + persona.multilingual_query_expansion if persona is not None else False + ) prompt_config = ( PromptConfig.from_model( final_msg.prompt, prompt_override=( new_msg_req.prompt_override or chat_session.prompt_override ), + multilingual_query_expansion=persona_multilingual, ) if not persona - else PromptConfig.from_model(persona.prompts[0]) + else PromptConfig.from_model( + persona.prompts[0], + multilingual_query_expansion=persona_multilingual, + ) ) # find out what tools to use @@ -539,6 +552,22 @@ def stream_chat_message_objects( ai_message_files = None # any files to associate with the AI message e.g. dall-e generated images dropped_indices = None tool_result = None + + # Multi-language post-processing pass (option C in the design): + # when the persona has multilingual_query_expansion=True and the + # user's question is in a non-English language, the LLM tends + # to answer in English regardless of the LANGUAGE_HINT + # directive. We compensate by buffering DanswerAnswerPiece + # tokens during the stream and emitting a single translated + # piece at the end. 
Other packet types (citations, tool
+        # responses, image generation, etc.) still flow in real time.
+        translate_target = None
+        if persona_multilingual:
+            detected = detect_query_language(message_text)
+            if language_name(detected) is not None:
+                translate_target = detected
+        buffered_answer_pieces: list[str] = []
+
         for packet in answer.processed_streamed_output:
             if isinstance(packet, ToolResponse):
                 if packet.id == SEARCH_RESPONSE_SUMMARY_ID:
@@ -594,8 +623,35 @@ def stream_chat_message_objects(
             else:
                 if isinstance(packet, ToolCallFinalResult):
                     tool_result = packet
+                if (
+                    translate_target is not None
+                    and isinstance(packet, DanswerAnswerPiece)
+                    and packet.answer_piece
+                ):
+                    # Hold answer tokens back; we'll translate the full
+                    # answer at the end of the stream.
+                    buffered_answer_pieces.append(packet.answer_piece)
+                    continue
                 yield cast(ChatPacket, packet)
 
+        # End of stream. If we buffered for translation, do the second
+        # LLM pass now and emit the translated answer as one piece.
+        # `answer.llm_answer` reads from the same processed stream, so
+        # it already holds the full English text; use it as the source
+        # of truth rather than rejoining buffered_answer_pieces, whose
+        # only job is to keep the English intermediate from streaming
+        # to the client.
+        translated_answer_text: str | None = None
+        if translate_target is not None:
+            english_answer = answer.llm_answer
+            translated_answer_text = translate_answer_to_language(
+                answer_text=english_answer,
+                target_language_code=translate_target,
+                llm=llm,
+            )
+            yield DanswerAnswerPiece(answer_piece=translated_answer_text)
+            yield DanswerAnswerPiece(answer_piece=None)
+
     except Exception as e:
         logger.exception("Failed to process chat message")
 
@@ -627,14 +683,24 @@
         for tool in tool_list:
             tool_name_to_tool_id[tool.name()] = tool_id
 
+        # If we translated, persist the user-facing translated text
+        # rather than the English intermediate. Citations are computed
+        # from the LLM's English output (where the [1]/[2] markers
+        # were emitted relative to retrieved docs); the translation
+        # prompt preserves those markers verbatim.
+        final_answer_text = (
+            translated_answer_text
+            if translated_answer_text is not None
+            else answer.llm_answer
+        )
         gen_ai_response_message = partial_response(
-            message=answer.llm_answer,
+            message=final_answer_text,
             rephrased_query=(
                 qa_docs_response.rephrased_query if qa_docs_response else None
             ),
             reference_docs=reference_db_search_docs,
             files=ai_message_files,
-            token_count=len(llm_tokenizer_encode_func(answer.llm_answer)),
+            token_count=len(llm_tokenizer_encode_func(final_answer_text)),
             citations=db_citations,
             error=None,
             tool_calls=[
diff --git a/backend/danswer/db/models.py b/backend/danswer/db/models.py
index 58a8f32a8e9..e2bc854d09e 100644
--- a/backend/danswer/db/models.py
+++ b/backend/danswer/db/models.py
@@ -1003,6 +1003,15 @@ class Persona(Base):
     # Enables using LLM to extract time and source type filters
     # Can also be admin disabled globally
     llm_filter_extraction: Mapped[bool] = mapped_column(Boolean)
+    # When true, non-English queries on this persona are translated to
+    # English before retrieval and the LLM is instructed to answer in
+    # the user's original language. Off by default since most traffic
+    # is English and turning it on incurs an extra LLM call per query.
+    # Layered over the global MULTILINGUAL_QUERY_EXPANSION env var: a
+    # true flag forces it on; a false flag defers to the env var.
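+    # Consumers: the prompt builders OR this flag with the env var;
+    # retrieval maps a true flag to an "English" expansion target
+    # (see search/pipeline.py).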
+ multilingual_query_expansion: Mapped[bool] = mapped_column( + Boolean, nullable=False, default=False, server_default="false" + ) recency_bias: Mapped[RecencyBiasSetting] = mapped_column( Enum(RecencyBiasSetting, native_enum=False) ) diff --git a/backend/danswer/db/persona.py b/backend/danswer/db/persona.py index 26292fc9264..313192f30e6 100644 --- a/backend/danswer/db/persona.py +++ b/backend/danswer/db/persona.py @@ -79,6 +79,7 @@ def create_update_persona( llm_model_version_override=create_persona_request.llm_model_version_override, starter_messages=create_persona_request.starter_messages, is_public=create_persona_request.is_public, + multilingual_query_expansion=create_persona_request.multilingual_query_expansion, db_session=db_session, ) @@ -327,6 +328,7 @@ def upsert_persona( tool_ids: list[int] | None = None, persona_id: int | None = None, default_persona: bool = False, + multilingual_query_expansion: bool = False, commit: bool = True, ) -> Persona: if persona_id is not None: @@ -379,6 +381,7 @@ def upsert_persona( persona.starter_messages = starter_messages persona.deleted = False # Un-delete if previously deleted persona.is_public = is_public + persona.multilingual_query_expansion = multilingual_query_expansion # Do not delete any associations manually added unless # a new updated list is provided @@ -411,6 +414,7 @@ def upsert_persona( llm_model_version_override=llm_model_version_override, starter_messages=starter_messages, tools=tools or [], + multilingual_query_expansion=multilingual_query_expansion, ) db_session.add(persona) diff --git a/backend/danswer/llm/answering/models.py b/backend/danswer/llm/answering/models.py index a5248fac27a..8301c3a8420 100644 --- a/backend/danswer/llm/answering/models.py +++ b/backend/danswer/llm/answering/models.py @@ -121,10 +121,17 @@ class PromptConfig(BaseModel): task_prompt: str datetime_aware: bool include_citations: bool + # When true, the answer-side prompts add the LANGUAGE_HINT directive + # so the LLM responds in the user's original language. Sourced from + # the persona's multilingual_query_expansion flag at construction. 
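+    # Prompt builders OR this with the global MULTILINGUAL_QUERY_EXPANSION
+    # env var, so env-var-only deployments behave exactly as before.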
+ multilingual_query_expansion: bool = False @classmethod def from_model( - cls, model: "Prompt", prompt_override: PromptOverride | None = None + cls, + model: "Prompt", + prompt_override: PromptOverride | None = None, + multilingual_query_expansion: bool = False, ) -> "PromptConfig": override_system_prompt = ( prompt_override.system_prompt if prompt_override else None @@ -136,6 +143,7 @@ def from_model( task_prompt=override_task_prompt or model.task_prompt, datetime_aware=model.datetime_aware, include_citations=model.include_citations, + multilingual_query_expansion=multilingual_query_expansion, ) # needed so that this can be passed into lru_cache funcs diff --git a/backend/danswer/llm/answering/prompts/citations_prompt.py b/backend/danswer/llm/answering/prompts/citations_prompt.py index 69f727318d0..fbebe679f24 100644 --- a/backend/danswer/llm/answering/prompts/citations_prompt.py +++ b/backend/danswer/llm/answering/prompts/citations_prompt.py @@ -33,13 +33,16 @@ def get_prompt_tokens(prompt_config: PromptConfig) -> int: # Note: currently custom prompts do not allow datetime aware, only default prompts + use_language_hint = prompt_config.multilingual_query_expansion or bool( + MULTILINGUAL_QUERY_EXPANSION + ) return ( check_number_of_tokens(prompt_config.system_prompt) + check_number_of_tokens(prompt_config.task_prompt) + CHAT_USER_PROMPT_WITH_CONTEXT_OVERHEAD_TOKEN_CNT + CITATION_STATEMENT_TOKEN_CNT + CITATION_REMINDER_TOKEN_CNT - + (LANGUAGE_HINT_TOKEN_CNT if bool(MULTILINGUAL_QUERY_EXPANSION) else 0) + + (LANGUAGE_HINT_TOKEN_CNT if use_language_hint else 0) + (ADDITIONAL_INFO_TOKEN_CNT if prompt_config.datetime_aware else 0) ) @@ -135,7 +138,11 @@ def build_citations_user_message( all_doc_useful: bool, history_message: str = "", ) -> HumanMessage: - task_prompt_with_reminder = build_task_prompt_reminders(prompt_config) + task_prompt_with_reminder = build_task_prompt_reminders( + prompt_config, + use_language_hint=prompt_config.multilingual_query_expansion + or bool(MULTILINGUAL_QUERY_EXPANSION), + ) if context_docs: context_docs_str = build_complete_context_str(context_docs) diff --git a/backend/danswer/llm/answering/prompts/quotes_prompt.py b/backend/danswer/llm/answering/prompts/quotes_prompt.py index b2b67c65b37..a39a4f8bf0e 100644 --- a/backend/danswer/llm/answering/prompts/quotes_prompt.py +++ b/backend/danswer/llm/answering/prompts/quotes_prompt.py @@ -74,12 +74,18 @@ def _build_strong_llm_quotes_prompt( return HumanMessage(content=full_prompt) +def _resolve_use_language_hint(prompt: PromptConfig, override: bool | None) -> bool: + if override is not None: + return override + return prompt.multilingual_query_expansion or bool(MULTILINGUAL_QUERY_EXPANSION) + + def build_quotes_user_message( question: str, context_docs: list[LlmDoc] | list[InferenceChunk], history_str: str, prompt: PromptConfig, - use_language_hint: bool = bool(MULTILINGUAL_QUERY_EXPANSION), + use_language_hint: bool | None = None, ) -> HumanMessage: prompt_builder = ( _build_weak_llm_quotes_prompt @@ -92,7 +98,7 @@ def build_quotes_user_message( context_docs=context_docs, history_str=history_str, prompt=prompt, - use_language_hint=use_language_hint, + use_language_hint=_resolve_use_language_hint(prompt, use_language_hint), ) @@ -101,7 +107,7 @@ def build_quotes_prompt( context_docs: list[LlmDoc] | list[InferenceChunk], history_str: str, prompt: PromptConfig, - use_language_hint: bool = bool(MULTILINGUAL_QUERY_EXPANSION), + use_language_hint: bool | None = None, ) -> HumanMessage: prompt_builder = ( 
_build_weak_llm_quotes_prompt @@ -114,5 +120,5 @@ def build_quotes_prompt( context_docs=context_docs, history_str=history_str, prompt=prompt, - use_language_hint=use_language_hint, + use_language_hint=_resolve_use_language_hint(prompt, use_language_hint), ) diff --git a/backend/danswer/one_shot_answer/answer_question.py b/backend/danswer/one_shot_answer/answer_question.py index 3131406cab5..49da002d170 100644 --- a/backend/danswer/one_shot_answer/answer_question.py +++ b/backend/danswer/one_shot_answer/answer_question.py @@ -7,6 +7,9 @@ from danswer.chat.chat_utils import reorganize_citations from danswer.chat.models import CitationInfo from danswer.chat.models import DanswerAnswerPiece +from danswer.chat.multilingual_translation import detect_query_language +from danswer.chat.multilingual_translation import language_name +from danswer.chat.multilingual_translation import translate_answer_to_language from danswer.chat.models import DanswerContexts from danswer.chat.models import DanswerQuotes from danswer.chat.models import LLMRelevanceFilterResponse @@ -158,7 +161,14 @@ def stream_answer_objects( ) llm, fast_llm = get_llms_for_persona(persona=chat_session.persona) - prompt_config = PromptConfig.from_model(prompt) + persona_multilingual = ( + chat_session.persona.multilingual_query_expansion + if chat_session.persona is not None + else False + ) + prompt_config = PromptConfig.from_model( + prompt, multilingual_query_expansion=persona_multilingual + ) document_pruning_config = DocumentPruningConfig( max_chunks=int( chat_session.persona.num_chunks @@ -188,7 +198,9 @@ def stream_answer_objects( answer = Answer( question=query_msg.message, answer_style_config=answer_config, - prompt_config=PromptConfig.from_model(prompt), + prompt_config=PromptConfig.from_model( + prompt, multilingual_query_expansion=persona_multilingual + ), llm=get_main_llm_from_tuple(get_llms_for_persona(persona=chat_session.persona)), single_message_history=history_str, tools=[search_tool], @@ -203,6 +215,22 @@ def stream_answer_objects( ) # won't be any ImageGenerationDisplay responses since that tool is never passed in dropped_inds: list[int] = [] + + # Multi-language post-processing pass for the one-shot path + # (mirrors process_message.py). When the persona has the flag on + # and the user's question is non-English, buffer DanswerAnswerPiece + # tokens during the stream and emit a single translated piece at + # the end. CitationInfo packets still flow in real time so the + # slackbot's citation-required retry loop sees them. The translate + # prompt preserves [1]/[2] markers verbatim, so citations remain + # accurate after translation. + translate_target = None + if persona_multilingual: + detected = detect_query_language(query_msg.message) + if language_name(detected) is not None: + translate_target = detected + buffered_answer_pieces: list[str] = [] + for packet in cast(AnswerObjectIterator, answer.processed_streamed_output): # for one-shot flow, don't currently do anything with these if isinstance(packet, ToolResponse): @@ -252,15 +280,46 @@ def stream_answer_objects( elif packet.id == SEARCH_DOC_CONTENT_ID: yield packet.response else: + if ( + translate_target is not None + and isinstance(packet, DanswerAnswerPiece) + and packet.answer_piece + ): + # Hold answer tokens; we'll translate the full answer + # at the end of the stream and yield it as one piece. + buffered_answer_pieces.append(packet.answer_piece) + continue yield packet + # End of stream. 
If we buffered for translation, do the second LLM + # pass now and emit the translated answer as one piece. Use + # answer.llm_answer as source-of-truth for the English text — the + # processed stream is already cached on the Answer object. + translated_answer_text: str | None = None + if translate_target is not None: + english_answer = answer.llm_answer + translated_answer_text = translate_answer_to_language( + answer_text=english_answer, + target_language_code=translate_target, + llm=llm, + ) + yield DanswerAnswerPiece(answer_piece=translated_answer_text) + yield DanswerAnswerPiece(answer_piece=None) + + # If we translated, persist the user-facing translated text. + final_answer_text = ( + translated_answer_text + if translated_answer_text is not None + else answer.llm_answer + ) + # Saving Gen AI answer and responding with message info gen_ai_response_message = create_new_chat_message( chat_session_id=chat_session.id, parent_message=new_user_message, prompt_id=query_req.prompt_id, - message=answer.llm_answer, - token_count=len(llm_tokenizer(answer.llm_answer)), + message=final_answer_text, + token_count=len(llm_tokenizer(final_answer_text)), message_type=MessageType.ASSISTANT, error=None, reference_docs=reference_db_search_docs, diff --git a/backend/danswer/search/pipeline.py b/backend/danswer/search/pipeline.py index 98b1a87161d..7b2a31db7e2 100644 --- a/backend/danswer/search/pipeline.py +++ b/backend/danswer/search/pipeline.py @@ -273,12 +273,24 @@ def retrieved_chunks(self) -> list[InferenceChunk]: if self._retrieved_chunks is not None: return self._retrieved_chunks + # Resolve multilingual expansion: persona flag takes precedence, + # then global env var. Persona flag means "translate non-English + # queries to English for retrieval"; expressed as the string + # "English" so the existing expansion plumbing fans out the + # query to that language. + persona = self.search_request.persona + multilingual_expansion_str: str | None + if persona is not None and persona.multilingual_query_expansion: + multilingual_expansion_str = "English" + else: + multilingual_expansion_str = MULTILINGUAL_QUERY_EXPANSION + self._retrieved_chunks = retrieve_chunks( query=self.search_query, document_index=self.document_index, db_session=self.db_session, hybrid_alpha=self.search_request.hybrid_alpha, - multilingual_expansion_str=MULTILINGUAL_QUERY_EXPANSION, + multilingual_expansion_str=multilingual_expansion_str, retrieval_metrics_callback=self.retrieval_metrics_callback, ) diff --git a/backend/danswer/secondary_llm_flows/chat_session_naming.py b/backend/danswer/secondary_llm_flows/chat_session_naming.py index 9449eaded7a..9b02bbb5e27 100644 --- a/backend/danswer/secondary_llm_flows/chat_session_naming.py +++ b/backend/danswer/secondary_llm_flows/chat_session_naming.py @@ -15,15 +15,18 @@ def get_renamed_conversation_name( full_history: list[ChatMessage], llm: LLM, + use_language_hint: bool | None = None, ) -> str: history_str = combine_message_chain( messages=full_history, token_limit=GEN_AI_HISTORY_CUTOFF ) + # Persona flag wins; otherwise fall back to the global env var. 
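+    # None means the caller expressed no preference; an explicit False
+    # keeps the hint off even when the env var is set.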
+    if use_language_hint is None:
+        use_language_hint = bool(MULTILINGUAL_QUERY_EXPANSION)
+
     language_hint = (
-        f"\n{LANGUAGE_CHAT_NAMING_HINT.strip()}"
-        if bool(MULTILINGUAL_QUERY_EXPANSION)
-        else ""
+        f"\n{LANGUAGE_CHAT_NAMING_HINT.strip()}" if use_language_hint else ""
     )
 
     prompt_msgs = [
diff --git a/backend/danswer/server/features/persona/models.py b/backend/danswer/server/features/persona/models.py
index aee39e72af0..0b7111dc64b 100644
--- a/backend/danswer/server/features/persona/models.py
+++ b/backend/danswer/server/features/persona/models.py
@@ -33,6 +33,9 @@ class CreatePersonaRequest(BaseModel):
     # For Private Personas, who should be able to access these
     users: list[UUID] | None = None
     groups: list[int] | None = None
+    # Translate non-English queries to English for retrieval and
+    # answer in the user's original language. Off by default.
+    multilingual_query_expansion: bool = False
 
 
 class PersonaSnapshot(BaseModel):
@@ -55,6 +58,7 @@ class PersonaSnapshot(BaseModel):
     document_sets: list[DocumentSet]
     users: list[MinimalUserSnapshot]
     groups: list[int]
+    multilingual_query_expansion: bool
 
     @classmethod
     def from_model(
@@ -97,6 +101,7 @@ def from_model(
                 for user in persona.users
             ],
             groups=[user_group.id for user_group in persona.groups],
+            multilingual_query_expansion=persona.multilingual_query_expansion,
         )
diff --git a/backend/danswer/server/query_and_chat/chat_backend.py b/backend/danswer/server/query_and_chat/chat_backend.py
index 4e5a1bb2138..8236375c19a 100644
--- a/backend/danswer/server/query_and_chat/chat_backend.py
+++ b/backend/danswer/server/query_and_chat/chat_backend.py
@@ -234,7 +234,23 @@ def rename_chat_session(
         # clear thing we can do
         return RenameChatSessionResponse(new_name=full_history[0].message)
 
-    new_name = get_renamed_conversation_name(full_history=full_history, llm=llm)
+    # Honor the persona's multilingual flag so the title is named in the
+    # user's language when the persona is configured for multi-language.
+    # Pass None (not False) when there is no persona or the flag is off,
+    # so the naming helper can still fall back to the global env var,
+    # matching the flag's documented semantics everywhere else.
+    chat_session = get_chat_session_by_id(
+        chat_session_id=chat_session_id, user_id=user_id, db_session=db_session
+    )
+    use_language_hint = (
+        True
+        if chat_session.persona is not None
+        and chat_session.persona.multilingual_query_expansion
+        else None
+    )
+    new_name = get_renamed_conversation_name(
+        full_history=full_history, llm=llm, use_language_hint=use_language_hint
+    )
 
     update_chat_session(
         db_session=db_session,
diff --git a/backend/scripts/test_multilanguage_e2e.py b/backend/scripts/test_multilanguage_e2e.py
new file mode 100644
index 00000000000..4fa2a8db2ea
--- /dev/null
+++ b/backend/scripts/test_multilanguage_e2e.py
@@ -0,0 +1,961 @@
+"""End-to-end integration test for the per-persona multi-language flag.
+
+Drives the real stack (Postgres + Vespa + your configured GenAI provider)
+and verifies that:
+
+  Phase 1   Test fixtures are seeded: English docs land in Vespa
+            (BM25 + embeddings) via the real indexing pipeline
+  Phase 2   English baseline: the seeded docs are retrieved and the
+            answers contain the expected facts
+  Phase 3   With `multilingual_query_expansion=True`, non-English
+            queries retrieve the seeded docs and the streamed answer
+            comes back in the user's original language
+            (script-detection heuristic on Unicode ranges)
+  Phase 4   A control Persona with the flag OFF behaves differently
+            (logged, not asserted — flagged behavior is the contract)
+  Phase 5   The Slack one-shot path (`get_search_answer`) honors the
+            same contract as Phase 3
+
+Designed for a developer running the local stack. Uses the existing
+ingestion + chat code paths directly (no HTTP) so it doubles as a
+fast smoke test of the wiring we just added.
+
+DESTRUCTIVE: writes (and on --clean removes) rows + Vespa documents
+prefixed with `__test_multilang__`. Run only against a dev DB.
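+
+Assumes the dev stack's usual configuration is already in place
+(Postgres and Vespa reachable, a GenAI provider configured); nothing
+is mocked, so a failure here usually points at real wiring problems.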
+ +Usage: + cd backend + PYTHONPATH=$(pwd) python scripts/test_multilanguage_e2e.py [--yes] [--clean] [--keep-data] + +Exits 0 on success, non-zero on the first hard failure. Phase 4 is +informational only and does not gate exit code. +""" +from __future__ import annotations + +import argparse +import logging +import sys +import unicodedata +from dataclasses import dataclass + +from sqlalchemy import select +from sqlalchemy import text +from sqlalchemy.orm import Session + +from danswer.chat.models import DanswerAnswerPiece +from danswer.chat.models import QADocsResponse +from danswer.chat.models import StreamingError +from danswer.chat.process_message import stream_chat_message_objects +from danswer.configs.constants import DocumentSource +from danswer.connectors.models import Document +from danswer.connectors.models import IndexAttemptMetadata +from danswer.connectors.models import InputType +from danswer.connectors.models import Section +from danswer.db.chat import create_chat_session +from danswer.db.chat import get_or_create_root_message +from danswer.db.embedding_model import get_current_db_embedding_model +from danswer.db.engine import get_session_context_manager +from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.models import Connector +from danswer.db.models import ConnectorCredentialPair +from danswer.db.models import Credential +from danswer.db.models import Document as DbDocument +from danswer.db.models import DocumentByConnectorCredentialPair +from danswer.db.models import Persona +from danswer.db.models import Tool as ToolDBModel +from danswer.db.persona import get_default_prompt +from danswer.db.persona import upsert_persona +from danswer.tools.search.search_tool import SearchTool +from danswer.document_index.factory import get_default_document_index +from danswer.indexing.embedder import DefaultIndexingEmbedder +from danswer.indexing.indexing_pipeline import build_indexing_pipeline +from danswer.one_shot_answer.answer_question import get_search_answer +from danswer.one_shot_answer.models import DirectQARequest +from danswer.one_shot_answer.models import ThreadMessage +from danswer.search.enums import OptionalSearchSetting +from danswer.search.enums import RecencyBiasSetting +from danswer.search.models import RetrievalDetails +from danswer.server.query_and_chat.models import CreateChatMessageRequest + + +# Keep the global logger quiet so test output is readable. +logging.getLogger().setLevel(logging.WARNING) + + +SEED_PREFIX = "__test_multilang__" +PERSONA_ML_NAME = f"{SEED_PREFIX}persona-multilingual" +PERSONA_CONTROL_NAME = f"{SEED_PREFIX}persona-control" + + +# --------------------------------------------------------------------------- +# Seed corpus — three facts, each in a distinct doc, all in English. +# Designed so retrieval recall is unambiguous: each query maps cleanly +# to exactly one doc. +# --------------------------------------------------------------------------- + + +@dataclass +class SeedDoc: + doc_id: str + title: str + body: str + # The entity the query asks about (kept stable across translations + # so we can verify the right doc was retrieved by checking the + # answer's content for this string). + expected_entity: str + + +# NOTE on entity naming: we deliberately use a fictitious-but-unique +# brand ("Zorblax") in seed docs so the queries do not collide with any +# real entity in the host's existing corpus (Salesforce accounts, Slack +# threads, etc.). 
When a generic name like "Acme Corp" is used, the +# retriever's history rephrase + multilingual translation can produce +# ambiguous fragments that match unrelated docs, and the answer LLM +# hedges. The unique brand keeps the right doc dominant. +SEED_CORPUS: list[SeedDoc] = [ + SeedDoc( + doc_id=f"{SEED_PREFIX}doc-vacation-policy", + title="Zorblax Vacation Policy", + body=( + "All Zorblax full-time employees are entitled to 25 paid " + "vacation days per calendar year. Vacation days do not roll " + "over to the following year. Requests must be submitted at " + "least two weeks in advance through the Zorblax HR portal." + ), + expected_entity="25", + ), + SeedDoc( + doc_id=f"{SEED_PREFIX}doc-vpn-setup", + title="Zorblax VPN Setup Guide", + body=( + "To connect to the Zorblax VPN, install the GlobalProtect " + "client from the IT self-service portal. Use your corporate " + "email as the username and your single sign-on password. " + "The Zorblax gateway URL is vpn.zorblax.example.com." + ), + expected_entity="GlobalProtect", + ), + SeedDoc( + doc_id=f"{SEED_PREFIX}doc-printer-help", + title="Zorblax Office Printer Troubleshooting", + body=( + "If the Zorblax office printer is not responding, first " + "check the network cable and power. The Zorblax printer's " + "IP address is 10.20.30.40. To reset the print queue, open " + "the Printers control panel and select 'Cancel All " + "Documents'." + ), + expected_entity="10.20.30.40", + ), +] + + +# --------------------------------------------------------------------------- +# Test queries — each language asks the same questions about the seeded +# English docs. The translations are deliberately straightforward so the +# LLM rephrase has a fair chance. +# --------------------------------------------------------------------------- + + +@dataclass +class LanguageCase: + code: str # ISO-ish for display + label: str + queries: list[tuple[str, SeedDoc]] # (query_text, expected_doc) + + +CASES: list[LanguageCase] = [ + LanguageCase( + code="en", + label="English", + queries=[ + ("How many vacation days do Zorblax employees get?", SEED_CORPUS[0]), + ("How do I connect to the Zorblax VPN?", SEED_CORPUS[1]), + ("What is the IP address of the Zorblax office printer?", SEED_CORPUS[2]), + ], + ), + LanguageCase( + code="ja", + label="Japanese", + queries=[ + ("Zorblaxの従業員は何日の有給休暇が取れますか?", SEED_CORPUS[0]), + ("ZorblaxのVPNに接続するにはどうすればいいですか?", SEED_CORPUS[1]), + ("Zorblaxのオフィスプリンタの IPアドレスは何ですか?", SEED_CORPUS[2]), + ], + ), + LanguageCase( + code="zh", + label="Chinese", + queries=[ + ("Zorblax 公司的员工每年有多少天带薪休假?", SEED_CORPUS[0]), + ("如何连接 Zorblax 公司的 VPN?", SEED_CORPUS[1]), + ("Zorblax 办公室打印机的 IP 地址是多少?", SEED_CORPUS[2]), + ], + ), + LanguageCase( + code="ko", + label="Korean", + queries=[ + ("Zorblax 직원은 연간 며칠의 유급 휴가를 받을 수 있나요?", SEED_CORPUS[0]), + ("Zorblax의 VPN에 어떻게 접속하나요?", SEED_CORPUS[1]), + ("Zorblax 사무실 프린터의 IP 주소는 무엇인가요?", SEED_CORPUS[2]), + ], + ), +] + + +# --------------------------------------------------------------------------- +# Output helpers +# --------------------------------------------------------------------------- + + +_PASS = "\033[32mPASS\033[0m" +_FAIL = "\033[31mFAIL\033[0m" +_INFO = "\033[33mINFO\033[0m" + + +def section(title: str) -> None: + print(f"\n=== {title} ===") + + +def ok(msg: str) -> None: + print(f" [{_PASS}] {msg}") + + +def fail(msg: str) -> None: + print(f" [{_FAIL}] {msg}") + + +def info(msg: str) -> None: + print(f" [{_INFO}] {msg}") + + +# 
--------------------------------------------------------------------------- +# Language detection — heuristic, by Unicode block dominance. +# --------------------------------------------------------------------------- + + +def detect_language(text: str) -> str: + """Returns one of: 'ja', 'zh', 'ko', 'en', 'mixed/other'. + + Heuristic: count code points by script. If >= 5% Hiragana/Katakana, + call it Japanese (kanji alone could be Japanese or Chinese, so + presence of kana disambiguates). Else if >= 5% Hangul → Korean. + Else if >= 5% CJK ideographs → Chinese. Else if mostly ASCII letters + → English. Else 'mixed/other'. + """ + if not text: + return "mixed/other" + counts = {"hiragana_katakana": 0, "hangul": 0, "cjk": 0, "ascii_letter": 0} + total_letters = 0 + for ch in text: + cp = ord(ch) + if (0x3040 <= cp <= 0x309F) or (0x30A0 <= cp <= 0x30FF): + counts["hiragana_katakana"] += 1 + total_letters += 1 + elif 0xAC00 <= cp <= 0xD7AF: + counts["hangul"] += 1 + total_letters += 1 + elif (0x4E00 <= cp <= 0x9FFF) or (0x3400 <= cp <= 0x4DBF): + counts["cjk"] += 1 + total_letters += 1 + elif unicodedata.category(ch).startswith("L"): + # Latin-script letter (etc.) + counts["ascii_letter"] += 1 + total_letters += 1 + if total_letters == 0: + return "mixed/other" + threshold = max(1, total_letters // 20) # 5% + if counts["hiragana_katakana"] >= threshold: + return "ja" + if counts["hangul"] >= threshold: + return "ko" + if counts["cjk"] >= threshold: + return "zh" + if counts["ascii_letter"] >= total_letters * 0.7: + return "en" + return "mixed/other" + + +# --------------------------------------------------------------------------- +# Setup: connector / credential / cc-pair / docs +# --------------------------------------------------------------------------- + + +def confirm_destructive(skip: bool) -> None: + engine = get_sqlalchemy_engine() + url = engine.url + safe_url = f"{url.drivername}://{url.username}@{url.host}:{url.port}/{url.database}" + if skip: + print(f"[--yes] Proceeding against {safe_url}") + return + print(f"This script writes/deletes tagged ({SEED_PREFIX!r}) data in:") + print(f" {safe_url}") + print("It also indexes a small set of test docs into Vespa.") + answer = input("Type 'yes' to continue: ") + if answer.strip().lower() != "yes": + print("Aborted.") + sys.exit(1) + + +def get_or_create_test_cc_pair(db: Session) -> ConnectorCredentialPair: + existing = db.execute( + select(ConnectorCredentialPair).where( + ConnectorCredentialPair.name == f"{SEED_PREFIX}ccp" + ) + ).scalar_one_or_none() + if existing is not None: + return existing + + connector = Connector( + name=f"{SEED_PREFIX}connector", + source=DocumentSource.INGESTION_API, + input_type=InputType.LOAD_STATE, + connector_specific_config={"_test_multilang": True}, + refresh_freq=None, + disabled=False, + ) + credential = Credential(admin_public=True, credential_json={}) + db.add_all([connector, credential]) + db.flush() + ccp = ConnectorCredentialPair( + connector_id=connector.id, + credential_id=credential.id, + name=f"{SEED_PREFIX}ccp", + is_public=True, + total_docs_indexed=0, + ) + db.add(ccp) + db.commit() + return ccp + + +def seed_vespa_docs(db: Session, ccp: ConnectorCredentialPair) -> int: + """Push the SEED_CORPUS through the real indexing pipeline so they + land in Vespa with embeddings + BM25. 
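+    Uses the live embedding model and indexing pipeline, so this
+    exercises the same write path as a real connector run.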
Returns the number indexed.""" + embedding_model = get_current_db_embedding_model(db) + document_index = get_default_document_index( + primary_index_name=embedding_model.index_name, + secondary_index_name=None, + ) + + embedder = DefaultIndexingEmbedder( + model_name=embedding_model.model_name, + normalize=embedding_model.normalize, + query_prefix=embedding_model.query_prefix, + passage_prefix=embedding_model.passage_prefix, + ) + + pipeline = build_indexing_pipeline( + embedder=embedder, + document_index=document_index, + ignore_time_skip=True, + db_session=db, + ) + + docs = [ + Document( + id=sd.doc_id, + sections=[Section(text=f"{sd.title}\n\n{sd.body}", link=None)], + source=DocumentSource.INGESTION_API, + semantic_identifier=sd.title, + metadata={"_test_multilang": "true"}, + from_ingestion_api=True, + ) + for sd in SEED_CORPUS + ] + + new_doc, chunks = pipeline( + documents=docs, + index_attempt_metadata=IndexAttemptMetadata( + connector_id=ccp.connector_id, + credential_id=ccp.credential_id, + ), + ) + return new_doc + + +# --------------------------------------------------------------------------- +# Persona helpers +# --------------------------------------------------------------------------- + + +def upsert_test_persona(db: Session, name: str, multilingual: bool) -> Persona: + default_prompt = get_default_prompt(db) + # Without the SearchTool attached, the chat flow has nothing to + # retrieve with — the LLM falls back to its training knowledge and + # never sees our seeded docs. Look it up by in_code_tool_id so the + # test isn't tied to a hardcoded id. + search_tool_row = db.execute( + select(ToolDBModel).where(ToolDBModel.in_code_tool_id == SearchTool.__name__) + ).scalar_one_or_none() + if search_tool_row is None: + raise RuntimeError( + "Built-in SearchTool not found in DB; ensure api-server has " + "started at least once so it can seed in-code tools." + ) + persona = upsert_persona( + user=None, + name=name, + description=f"{SEED_PREFIX} persona for multilingual e2e test", + num_chunks=10, + llm_relevance_filter=False, + llm_filter_extraction=False, + recency_bias=RecencyBiasSetting.BASE_DECAY, + llm_model_provider_override=None, + llm_model_version_override=None, + starter_messages=None, + is_public=True, + prompt_ids=[default_prompt.id], + document_set_ids=[], + tool_ids=[search_tool_row.id], + multilingual_query_expansion=multilingual, + db_session=db, + ) + return persona + + +# --------------------------------------------------------------------------- +# Drive a single chat-message call and collect what we need. 
+# --------------------------------------------------------------------------- + + +@dataclass +class ChatProbeResult: + answer_text: str + retrieved_doc_ids: list[str] + retrieved_titles: list[str] + error: str | None + + +def probe_chat(persona: Persona, query: str) -> ChatProbeResult: + """One-shot: create chat session, send message, drain the stream.""" + with get_session_context_manager() as db_session: + chat_session = create_chat_session( + db_session=db_session, + description=f"{SEED_PREFIX}probe", + user_id=None, + persona_id=persona.id, + ) + root = get_or_create_root_message( + chat_session_id=chat_session.id, db_session=db_session + ) + + req = CreateChatMessageRequest( + chat_session_id=chat_session.id, + parent_message_id=root.id, + message=query, + file_descriptors=[], + prompt_id=None, + search_doc_ids=None, + retrieval_options=RetrievalDetails( + run_search=OptionalSearchSetting.ALWAYS, real_time=True + ), + ) + + answer_pieces: list[str] = [] + retrieved_doc_ids: list[str] = [] + retrieved_titles: list[str] = [] + error: str | None = None + + try: + for obj in stream_chat_message_objects( + new_msg_req=req, + user=None, + db_session=db_session, + ): + if isinstance(obj, DanswerAnswerPiece): + if obj.answer_piece: + answer_pieces.append(obj.answer_piece) + elif isinstance(obj, QADocsResponse): + for d in obj.top_documents or []: + retrieved_doc_ids.append(d.document_id) + retrieved_titles.append(d.semantic_identifier or "") + elif isinstance(obj, StreamingError): + error = obj.error + except Exception as exc: + error = f"{type(exc).__name__}: {exc}" + + return ChatProbeResult( + answer_text="".join(answer_pieces).strip(), + retrieved_doc_ids=retrieved_doc_ids, + retrieved_titles=retrieved_titles, + error=error, + ) + + +# --------------------------------------------------------------------------- +# Cleanup +# --------------------------------------------------------------------------- + + +def cleanup(db: Session) -> None: + section("Cleanup") + # FK dependency graph (collected via pg_constraint): + # chat_message__search_doc -> chat_message + # tool_call -> chat_message + # chat_feedback -> chat_message + # document_retrieval_feedback -> chat_message + # chat_message -> chat_session + # chat_session -> persona + # We must drop dependents before parents. Use raw SQL — much + # cleaner than walking the ORM for a destructive teardown. + # + # Match scope: any chat session whose description carries our + # SEED_PREFIX *or* whose persona is one of our test personas. + # That covers prior aborted runs, runs that crashed mid-test, and + # the case where the chat UI was used to talk to our test persona. 
+ db.execute( + text( + """ + CREATE TEMP TABLE _ml_test_sessions ON COMMIT DROP AS + SELECT cs.id + FROM chat_session cs + WHERE cs.description LIKE :prefix + OR cs.persona_id IN ( + SELECT id FROM persona WHERE name LIKE :prefix + ); + """ + ), + {"prefix": f"{SEED_PREFIX}%"}, + ) + db.execute( + text( + """ + CREATE TEMP TABLE _ml_test_messages ON COMMIT DROP AS + SELECT id FROM chat_message + WHERE chat_session_id IN (SELECT id FROM _ml_test_sessions); + """ + ) + ) + + deleted_msdoc = db.execute( + text( + """ + DELETE FROM chat_message__search_doc + WHERE chat_message_id IN (SELECT id FROM _ml_test_messages); + """ + ) + ).rowcount + deleted_toolcall = db.execute( + text( + """ + DELETE FROM tool_call + WHERE message_id IN (SELECT id FROM _ml_test_messages); + """ + ) + ).rowcount + deleted_cfeedback = db.execute( + text( + """ + DELETE FROM chat_feedback + WHERE chat_message_id IN (SELECT id FROM _ml_test_messages); + """ + ) + ).rowcount + deleted_drfeedback = db.execute( + text( + """ + DELETE FROM document_retrieval_feedback + WHERE chat_message_id IN (SELECT id FROM _ml_test_messages); + """ + ) + ).rowcount + deleted_msgs = db.execute( + text( + """ + DELETE FROM chat_message + WHERE id IN (SELECT id FROM _ml_test_messages); + """ + ) + ).rowcount + deleted_sessions = db.execute( + text( + """ + DELETE FROM chat_session + WHERE id IN (SELECT id FROM _ml_test_sessions); + """ + ) + ).rowcount + + info( + f"deleted {deleted_sessions} chat session(s), {deleted_msgs} " + f"message(s); cascaded: msg__search_doc={deleted_msdoc}, " + f"tool_call={deleted_toolcall}, chat_feedback={deleted_cfeedback}, " + f"document_retrieval_feedback={deleted_drfeedback}" + ) + + # Personas (now safe to drop — no chat session points at them). + personas = ( + db.execute(select(Persona).where(Persona.name.like(f"{SEED_PREFIX}%"))) + .scalars() + .all() + ) + for p in personas: + db.delete(p) + info(f"deleted {len(personas)} test persona(s)") + + # Documents (Postgres rows; Vespa cleanup is best-effort below) + db_docs = ( + db.execute(select(DbDocument).where(DbDocument.id.like(f"{SEED_PREFIX}%"))) + .scalars() + .all() + ) + for d in db_docs: + db.execute( + DocumentByConnectorCredentialPair.__table__.delete().where( + DocumentByConnectorCredentialPair.id == d.id + ) + ) + db.delete(d) + info(f"deleted {len(db_docs)} document row(s)") + + # cc-pair, connector, credential + ccp = db.execute( + select(ConnectorCredentialPair).where( + ConnectorCredentialPair.name == f"{SEED_PREFIX}ccp" + ) + ).scalar_one_or_none() + if ccp is not None: + connector_id = ccp.connector_id + credential_id = ccp.credential_id + db.delete(ccp) + connector = db.get(Connector, connector_id) + if connector is not None: + db.delete(connector) + credential = db.get(Credential, credential_id) + if credential is not None: + db.delete(credential) + info("deleted test cc-pair / connector / credential") + + db.commit() + info( + "Vespa: tagged test docs intentionally left in the index " + "(deletion goes through the connector framework). Re-running " + "this test reindexes them in place." 
+ ) + + +# --------------------------------------------------------------------------- +# Phases +# --------------------------------------------------------------------------- + + +def phase_setup(db: Session) -> tuple[ConnectorCredentialPair, Persona, Persona]: + section("Phase 1 — setup test fixtures") + ccp = get_or_create_test_cc_pair(db) + ok(f"cc-pair {ccp.id} ({ccp.name}) ready") + n_indexed = seed_vespa_docs(db, ccp) + if n_indexed != len(SEED_CORPUS): + # n_indexed is the count of *new* docs, so a re-run yields 0. + info( + f"indexing pipeline reported {n_indexed} new docs " + f"(re-runs reindex existing docs in place)" + ) + ok(f"seeded {len(SEED_CORPUS)} English doc(s) into Vespa") + + persona_ml = upsert_test_persona(db, PERSONA_ML_NAME, multilingual=True) + persona_ctrl = upsert_test_persona(db, PERSONA_CONTROL_NAME, multilingual=False) + ok( + f"persona [{persona_ml.name}] id={persona_ml.id}, " + f"multilingual_query_expansion={persona_ml.multilingual_query_expansion}" + ) + ok( + f"persona [{persona_ctrl.name}] id={persona_ctrl.id}, " + f"multilingual_query_expansion={persona_ctrl.multilingual_query_expansion}" + ) + return ccp, persona_ml, persona_ctrl + + +def phase_english_baseline(persona_ml: Persona) -> bool: + section("Phase 2 — English baseline (sanity check)") + case = next(c for c in CASES if c.code == "en") + failures = 0 + for query, expected_doc in case.queries: + result = probe_chat(persona_ml, query) + if result.error: + fail(f"[en] '{query[:60]}' streaming error: {result.error}") + failures += 1 + continue + if expected_doc.doc_id in result.retrieved_doc_ids: + ok(f"[en] retrieval hit expected doc for: '{query[:60]}'") + else: + fail(f"[en] expected doc NOT in top docs for: '{query[:60]}'") + info(f" retrieved: {result.retrieved_titles[:3]}") + failures += 1 + # Sanity: did the expected entity appear in the answer? + if expected_doc.expected_entity.lower() in result.answer_text.lower(): + ok( + f"[en] answer contains expected entity " + f"'{expected_doc.expected_entity}'" + ) + else: + info( + f"[en] answer does NOT contain '{expected_doc.expected_entity}' " + f"(LLM may have paraphrased; check manually). " + f"Answer head: {result.answer_text[:120]!r}" + ) + return failures == 0 + + +def phase_non_english(persona_ml: Persona) -> bool: + """Hard contract for the persona flag: when on, non-English queries + must (a) translate-for-retrieval so the right English doc is found, + (b) the answer must contain the factual entity from that doc + (numeric / proper-noun entities survive translation), AND (c) the + final answer text is in the user's language. (c) is enforced by the + post-translation pass in process_message.py — the answering LLM + might still produce English internally, but the second pass + translates that to the user's language before we yield it.""" + section("Phase 3 — non-English queries with multilingual flag ON") + failures = 0 + lang_match = 0 + lang_total = 0 + for case in CASES: + if case.code == "en": + continue + for query, expected_doc in case.queries: + result = probe_chat(persona_ml, query) + if result.error: + fail( + f"[{case.code}] '{query[:60]}' streaming error: " f"{result.error}" + ) + failures += 1 + continue + + # 3a — retrieval brought back the right English doc. + # This proves the persona flag wired translate-to-English + # into retrieval. 
+ if expected_doc.doc_id in result.retrieved_doc_ids: + ok(f"[{case.code}] retrieval hit expected doc for: " f"'{query[:60]}'") + else: + fail( + f"[{case.code}] expected doc NOT in top docs for: " + f"'{query[:60]}'" + ) + info(f" retrieved: {result.retrieved_titles[:3]}") + failures += 1 + + # 3b — answer contains the expected entity. Entities are + # numerals / proper nouns that survive translation, so the + # LLM should keep them verbatim regardless of output + # language. This is the strongest correctness signal. + if expected_doc.expected_entity.lower() in result.answer_text.lower(): + ok( + f"[{case.code}] answer contains expected entity " + f"'{expected_doc.expected_entity}'" + ) + else: + fail( + f"[{case.code}] answer missing entity " + f"'{expected_doc.expected_entity}'. Answer head: " + f"{result.answer_text[:120]!r}" + ) + failures += 1 + + # 3c — answer is in the user's language. Now a hard + # assertion because the post-translation pass guarantees + # this regardless of the answering LLM's behavior. If the + # detected language doesn't match, either the post-pass + # was not invoked (wiring bug) or it returned the English + # fallback (translate LLM call failed). + detected = detect_language(result.answer_text) + lang_total += 1 + if detected == case.code: + lang_match += 1 + ok(f"[{case.code}] answer language: {detected}") + else: + fail( + f"[{case.code}] expected {case.code} answer, detected " + f"{detected}. Answer head: {result.answer_text[:200]!r}" + ) + failures += 1 + info( + f"language-match summary: {lang_match}/{lang_total} non-English " + f"answers came back in the user's language" + ) + return failures == 0 + + +def probe_slack(persona: Persona, query: str) -> ChatProbeResult: + """Drive the one-shot answer path that the Slack listener uses. + `get_search_answer` runs the same Answer pipeline as chat but with + its own retry loop and citation enforcement.""" + with get_session_context_manager() as db_session: + req = DirectQARequest( + messages=[ThreadMessage(message=query, sender=None)], + prompt_id=None, + persona_id=persona.id, + retrieval_options=RetrievalDetails( + run_search=OptionalSearchSetting.ALWAYS, real_time=True + ), + ) + try: + response = get_search_answer( + query_req=req, + user=None, + max_document_tokens=None, + max_history_tokens=None, + db_session=db_session, + use_citations=True, + danswerbot_flow=True, + ) + except Exception as exc: + return ChatProbeResult( + answer_text="", + retrieved_doc_ids=[], + retrieved_titles=[], + error=f"{type(exc).__name__}: {exc}", + ) + + retrieved_doc_ids: list[str] = [] + retrieved_titles: list[str] = [] + if response.docs and response.docs.top_documents: + for d in response.docs.top_documents: + retrieved_doc_ids.append(d.document_id) + retrieved_titles.append(d.semantic_identifier or "") + return ChatProbeResult( + answer_text=(response.answer or "").strip(), + retrieved_doc_ids=retrieved_doc_ids, + retrieved_titles=retrieved_titles, + error=response.error_msg, + ) + + +def phase_slack(persona_ml: Persona) -> bool: + """Smoke test for the Slack one-shot path. 
Same hard contract as + Phase 3 (retrieval hit + entity in answer + answer in user's + language), but driven through `get_search_answer` — the function + the slack listener calls.""" + section("Phase 5 — Slack one-shot path with multilingual flag ON") + failures = 0 + lang_match = 0 + lang_total = 0 + for case in CASES: + if case.code == "en": + continue + # One query per language is plenty for a smoke test (each + # query takes 2× LLM round-trips: answer + translate). + query, expected_doc = case.queries[0] + result = probe_slack(persona_ml, query) + if result.error: + fail(f"[slack {case.code}] '{query[:60]}' error: {result.error}") + failures += 1 + continue + + if expected_doc.doc_id in result.retrieved_doc_ids: + ok(f"[slack {case.code}] retrieval hit expected doc") + else: + fail( + f"[slack {case.code}] expected doc NOT in top docs. " + f"retrieved: {result.retrieved_titles[:3]}" + ) + failures += 1 + + if expected_doc.expected_entity.lower() in result.answer_text.lower(): + ok( + f"[slack {case.code}] answer contains entity " + f"'{expected_doc.expected_entity}'" + ) + else: + fail( + f"[slack {case.code}] answer missing entity " + f"'{expected_doc.expected_entity}'. Answer head: " + f"{result.answer_text[:120]!r}" + ) + failures += 1 + + detected = detect_language(result.answer_text) + lang_total += 1 + if detected == case.code: + lang_match += 1 + ok(f"[slack {case.code}] answer language: {detected}") + else: + fail( + f"[slack {case.code}] expected {case.code}, detected " + f"{detected}. Answer head: {result.answer_text[:200]!r}" + ) + failures += 1 + info( + f"slack-path language-match summary: {lang_match}/{lang_total} " + f"non-English answers came back in the user's language" + ) + return failures == 0 + + +def phase_control(persona_ctrl: Persona) -> None: + section("Phase 4 — control: same queries with flag OFF (informational)") + for case in CASES: + if case.code == "en": + continue + # Just one query per language is enough to see the contrast. 
+ query, expected_doc = case.queries[0] + result = probe_chat(persona_ctrl, query) + if result.error: + info(f"[{case.code}] streaming error: {result.error}") + continue + retrieved = expected_doc.doc_id in result.retrieved_doc_ids + detected = detect_language(result.answer_text) + info( + f"[{case.code}] flag-OFF persona | retrieval-hit={retrieved} | " + f"answer-lang={detected}" + ) + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + + +def main() -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--yes", + action="store_true", + help="Skip the destructive-action confirmation prompt", + ) + parser.add_argument( + "--clean", action="store_true", help="Remove tagged test data and exit" + ) + parser.add_argument( + "--keep-data", + action="store_true", + help="Skip cleanup at the end of a successful run", + ) + args = parser.parse_args() + + confirm_destructive(args.yes) + + if args.clean: + with get_session_context_manager() as db: + cleanup(db) + return 0 + + overall_ok = True + with get_session_context_manager() as db: + try: + ccp, persona_ml, persona_ctrl = phase_setup(db) + except Exception as exc: + fail(f"setup failed: {type(exc).__name__}: {exc}") + return 2 + + if not phase_english_baseline(persona_ml): + overall_ok = False + + if not phase_non_english(persona_ml): + overall_ok = False + + if not phase_slack(persona_ml): + overall_ok = False + + phase_control(persona_ctrl) + + if not args.keep_data: + with get_session_context_manager() as db: + cleanup(db) + + print() + if overall_ok: + print(f"[{_PASS}] multi-language e2e: all hard assertions passed") + return 0 + print(f"[{_FAIL}] multi-language e2e: see failures above") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/web/src/app/admin/assistants/AssistantEditor.tsx b/web/src/app/admin/assistants/AssistantEditor.tsx index c58cdcdadf9..4c4e7152bb8 100644 --- a/web/src/app/admin/assistants/AssistantEditor.tsx +++ b/web/src/app/admin/assistants/AssistantEditor.tsx @@ -178,6 +178,8 @@ export function AssistantEditor({ num_chunks: existingPersona?.num_chunks ?? null, include_citations: existingPersona?.prompts[0]?.include_citations ?? true, llm_relevance_filter: existingPersona?.llm_relevance_filter ?? false, + multilingual_query_expansion: + existingPersona?.multilingual_query_expansion ?? false, llm_model_provider_override: existingPersona?.llm_model_provider_override ?? 
null,
     llm_model_version_override:
@@ -213,6 +215,7 @@
       num_chunks: Yup.number().nullable(),
       include_citations: Yup.boolean().required(),
       llm_relevance_filter: Yup.boolean().required(),
+      multilingual_query_expansion: Yup.boolean().required(),
       llm_model_version_override: Yup.string().nullable(),
       llm_model_provider_override: Yup.string().nullable(),
       starter_messages: Yup.array().of(
@@ -580,6 +583,14 @@
           }
         />
+
+        <BooleanFormField
+          name="multilingual_query_expansion"
+          label="Multilingual Support"
+          subtext="Translate non-English questions into English for
+          retrieval and answer in the user's original language."
+        />
+
       );
-}
\ No newline at end of file
+}
diff --git a/web/src/app/admin/assistants/interfaces.ts b/web/src/app/admin/assistants/interfaces.ts
index 0a06ac4cc82..5c9869f78d7 100644
--- a/web/src/app/admin/assistants/interfaces.ts
+++ b/web/src/app/admin/assistants/interfaces.ts
@@ -38,4 +38,5 @@ export interface Persona {
   default_persona: boolean;
   users: MinimalUserSnapshot[];
   groups: number[];
+  multilingual_query_expansion?: boolean;
 }
diff --git a/web/src/app/admin/assistants/lib.ts b/web/src/app/admin/assistants/lib.ts
index 4d42789d810..4eadb98f243 100644
--- a/web/src/app/admin/assistants/lib.ts
+++ b/web/src/app/admin/assistants/lib.ts
@@ -16,6 +16,7 @@ interface PersonaCreationRequest {
   users?: string[];
   groups: number[];
   tool_ids: number[]; // Added tool_ids to the interface
+  multilingual_query_expansion: boolean;
 }
 
 interface PersonaUpdateRequest {
@@ -36,6 +37,7 @@ interface PersonaUpdateRequest {
   users?: string[];
   groups: number[];
   tool_ids: number[]; // Added tool_ids to the interface
+  multilingual_query_expansion: boolean;
 }
 
 function promptNameFromPersonaName(personaName: string) {
@@ -110,6 +112,7 @@ function buildPersonaAPIBody(
     groups,
     users,
     tool_ids, // Added tool_ids to the destructuring
+    multilingual_query_expansion,
   } = creationRequest;
 
   return {
@@ -128,6 +131,7 @@ function buildPersonaAPIBody(
     users,
     groups,
     tool_ids, // Added tool_ids to the return object
+    multilingual_query_expansion,
   };
 }
diff --git a/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx b/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx
index 55497149ea0..032e47aad00 100644
--- a/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx
+++ b/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx
@@ -167,8 +167,21 @@ export const SlackBotCreationForm = ({
         }),
         curated_response_config: Yup.object().shape({
           enable_curated_response_integration: Yup.boolean().required(),
-          response_message: Yup.string().required(
-            "Response message is required when curated response integration is enabled"
+          // Mirror jira_config: only require this when the integration
+          // is enabled. Without the .when() guard the field is required
+          // unconditionally, but the UI hides the input when the toggle
+          // is off — Formik silently rejects submit and no error is
+          // visible since the (errored) field isn't on screen.
+ response_message: Yup.string().when( + "enable_curated_response_integration", + { + is: true, + then: (schema) => + schema.required( + "Response message is required when curated response integration is enabled" + ), + otherwise: (schema) => schema.notRequired(), + } ), }), jira_title_filter: Yup.array() diff --git a/web/src/components/table/DragHandle.tsx b/web/src/components/table/DragHandle.tsx index a288d58b46c..3bebed22822 100644 --- a/web/src/components/table/DragHandle.tsx +++ b/web/src/components/table/DragHandle.tsx @@ -1,13 +1,14 @@ import React from "react"; import { MdDragIndicator } from "react-icons/md"; -export const DragHandle = (props: any) => { +export const DragHandle = ({ isDragging, ...rest }: any) => { + // `isDragging` is a logical prop from @dnd-kit/sortable; pull it + // out before spreading so React doesn't warn about an unknown DOM + // attribute on the div. return (