From 3ccd108867fa92925727bab9413ae53d8c14aabf Mon Sep 17 00:00:00 2001 From: Onur Yilmaz Date: Wed, 27 May 2026 16:30:04 -0400 Subject: [PATCH 1/2] Fix tokenizer issue with chat template Signed-off-by: Onur Yilmaz --- nemo_deploy/llm/megatronllm_deployable.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/nemo_deploy/llm/megatronllm_deployable.py b/nemo_deploy/llm/megatronllm_deployable.py index dc3a3786f..ccbeddb12 100755 --- a/nemo_deploy/llm/megatronllm_deployable.py +++ b/nemo_deploy/llm/megatronllm_deployable.py @@ -233,6 +233,14 @@ def apply_chat_template(self, messages, add_generation_prompt=True): # Some tokenizers might not have bos_token, use empty string as fallback bos_token = "" + # Try to get eos_token - many chat templates reference it + eos_token = None + try: + eos_token = self.mcore_tokenizer._tokenizer.eos_token + except AttributeError: + # Some tokenizers might not have eos_token, use empty string as fallback + eos_token = "" + # Check if chat_template is None or empty if tokenizer_chat_template is None: raise ValueError( @@ -251,6 +259,7 @@ def apply_chat_template(self, messages, add_generation_prompt=True): rendered_output = template.render( messages=messages, bos_token=bos_token, + eos_token=eos_token, add_generation_prompt=add_generation_prompt, ) From 307a32f9b4e3ea631832a846f5a9182377fb1f92 Mon Sep 17 00:00:00 2001 From: Onur Yilmaz Date: Fri, 29 May 2026 10:38:09 -0400 Subject: [PATCH 2/2] Add tests for chat template Signed-off-by: Onur Yilmaz --- .../deploy/test_megatronllm_deployable.py | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/tests/unit_tests/deploy/test_megatronllm_deployable.py b/tests/unit_tests/deploy/test_megatronllm_deployable.py index 4fa3466be..af6902770 100644 --- a/tests/unit_tests/deploy/test_megatronllm_deployable.py +++ b/tests/unit_tests/deploy/test_megatronllm_deployable.py @@ -466,6 +466,52 @@ def test_apply_chat_template(deployable): template_mock.render.assert_called_once() 
+@pytest.mark.run_only_on("GPU")
+def test_apply_chat_template_passes_eos_token(deployable):
+    """Test that the tokenizer's eos_token, not the empty-string fallback, is forwarded to template.render."""
+    deployable.mcore_tokenizer._tokenizer.eos_token = "</s>"  # non-empty so it differs from the fallback
+    messages = [{"role": "user", "content": "Hello"}]
+
+    template_mock = MagicMock()
+    template_mock.render.return_value = "rendered"
+
+    with patch("nemo_deploy.llm.megatronllm_deployable.Template", return_value=template_mock):
+        deployable.apply_chat_template(messages)
+
+    call_kwargs = template_mock.render.call_args[1]
+    assert call_kwargs["eos_token"] == "</s>"
+    assert call_kwargs["bos_token"] == ""  # NOTE(review): relies on the fixture's bos_token -- confirm
+
+
+@pytest.mark.run_only_on("GPU")
+def test_apply_chat_template_eos_token_fallback(deployable):
+    """Test eos_token falls back to empty string when tokenizer lacks the attribute."""
+    del deployable.mcore_tokenizer._tokenizer.eos_token
+    messages = [{"role": "user", "content": "Hello"}]
+
+    template_mock = MagicMock()
+    template_mock.render.return_value = "rendered"
+
+    with patch("nemo_deploy.llm.megatronllm_deployable.Template", return_value=template_mock):
+        deployable.apply_chat_template(messages)
+
+    call_kwargs = template_mock.render.call_args[1]
+    assert call_kwargs["eos_token"] == ""
+
+
+@pytest.mark.run_only_on("GPU")
+def test_apply_chat_template_renders_template_using_eos_token(deployable):
+    """Regression test: templates referencing eos_token must render the tokenizer's value, not fail or drop it."""
+    tokenizer = deployable.mcore_tokenizer._tokenizer
+    tokenizer.eos_token = "</s>"  # non-empty: Jinja's default Undefined would also render as ""
+    tokenizer.chat_template = "{{ bos_token }}{% for m in messages %}{{ m['content'] }}{{ eos_token }}{% endfor %}"
+    messages = [{"role": "user", "content": "Hello"}]
+
+    rendered = deployable.apply_chat_template(messages)
+
+    assert rendered.endswith("Hello</s>")  # suffix check: the bos_token prefix comes from the fixture
+
+
 @pytest.mark.run_only_on("GPU")
 def test_remove_eos_token(deployable):
     """Test EOS token removal covering all code paths."""