Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions nemo_deploy/llm/megatronllm_deployable.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,14 @@ def apply_chat_template(self, messages, add_generation_prompt=True):
# Some tokenizers do not define bos_token; fall back to an empty string.
bos_token = ""

# Try to get eos_token - many chat templates reference it
eos_token = None
try:
eos_token = self.mcore_tokenizer._tokenizer.eos_token
except AttributeError:
# Some tokenizers do not define eos_token; fall back to an empty string.
eos_token = ""

# Check if chat_template is None or empty
if tokenizer_chat_template is None:
raise ValueError(
Expand All @@ -251,6 +259,7 @@ def apply_chat_template(self, messages, add_generation_prompt=True):
rendered_output = template.render(
messages=messages,
bos_token=bos_token,
eos_token=eos_token,
add_generation_prompt=add_generation_prompt,
)

Expand Down
46 changes: 46 additions & 0 deletions tests/unit_tests/deploy/test_megatronllm_deployable.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,52 @@ def test_apply_chat_template(deployable):
template_mock.render.assert_called_once()


@pytest.mark.run_only_on("GPU")
def test_apply_chat_template_passes_eos_token(deployable):
    """Check that the tokenizer's eos_token is handed to ``template.render`` by keyword."""
    # Use a distinctive EOS value so the forwarded argument is unmistakable.
    deployable.mcore_tokenizer._tokenizer.eos_token = "<custom_eos>"

    fake_template = MagicMock()
    fake_template.render.return_value = "rendered"

    # Patch the module-level Template so that constructing it yields the stub,
    # which lets the render call be inspected afterwards.
    template_patch = patch(
        "nemo_deploy.llm.megatronllm_deployable.Template",
        return_value=fake_template,
    )
    with template_patch:
        deployable.apply_chat_template([{"role": "user", "content": "Hello"}])

    render_kwargs = fake_template.render.call_args.kwargs
    assert render_kwargs["eos_token"] == "<custom_eos>"
    # NOTE(review): "<bos>" is presumably the bos_token configured by the
    # ``deployable`` fixture -- confirm against the fixture definition.
    assert render_kwargs["bos_token"] == "<bos>"


@pytest.mark.run_only_on("GPU")
def test_apply_chat_template_eos_token_fallback(deployable):
    """Check that ``eos_token`` is rendered as "" when the tokenizer has no such attribute."""
    inner_tokenizer = deployable.mcore_tokenizer._tokenizer
    # Remove the attribute so that reading it raises AttributeError.
    del inner_tokenizer.eos_token

    fake_template = MagicMock()
    fake_template.render.return_value = "rendered"

    # Patch the module-level Template so that constructing it yields the stub.
    template_patch = patch(
        "nemo_deploy.llm.megatronllm_deployable.Template",
        return_value=fake_template,
    )
    with template_patch:
        deployable.apply_chat_template([{"role": "user", "content": "Hello"}])

    render_kwargs = fake_template.render.call_args.kwargs
    assert render_kwargs["eos_token"] == ""


@pytest.mark.run_only_on("GPU")
def test_apply_chat_template_renders_template_using_eos_token(deployable):
    """Regression check: a chat template that references ``eos_token`` renders without UndefinedError."""
    # The template emits bos_token once, then each message's content followed by eos_token.
    eos_aware_template = "{{ bos_token }}{% for m in messages %}{{ m['content'] }}{{ eos_token }}{% endfor %}"
    deployable.mcore_tokenizer._tokenizer.chat_template = eos_aware_template

    # No Template patching here: the template is rendered for real.
    output = deployable.apply_chat_template([{"role": "user", "content": "Hello"}])

    # NOTE(review): "<bos>" and "<eos>" are presumably the tokens configured by
    # the ``deployable`` fixture -- confirm against the fixture definition.
    assert output == "<bos>Hello<eos>"


@pytest.mark.run_only_on("GPU")
def test_remove_eos_token(deployable):
"""Test EOS token removal covering all code paths."""
Expand Down
Loading