Files
axolotl/tests/prompt_strategies/test_dpo_chat_templates.py
NanoCode012 9de5b76336 feat: move to uv first (#3545)
* feat: move to uv first

* fix: update doc to uv first

* fix: merge dev/tests into uv pyproject

* fix: update docker docs to match current config

* fix: migrate examples to readme

* fix: add llmcompressor to conflict

* feat: rec uv sync with lockfile for dev/ci

* fix: update docker docs to clarify how to use uv images

* chore: docs

* fix: use system python, no venv

* fix: set backend cpu

* fix: only set for installing pytorch step

* fix: remove unsloth kernel and installs

* fix: remove U in tests

* fix: set backend in deps too

* chore: test

* chore: comments

* fix: attempt to lock torch

* fix: workaround torch cuda and not upgraded

* fix: forgot to push

* fix: missed source

* fix: nightly upstream loralinear config

* fix: nightly phi3 long rope not work

* fix: forgot commit

* fix: test phi3 template change

* fix: no more requirements

* fix: carry over changes from new requirements to pyproject

* chore: remove lockfile per discussion

* fix: set match-runtime

* fix: remove unneeded hf hub buildtime

* fix: duplicate cache delete on nightly

* fix: torchvision being overridden

* fix: migrate to uv images

* fix: leftover from merge

* fix: simplify base readme

* fix: update assertion message to be clearer

* chore: docs

* fix: change fallback for cicd script

* fix: match against main exactly

* fix: peft 0.19.1 change

* fix: e2e test

* fix: ci

* fix: e2e test
2026-04-21 10:16:03 -04:00

382 lines
13 KiB
Python

"""
Tests for the DPO chat_template prompt strategies (default and argilla_chat).
"""
import unittest
import pytest
from datasets import Dataset
from transformers import AutoTokenizer
from axolotl.prompt_strategies.dpo.chat_template import argilla_chat, default
from axolotl.utils.dict import DictDefault
from tests.hf_offline_utils import enable_hf_offline
@pytest.fixture(name="assistant_dataset")
def fixture_assistant_dataset():
    """
    One-row dataset using the standard field names: a ``messages`` history
    (user / assistant / user) plus single ``chosen`` and ``rejected`` assistant replies.
    """
    history = [
        {"role": "user", "content": "hello"},
        {"role": "assistant", "content": "hello"},
        {"role": "user", "content": "goodbye"},
    ]
    row = {
        "messages": history,
        "chosen": {"role": "assistant", "content": "goodbye"},
        "rejected": {"role": "assistant", "content": "party on"},
    }
    return Dataset.from_list([row])
@pytest.fixture(name="custom_assistant_dataset")
def fixture_custom_assistant_dataset():
    """
    One-row dataset with non-default field names: turns live under ``conversation``
    with ``speaker``/``text`` keys, and the preference pair under ``better``/``worse``.
    """
    turns = [
        {"speaker": "human", "text": "hello"},
        {"speaker": "agent", "text": "hello"},
        {"speaker": "human", "text": "goodbye"},
    ]
    row = {
        "conversation": turns,
        "better": {"speaker": "agent", "text": "goodbye"},
        "worse": {"speaker": "agent", "text": "party on"},
    }
    return Dataset.from_list([row])
@pytest.fixture(name="argilla_chat_dataset")
def fixture_argilla_chat_dataset():
    """
    One-row dataset where ``chosen`` and ``rejected`` each hold a full two-turn
    conversation that shares the same user turn and differs in the assistant reply.
    """

    def conversation(reply):
        # Fresh dicts on every call so the two conversations share no objects.
        return [
            {"role": "user", "content": "hello"},
            {"role": "assistant", "content": reply},
        ]

    row = {
        "chosen": conversation("goodbye"),
        "rejected": conversation("party on"),
    }
    return Dataset.from_list([row])
@pytest.fixture(name="phi3_tokenizer")
@enable_hf_offline
def fixture_phi3_tokenizer():
    """Load the tokenizer for ``microsoft/Phi-3-mini-4k-instruct``."""
    return AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
@pytest.fixture(name="gemma_tokenizer")
@enable_hf_offline
def fixture_gemma_tokenizer():
    """Load the tokenizer for ``unsloth/gemma-2b-it`` at revision ``703fb4a``."""
    return AutoTokenizer.from_pretrained(
        "unsloth/gemma-2b-it",
        revision="703fb4a",
    )
class TestAssistantDPOChatTemplateLlama3:
    """
    Exercises the ``default`` DPO chat_template transform with the llama3 template
    on assistant-style datasets (message history + chosen/rejected replies).
    """

    def test_llama3_defaults(self, llama3_tokenizer, assistant_dataset):
        """Default field names yield the llama-3 prompt and ``<|eot_id|>``-terminated replies."""
        cfg = DictDefault(
            {
                "chat_template": "llama3",
                "datasets": [{"type": "chat_template"}],
            }
        )
        transform_fn, _ = default(cfg)
        sample = transform_fn(assistant_dataset[0], tokenizer=llama3_tokenizer)

        expected_prompt = "".join(
            [
                "<|begin_of_text|>",
                "<|start_header_id|>user<|end_header_id|>\n\nhello<|eot_id|>",
                "<|start_header_id|>assistant<|end_header_id|>\n\nhello<|eot_id|>",
                "<|start_header_id|>user<|end_header_id|>\n\ngoodbye<|eot_id|>",
                "<|start_header_id|>assistant<|end_header_id|>\n\n",
            ]
        )
        assert sample["prompt"] == expected_prompt
        assert sample["chosen"] == "goodbye<|eot_id|>"
        assert sample["rejected"] == "party on<|eot_id|>"

    def test_llama3_configured(self, llama3_tokenizer, custom_assistant_dataset):
        """Custom field names and a role mapping yield the same output as the default layout."""
        dataset_cfg = {
            "type": "chat_template",
            "field_messages": "conversation",
            "field_chosen": "better",
            "field_rejected": "worse",
            "message_field_role": "speaker",
            "message_field_content": "text",
            "roles": {
                "user": ["human"],
                "assistant": ["agent"],
                "system": ["sys"],
            },
        }
        cfg = DictDefault({"chat_template": "llama3", "datasets": [dataset_cfg]})
        transform_fn, _ = default(cfg)
        sample = transform_fn(custom_assistant_dataset[0], tokenizer=llama3_tokenizer)

        expected_prompt = "".join(
            [
                "<|begin_of_text|>",
                "<|start_header_id|>user<|end_header_id|>\n\nhello<|eot_id|>",
                "<|start_header_id|>assistant<|end_header_id|>\n\nhello<|eot_id|>",
                "<|start_header_id|>user<|end_header_id|>\n\ngoodbye<|eot_id|>",
                "<|start_header_id|>assistant<|end_header_id|>\n\n",
            ]
        )
        assert sample["prompt"] == expected_prompt
        assert sample["chosen"] == "goodbye<|eot_id|>"
        assert sample["rejected"] == "party on<|eot_id|>"
class TestAssistantDPOChatTemplatePhi3:
    """
    Exercises the ``default`` DPO chat_template transform using the phi-3
    tokenizer's own chat template (``tokenizer_default``).
    """

    @pytest.mark.xfail(reason="likely upstream issue from v5.4.0")
    def test_phi3_defaults(self, phi3_tokenizer, assistant_dataset):
        """Expected phi-3 rendering of the prompt and both replies; currently marked xfail."""
        cfg = DictDefault(
            {
                "chat_template": "tokenizer_default",
                "datasets": [{"type": "chat_template"}],
            }
        )
        transform_fn, _ = default(cfg)
        sample = transform_fn(assistant_dataset[0], tokenizer=phi3_tokenizer)

        expected_prompt = "".join(
            [
                "<|user|>\nhello<|end|>\n",
                "<|assistant|>\nhello<|end|>\n",
                "<|user|>\ngoodbye<|end|>\n",
                "<|assistant|>\n",
            ]
        )
        assert sample["prompt"] == expected_prompt
        assert sample["chosen"] == "goodbye<|end|>\n<|endoftext|>"
        assert sample["rejected"] == "party on<|end|>\n<|endoftext|>"
class TestAssistantDPOChatTemplateGemma:
    """
    Exercises the ``default`` DPO chat_template transform using the gemma
    tokenizer's own chat template (``tokenizer_default``).
    """

    def test_gemma_defaults(self, gemma_tokenizer, assistant_dataset):
        """Default field names yield the gemma turn markup and ``<end_of_turn>``-terminated replies."""
        cfg = DictDefault(
            {
                "chat_template": "tokenizer_default",
                "datasets": [{"type": "chat_template"}],
            }
        )
        transform_fn, _ = default(cfg)
        sample = transform_fn(assistant_dataset[0], tokenizer=gemma_tokenizer)

        expected_prompt = "".join(
            [
                "<bos><start_of_turn>user\nhello<end_of_turn>\n",
                "<start_of_turn>model\nhello<end_of_turn>\n",
                "<start_of_turn>user\ngoodbye<end_of_turn>\n",
                "<start_of_turn>model\n",
            ]
        )
        assert sample["prompt"] == expected_prompt
        assert sample["chosen"] == "goodbye<end_of_turn>"
        assert sample["rejected"] == "party on<end_of_turn>"
class TestArgillaChatDPOChatTemplate:
    """
    Exercises the ``argilla_chat`` DPO transform, where ``chosen`` and ``rejected``
    each carry a full conversation rather than a single reply.
    """

    def test_llama3_argilla_chat(self, llama3_tokenizer, argilla_chat_dataset):
        """llama3 template: prompt is the user turn plus assistant header; replies end in ``<|eot_id|>``."""
        cfg = DictDefault(
            {
                "chat_template": "llama3",
                "datasets": [{"type": "chat_template.argilla_chat"}],
            }
        )
        transform_fn, _ = argilla_chat(cfg)
        sample = transform_fn(argilla_chat_dataset[0], tokenizer=llama3_tokenizer)

        expected_prompt = "".join(
            [
                "<|begin_of_text|>",
                "<|start_header_id|>user<|end_header_id|>\n\nhello<|eot_id|>",
                "<|start_header_id|>assistant<|end_header_id|>\n\n",
            ]
        )
        assert sample["prompt"] == expected_prompt
        assert sample["chosen"] == "goodbye<|eot_id|>"
        assert sample["rejected"] == "party on<|eot_id|>"

    @pytest.mark.xfail(reason="likely upstream issue from v5.4.0")
    def test_phi3_argilla_chat(self, phi3_tokenizer, argilla_chat_dataset):
        """Expected phi-3 rendering via the tokenizer's own template; currently marked xfail."""
        cfg = DictDefault(
            {
                "chat_template": "tokenizer_default",
                "datasets": [{"type": "chat_template.argilla_chat"}],
            }
        )
        transform_fn, _ = argilla_chat(cfg)
        sample = transform_fn(argilla_chat_dataset[0], tokenizer=phi3_tokenizer)

        expected_prompt = "".join(["<|user|>\nhello<|end|>\n", "<|assistant|>\n"])
        assert sample["prompt"] == expected_prompt
        assert sample["chosen"] == "goodbye<|end|>\n<|endoftext|>"
        assert sample["rejected"] == "party on<|end|>\n<|endoftext|>"
class TestDPOChatTemplateToolRole:
    """
    Checks that the DPO chat_template transform accepts tool-role messages
    instead of failing with a KeyError.
    Regression test for https://github.com/axolotl-ai-cloud/axolotl/issues/3217
    """

    def test_tool_role_default_no_key_error(self, llama3_tokenizer):
        """A history containing a 'tool' message transforms without raising KeyError."""
        row = {
            "messages": [
                {"role": "user", "content": "What is the weather?"},
                {"role": "assistant", "content": "Let me check."},
                {"role": "tool", "content": "22°C, sunny."},
            ],
            "chosen": {"role": "assistant", "content": "It is 22°C and sunny."},
            "rejected": {"role": "assistant", "content": "I don't know."},
        }
        dataset = Dataset.from_list([row])

        cfg = DictDefault(
            {
                "chat_template": "llama3",
                "datasets": [{"type": "chat_template"}],
            }
        )
        transform_fn, _ = default(cfg)

        # Should not raise KeyError: 'tool'
        sample = transform_fn(dataset[0], tokenizer=llama3_tokenizer)

        assert "prompt" in sample
        assert "chosen" in sample
        assert "rejected" in sample

    def test_tool_role_custom_mapping_preserved(self, llama3_tokenizer):
        """A user-supplied roles mapping that maps 'tool_result' to 'tool' still transforms."""
        row = {
            "messages": [
                {"role": "user", "content": "hello"},
                {"role": "tool_result", "content": "42"},
            ],
            "chosen": {"role": "assistant", "content": "The answer is 42."},
            "rejected": {"role": "assistant", "content": "Unknown."},
        }
        dataset = Dataset.from_list([row])

        role_aliases = {
            "user": ["user"],
            "assistant": ["assistant"],
            "system": ["system"],
            "tool": ["tool_result"],
        }
        cfg = DictDefault(
            {
                "chat_template": "llama3",
                "datasets": [{"type": "chat_template", "roles": role_aliases}],
            }
        )
        transform_fn, _ = default(cfg)

        sample = transform_fn(dataset[0], tokenizer=llama3_tokenizer)
        assert "prompt" in sample
if __name__ == "__main__":
    # The test classes in this module are plain pytest classes that rely on
    # fixtures; none derive from unittest.TestCase, so unittest.main() would
    # collect nothing. Run this file through pytest and propagate its exit code.
    raise SystemExit(pytest.main([__file__]))