Refactor and fix test isolation issues

This commit is contained in:
Dan Saunders
2024-12-21 16:56:57 +00:00
parent 0d56582090
commit fcbfa86373
7 changed files with 131 additions and 124 deletions

View File

@@ -1,4 +1,5 @@
"""CLI to convert a transformers model's attns to diff attns.""" """CLI to convert a transformers model's attention layers to differential attention layers."""
import logging import logging
import warnings import warnings
from pathlib import Path from pathlib import Path
@@ -127,6 +128,7 @@ def convert_diff_transformer(cfg, cli_args, config_path):
else: else:
modified_cfg["plugins"] = [plugin_class] modified_cfg["plugins"] = [plugin_class]
# Write out the updated axolotl config while preserving original ordering / formatting
dump_yaml_preserved_order( dump_yaml_preserved_order(
data=modified_cfg, data=modified_cfg,
reference_yaml_path=config_path, reference_yaml_path=config_path,

View File

@@ -12,14 +12,12 @@ from axolotl.utils.dict import DictDefault
from axolotl.utils.models import load_model, load_tokenizer from axolotl.utils.models import load_model, load_tokenizer
configure_logging() configure_logging()
LOG = logging.getLogger("axolotl.common.cli") LOG = logging.getLogger(__name__)
@dataclass @dataclass
class PreprocessCliArgs: class PreprocessCliArgs:
""" """dataclass with arguments for preprocessing only"""
dataclass with arguments for preprocessing only
"""
debug: bool = field(default=False) debug: bool = field(default=False)
debug_text_only: bool = field(default=False) debug_text_only: bool = field(default=False)
@@ -30,9 +28,7 @@ class PreprocessCliArgs:
@dataclass @dataclass
class TrainerCliArgs: class TrainerCliArgs:
""" """dataclass with various non-training arguments"""
dataclass with various non-training arguments
"""
debug: bool = field(default=False) debug: bool = field(default=False)
debug_text_only: bool = field(default=False) debug_text_only: bool = field(default=False)
@@ -45,9 +41,7 @@ class TrainerCliArgs:
@dataclass @dataclass
class EvaluateCliArgs: class EvaluateCliArgs:
""" """dataclass with various evaluation arguments"""
dataclass with various evaluation arguments
"""
debug: bool = field(default=False) debug: bool = field(default=False)
debug_text_only: bool = field(default=False) debug_text_only: bool = field(default=False)
@@ -56,9 +50,7 @@ class EvaluateCliArgs:
@dataclass @dataclass
class ConvertDiffTransformerCliArgs: class ConvertDiffTransformerCliArgs:
""" """dataclass with arguments for convert-diff-transformer CLI"""
dataclass with arguments for convert-diff-transformer CLI
"""
debug: bool = field(default=False) debug: bool = field(default=False)
zero_init: bool = field(default=False) zero_init: bool = field(default=False)

View File

@@ -98,9 +98,13 @@ def convert_to_diff_attn(
# Iterate through module children, convert any attn layers to diff attn # Iterate through module children, convert any attn layers to diff attn
for name, child in module.named_children(): for name, child in module.named_children():
if isinstance(child, tuple(ATTENTION_MAPPING.keys())): child_class_name = type(child).__name__
# Choose appropriate differential attention class if child_class_name in [k.__name__ for k in ATTENTION_MAPPING]:
attention_class = ATTENTION_MAPPING[type(child)] # Find matching attention class by name
for orig_class, diff_class in ATTENTION_MAPPING.items():
if orig_class.__name__ == child_class_name:
attention_class = diff_class
break
layer_type = type(child).__name__ layer_type = type(child).__name__
logger.info( logger.info(

View File

@@ -21,7 +21,6 @@ logger = logging.getLogger(__name__)
def repeat_kv(x: torch.Tensor, n_rep: int) -> torch.Tensor: def repeat_kv(x: torch.Tensor, n_rep: int) -> torch.Tensor:
"""torch.repeat_interleave(x, dim=1, repeats=n_rep)"""
batch_size, n_kv_heads, slen, head_dim = x.shape batch_size, n_kv_heads, slen, head_dim = x.shape
if n_rep == 1: if n_rep == 1:
return x return x
@@ -249,6 +248,7 @@ class LlamaDifferentialAttention(DifferentialAttentionBase):
class LlamaDifferentialSdpaAttention(DifferentialAttentionBase): class LlamaDifferentialSdpaAttention(DifferentialAttentionBase):
"""SDPA-based implementation of differential attention.""" """SDPA-based implementation of differential attention."""
# pylint: disable=duplicate-code
def forward( def forward(
self, self,
hidden_states: torch.Tensor, hidden_states: torch.Tensor,
@@ -312,6 +312,7 @@ class LlamaDifferentialSdpaAttention(DifferentialAttentionBase):
class LlamaDifferentialFlashAttention2(DifferentialAttentionBase): class LlamaDifferentialFlashAttention2(DifferentialAttentionBase):
"""Flash Attention 2-based implementation of differential attention.""" """Flash Attention 2-based implementation of differential attention."""
# pylint: disable=duplicate-code
def forward( def forward(
self, self,
hidden_states: torch.Tensor, hidden_states: torch.Tensor,

View File

@@ -84,6 +84,11 @@ class OrderedDumper(yaml.SafeDumper):
"""Custom YAML dumper that maintains dictionary order.""" """Custom YAML dumper that maintains dictionary order."""
def represent_none(self, _):
"""Represent None values as empty fields."""
return self.represent_scalar("tag:yaml.org,2002:null", "")
def ordered_dict_representer(dumper: OrderedDumper, data: Dict) -> Any: def ordered_dict_representer(dumper: OrderedDumper, data: Dict) -> Any:
"""Custom representer for dictionaries that maintains order.""" """Custom representer for dictionaries that maintains order."""
return dumper.represent_mapping("tag:yaml.org,2002:map", data.items()) return dumper.represent_mapping("tag:yaml.org,2002:map", data.items())
@@ -121,7 +126,8 @@ def dump_yaml_preserved_order(
# Reorder the data # Reorder the data
ordered_data = reorder_dict(data, tracker.structure) ordered_data = reorder_dict(data, tracker.structure)
# Register the custom representer # Register the custom representers
OrderedDumper.add_representer(type(None), represent_none)
OrderedDumper.add_representer(dict, ordered_dict_representer) OrderedDumper.add_representer(dict, ordered_dict_representer)
OrderedDumper.add_representer(OrderedDict, ordered_dict_representer) OrderedDumper.add_representer(OrderedDict, ordered_dict_representer)

View File

@@ -4,7 +4,7 @@ import pytest
from click.testing import CliRunner from click.testing import CliRunner
@pytest.fixture() @pytest.fixture(scope="class")
def base_config(): def base_config():
"""Basic config for testing.""" """Basic config for testing."""
return { return {
@@ -26,6 +26,6 @@ def base_config():
} }
@pytest.fixture @pytest.fixture(scope="class")
def cli_runner(): def cli_runner():
return CliRunner() return CliRunner()

View File

@@ -15,133 +15,135 @@ from axolotl.cli.main import cli
from axolotl.common.cli import ConvertDiffTransformerCliArgs from axolotl.common.cli import ConvertDiffTransformerCliArgs
def test_cli_validation(cli_runner): @pytest.mark.usefixtures("base_config", "cli_runner")
# Test missing config file class TestDiffTransformer:
result = cli_runner.invoke(cli, ["convert-diff-transformer"]) """Tests for convert-diff-transformer CLI command"""
assert result.exit_code != 0
assert "Error: Missing argument 'CONFIG'." in result.output
# Test non-existent config file def test_cli_validation(self, cli_runner):
result = cli_runner.invoke(cli, ["convert-diff-transformer", "nonexistent.yml"]) # Test missing config file
assert result.exit_code != 0 result = cli_runner.invoke(cli, ["convert-diff-transformer"])
assert "Error: Invalid value for 'CONFIG'" in result.output assert result.exit_code != 0
assert "Error: Missing argument 'CONFIG'." in result.output
# Test non-existent config file
result = cli_runner.invoke(cli, ["convert-diff-transformer", "nonexistent.yml"])
assert result.exit_code != 0
assert "Error: Invalid value for 'CONFIG'" in result.output
def test_basic_execution(cli_runner, tmp_path: Path, base_config): def test_basic_execution(self, cli_runner, tmp_path: Path, base_config):
config_path = tmp_path / "config.yml" config_path = tmp_path / "config.yml"
with open(config_path, "w", encoding="utf-8") as file: with open(config_path, "w", encoding="utf-8") as file:
yaml.dump(base_config, file) yaml.dump(base_config, file)
with patch( with patch(
"axolotl.cli.integrations.convert_diff_transformer.do_cli" "axolotl.cli.integrations.convert_diff_transformer.do_cli"
) as mock_do_cli: ) as mock_do_cli:
result = cli_runner.invoke(cli, ["convert-diff-transformer", str(config_path)]) result = cli_runner.invoke(
assert result.exit_code == 0 cli, ["convert-diff-transformer", str(config_path)]
)
assert result.exit_code == 0
mock_do_cli.assert_called_once() mock_do_cli.assert_called_once()
assert mock_do_cli.call_args.kwargs["config"] == str(config_path) assert mock_do_cli.call_args.kwargs["config"] == str(config_path)
def test_conversion_cli_basic(self, tmp_path: Path, base_config):
output_dir = tmp_path / "converted"
base_config["output_dir"] = str(output_dir)
def test_conversion_cli_basic(tmp_path: Path, base_config): config_path = tmp_path / "config.yml"
output_dir = tmp_path / "converted" with open(config_path, "w", encoding="utf-8") as file:
base_config["output_dir"] = str(output_dir) yaml.dump(base_config, file)
config_path = tmp_path / "config.yml" cfg = load_cfg(str(config_path))
with open(config_path, "w", encoding="utf-8") as file: cli_args = ConvertDiffTransformerCliArgs()
yaml.dump(base_config, file) _, debug_info = convert_diff_transformer(cfg, cli_args, str(config_path))
cfg = load_cfg(str(config_path)) assert not debug_info
cli_args = ConvertDiffTransformerCliArgs() assert (output_dir / "model.safetensors").exists()
_, debug_info = convert_diff_transformer(cfg, cli_args, str(config_path)) assert (output_dir / "config.json").exists()
assert (output_dir / "axolotl_config.yml").exists()
assert not debug_info def test_conversion_cli_debug(self, tmp_path: Path, base_config):
assert (output_dir / "model.safetensors").exists() output_dir = tmp_path / "converted"
assert (output_dir / "config.json").exists() base_config["output_dir"] = str(output_dir)
assert (output_dir / "axolotl_config.yml").exists()
config_path = tmp_path / "config.yml"
with open(config_path, "w", encoding="utf-8") as file:
yaml.dump(base_config, file)
def test_conversion_cli_debug(tmp_path: Path, base_config): cfg = load_cfg(str(config_path))
output_dir = tmp_path / "converted" cli_args = ConvertDiffTransformerCliArgs(debug=True)
base_config["output_dir"] = str(output_dir) _, debug_info = convert_diff_transformer(cfg, cli_args, str(config_path))
config_path = tmp_path / "config.yml" assert not debug_info["generations_match"]
with open(config_path, "w", encoding="utf-8") as file: assert not debug_info["match_expected"]
yaml.dump(base_config, file) assert (output_dir / "model.safetensors").exists()
assert (output_dir / "config.json").exists()
assert (output_dir / "axolotl_config.yml").exists()
cfg = load_cfg(str(config_path)) def test_conversion_cli_reproduce(self, tmp_path: Path, base_config):
cli_args = ConvertDiffTransformerCliArgs(debug=True) output_dir = tmp_path / "converted"
_, debug_info = convert_diff_transformer(cfg, cli_args, str(config_path)) base_config["output_dir"] = str(output_dir)
assert not debug_info["generations_match"] config_path = tmp_path / "config.yml"
assert not debug_info["match_expected"] with open(config_path, "w", encoding="utf-8") as file:
assert (output_dir / "model.safetensors").exists() yaml.dump(base_config, file)
assert (output_dir / "config.json").exists()
assert (output_dir / "axolotl_config.yml").exists()
cfg = load_cfg(str(config_path))
cli_args = ConvertDiffTransformerCliArgs(
debug=True, zero_init=True, sublayer_norm=False
)
_, debug_info = convert_diff_transformer(cfg, cli_args, str(config_path))
def test_conversion_cli_reproduce(tmp_path: Path, base_config): assert debug_info["generations_match"] is True
output_dir = tmp_path / "converted" assert (output_dir / "model.safetensors").exists()
base_config["output_dir"] = str(output_dir) assert (output_dir / "config.json").exists()
assert (output_dir / "axolotl_config.yml").exists()
config_path = tmp_path / "config.yml" @pytest.mark.parametrize(
with open(config_path, "w", encoding="utf-8") as file: "attention", ["eager_attention", "sdp_attention", "flash_attention"]
yaml.dump(base_config, file)
cfg = load_cfg(str(config_path))
cli_args = ConvertDiffTransformerCliArgs(
debug=True, zero_init=True, sublayer_norm=False
) )
_, debug_info = convert_diff_transformer(cfg, cli_args, str(config_path)) def test_conversion_cli_repoduce_attentions(
self, tmp_path: Path, base_config, attention: Optional[str]
):
output_dir = tmp_path / "converted"
base_config["output_dir"] = str(output_dir)
base_config[attention] = True
assert debug_info["generations_match"] is True config_path = tmp_path / "config.yml"
assert (output_dir / "model.safetensors").exists() with open(config_path, "w", encoding="utf-8") as file:
assert (output_dir / "config.json").exists() yaml.dump(base_config, file)
assert (output_dir / "axolotl_config.yml").exists()
cfg = load_cfg(str(config_path))
cli_args = ConvertDiffTransformerCliArgs(
debug=True, zero_init=True, sublayer_norm=False
)
_, debug_info = convert_diff_transformer(cfg, cli_args, str(config_path))
@pytest.mark.parametrize( assert debug_info["generations_match"] is True
"attention", ["eager_attention", "sdp_attention", "flash_attention"] assert (output_dir / "model.safetensors").exists()
) assert (output_dir / "config.json").exists()
def test_conversion_cli_repoduce_attentions( assert (output_dir / "axolotl_config.yml").exists()
tmp_path: Path, base_config, attention: Optional[str]
):
output_dir = tmp_path / "converted"
base_config["output_dir"] = str(output_dir)
base_config[attention] = True
config_path = tmp_path / "config.yml" @pytest.mark.parametrize(
with open(config_path, "w", encoding="utf-8") as file: "attention", ["eager_attention", "sdp_attention", "flash_attention"]
yaml.dump(base_config, file)
cfg = load_cfg(str(config_path))
cli_args = ConvertDiffTransformerCliArgs(
debug=True, zero_init=True, sublayer_norm=False
) )
_, debug_info = convert_diff_transformer(cfg, cli_args, str(config_path)) def test_conversion_cli_split_heads(
self, tmp_path: Path, base_config, attention: str
):
output_dir = tmp_path / "converted"
base_config["output_dir"] = str(output_dir)
base_config[attention] = True
assert debug_info["generations_match"] is True config_path = tmp_path / "config.yml"
assert (output_dir / "model.safetensors").exists() with open(config_path, "w", encoding="utf-8") as file:
assert (output_dir / "config.json").exists() yaml.dump(base_config, file)
assert (output_dir / "axolotl_config.yml").exists()
cfg = load_cfg(str(config_path))
cli_args = ConvertDiffTransformerCliArgs(debug=True, split_heads=True)
_, debug_info = convert_diff_transformer(cfg, cli_args, str(config_path))
@pytest.mark.parametrize( assert debug_info["generations_match"] is False
"attention", ["eager_attention", "sdp_attention", "flash_attention"] assert (output_dir / "model.safetensors").exists()
) assert (output_dir / "config.json").exists()
def test_conversion_cli_split_heads(tmp_path: Path, base_config, attention: str): assert (output_dir / "axolotl_config.yml").exists()
output_dir = tmp_path / "converted"
base_config["output_dir"] = str(output_dir)
base_config[attention] = True
config_path = tmp_path / "config.yml"
with open(config_path, "w", encoding="utf-8") as file:
yaml.dump(base_config, file)
cfg = load_cfg(str(config_path))
cli_args = ConvertDiffTransformerCliArgs(debug=True, split_heads=True)
_, debug_info = convert_diff_transformer(cfg, cli_args, str(config_path))
assert debug_info["generations_match"] is False
assert (output_dir / "model.safetensors").exists()
assert (output_dir / "config.json").exists()
assert (output_dir / "axolotl_config.yml").exists()