Refactor and fix test isolation issues
This commit is contained in:
@@ -1,4 +1,5 @@
|
|||||||
"""CLI to convert a transformers model's attns to diff attns."""
|
"""CLI to convert a transformers model's attention layers to differential attention layers."""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import warnings
|
import warnings
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@@ -127,6 +128,7 @@ def convert_diff_transformer(cfg, cli_args, config_path):
|
|||||||
else:
|
else:
|
||||||
modified_cfg["plugins"] = [plugin_class]
|
modified_cfg["plugins"] = [plugin_class]
|
||||||
|
|
||||||
|
# Write out the updated axolotl config while preserving original ordering / formatting
|
||||||
dump_yaml_preserved_order(
|
dump_yaml_preserved_order(
|
||||||
data=modified_cfg,
|
data=modified_cfg,
|
||||||
reference_yaml_path=config_path,
|
reference_yaml_path=config_path,
|
||||||
|
|||||||
@@ -12,14 +12,12 @@ from axolotl.utils.dict import DictDefault
|
|||||||
from axolotl.utils.models import load_model, load_tokenizer
|
from axolotl.utils.models import load_model, load_tokenizer
|
||||||
|
|
||||||
configure_logging()
|
configure_logging()
|
||||||
LOG = logging.getLogger("axolotl.common.cli")
|
LOG = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class PreprocessCliArgs:
|
class PreprocessCliArgs:
|
||||||
"""
|
"""dataclass with arguments for preprocessing only"""
|
||||||
dataclass with arguments for preprocessing only
|
|
||||||
"""
|
|
||||||
|
|
||||||
debug: bool = field(default=False)
|
debug: bool = field(default=False)
|
||||||
debug_text_only: bool = field(default=False)
|
debug_text_only: bool = field(default=False)
|
||||||
@@ -30,9 +28,7 @@ class PreprocessCliArgs:
|
|||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class TrainerCliArgs:
|
class TrainerCliArgs:
|
||||||
"""
|
"""dataclass with various non-training arguments"""
|
||||||
dataclass with various non-training arguments
|
|
||||||
"""
|
|
||||||
|
|
||||||
debug: bool = field(default=False)
|
debug: bool = field(default=False)
|
||||||
debug_text_only: bool = field(default=False)
|
debug_text_only: bool = field(default=False)
|
||||||
@@ -45,9 +41,7 @@ class TrainerCliArgs:
|
|||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class EvaluateCliArgs:
|
class EvaluateCliArgs:
|
||||||
"""
|
"""dataclass with various evaluation arguments"""
|
||||||
dataclass with various evaluation arguments
|
|
||||||
"""
|
|
||||||
|
|
||||||
debug: bool = field(default=False)
|
debug: bool = field(default=False)
|
||||||
debug_text_only: bool = field(default=False)
|
debug_text_only: bool = field(default=False)
|
||||||
@@ -56,9 +50,7 @@ class EvaluateCliArgs:
|
|||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class ConvertDiffTransformerCliArgs:
|
class ConvertDiffTransformerCliArgs:
|
||||||
"""
|
"""dataclass with arguments for convert-diff-transformer CLI"""
|
||||||
dataclass with arguments for convert-diff-transformer CLI
|
|
||||||
"""
|
|
||||||
|
|
||||||
debug: bool = field(default=False)
|
debug: bool = field(default=False)
|
||||||
zero_init: bool = field(default=False)
|
zero_init: bool = field(default=False)
|
||||||
|
|||||||
@@ -98,9 +98,13 @@ def convert_to_diff_attn(
|
|||||||
|
|
||||||
# Iterate through module children, convert any attn layers to diff attn
|
# Iterate through module children, convert any attn layers to diff attn
|
||||||
for name, child in module.named_children():
|
for name, child in module.named_children():
|
||||||
if isinstance(child, tuple(ATTENTION_MAPPING.keys())):
|
child_class_name = type(child).__name__
|
||||||
# Choose appropriate differential attention class
|
if child_class_name in [k.__name__ for k in ATTENTION_MAPPING]:
|
||||||
attention_class = ATTENTION_MAPPING[type(child)]
|
# Find matching attention class by name
|
||||||
|
for orig_class, diff_class in ATTENTION_MAPPING.items():
|
||||||
|
if orig_class.__name__ == child_class_name:
|
||||||
|
attention_class = diff_class
|
||||||
|
break
|
||||||
|
|
||||||
layer_type = type(child).__name__
|
layer_type = type(child).__name__
|
||||||
logger.info(
|
logger.info(
|
||||||
|
|||||||
@@ -21,7 +21,6 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
|
|
||||||
def repeat_kv(x: torch.Tensor, n_rep: int) -> torch.Tensor:
|
def repeat_kv(x: torch.Tensor, n_rep: int) -> torch.Tensor:
|
||||||
"""torch.repeat_interleave(x, dim=1, repeats=n_rep)"""
|
|
||||||
batch_size, n_kv_heads, slen, head_dim = x.shape
|
batch_size, n_kv_heads, slen, head_dim = x.shape
|
||||||
if n_rep == 1:
|
if n_rep == 1:
|
||||||
return x
|
return x
|
||||||
@@ -249,6 +248,7 @@ class LlamaDifferentialAttention(DifferentialAttentionBase):
|
|||||||
class LlamaDifferentialSdpaAttention(DifferentialAttentionBase):
|
class LlamaDifferentialSdpaAttention(DifferentialAttentionBase):
|
||||||
"""SDPA-based implementation of differential attention."""
|
"""SDPA-based implementation of differential attention."""
|
||||||
|
|
||||||
|
# pylint: disable=duplicate-code
|
||||||
def forward(
|
def forward(
|
||||||
self,
|
self,
|
||||||
hidden_states: torch.Tensor,
|
hidden_states: torch.Tensor,
|
||||||
@@ -312,6 +312,7 @@ class LlamaDifferentialSdpaAttention(DifferentialAttentionBase):
|
|||||||
class LlamaDifferentialFlashAttention2(DifferentialAttentionBase):
|
class LlamaDifferentialFlashAttention2(DifferentialAttentionBase):
|
||||||
"""Flash Attention 2-based implementation of differential attention."""
|
"""Flash Attention 2-based implementation of differential attention."""
|
||||||
|
|
||||||
|
# pylint: disable=duplicate-code
|
||||||
def forward(
|
def forward(
|
||||||
self,
|
self,
|
||||||
hidden_states: torch.Tensor,
|
hidden_states: torch.Tensor,
|
||||||
|
|||||||
@@ -84,6 +84,11 @@ class OrderedDumper(yaml.SafeDumper):
|
|||||||
"""Custom YAML dumper that maintains dictionary order."""
|
"""Custom YAML dumper that maintains dictionary order."""
|
||||||
|
|
||||||
|
|
||||||
|
def represent_none(self, _):
|
||||||
|
"""Represent None values as empty fields."""
|
||||||
|
return self.represent_scalar("tag:yaml.org,2002:null", "")
|
||||||
|
|
||||||
|
|
||||||
def ordered_dict_representer(dumper: OrderedDumper, data: Dict) -> Any:
|
def ordered_dict_representer(dumper: OrderedDumper, data: Dict) -> Any:
|
||||||
"""Custom representer for dictionaries that maintains order."""
|
"""Custom representer for dictionaries that maintains order."""
|
||||||
return dumper.represent_mapping("tag:yaml.org,2002:map", data.items())
|
return dumper.represent_mapping("tag:yaml.org,2002:map", data.items())
|
||||||
@@ -121,7 +126,8 @@ def dump_yaml_preserved_order(
|
|||||||
# Reorder the data
|
# Reorder the data
|
||||||
ordered_data = reorder_dict(data, tracker.structure)
|
ordered_data = reorder_dict(data, tracker.structure)
|
||||||
|
|
||||||
# Register the custom representer
|
# Register the custom representers
|
||||||
|
OrderedDumper.add_representer(type(None), represent_none)
|
||||||
OrderedDumper.add_representer(dict, ordered_dict_representer)
|
OrderedDumper.add_representer(dict, ordered_dict_representer)
|
||||||
OrderedDumper.add_representer(OrderedDict, ordered_dict_representer)
|
OrderedDumper.add_representer(OrderedDict, ordered_dict_representer)
|
||||||
|
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ import pytest
|
|||||||
from click.testing import CliRunner
|
from click.testing import CliRunner
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture()
|
@pytest.fixture(scope="class")
|
||||||
def base_config():
|
def base_config():
|
||||||
"""Basic config for testing."""
|
"""Basic config for testing."""
|
||||||
return {
|
return {
|
||||||
@@ -26,6 +26,6 @@ def base_config():
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture(scope="class")
|
||||||
def cli_runner():
|
def cli_runner():
|
||||||
return CliRunner()
|
return CliRunner()
|
||||||
|
|||||||
@@ -15,133 +15,135 @@ from axolotl.cli.main import cli
|
|||||||
from axolotl.common.cli import ConvertDiffTransformerCliArgs
|
from axolotl.common.cli import ConvertDiffTransformerCliArgs
|
||||||
|
|
||||||
|
|
||||||
def test_cli_validation(cli_runner):
|
@pytest.mark.usefixtures("base_config", "cli_runner")
|
||||||
# Test missing config file
|
class TestDiffTransformer:
|
||||||
result = cli_runner.invoke(cli, ["convert-diff-transformer"])
|
"""Tests for convert-diff-transformer CLI command"""
|
||||||
assert result.exit_code != 0
|
|
||||||
assert "Error: Missing argument 'CONFIG'." in result.output
|
|
||||||
|
|
||||||
# Test non-existent config file
|
def test_cli_validation(self, cli_runner):
|
||||||
result = cli_runner.invoke(cli, ["convert-diff-transformer", "nonexistent.yml"])
|
# Test missing config file
|
||||||
assert result.exit_code != 0
|
result = cli_runner.invoke(cli, ["convert-diff-transformer"])
|
||||||
assert "Error: Invalid value for 'CONFIG'" in result.output
|
assert result.exit_code != 0
|
||||||
|
assert "Error: Missing argument 'CONFIG'." in result.output
|
||||||
|
|
||||||
|
# Test non-existent config file
|
||||||
|
result = cli_runner.invoke(cli, ["convert-diff-transformer", "nonexistent.yml"])
|
||||||
|
assert result.exit_code != 0
|
||||||
|
assert "Error: Invalid value for 'CONFIG'" in result.output
|
||||||
|
|
||||||
def test_basic_execution(cli_runner, tmp_path: Path, base_config):
|
def test_basic_execution(self, cli_runner, tmp_path: Path, base_config):
|
||||||
config_path = tmp_path / "config.yml"
|
config_path = tmp_path / "config.yml"
|
||||||
with open(config_path, "w", encoding="utf-8") as file:
|
with open(config_path, "w", encoding="utf-8") as file:
|
||||||
yaml.dump(base_config, file)
|
yaml.dump(base_config, file)
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"axolotl.cli.integrations.convert_diff_transformer.do_cli"
|
"axolotl.cli.integrations.convert_diff_transformer.do_cli"
|
||||||
) as mock_do_cli:
|
) as mock_do_cli:
|
||||||
result = cli_runner.invoke(cli, ["convert-diff-transformer", str(config_path)])
|
result = cli_runner.invoke(
|
||||||
assert result.exit_code == 0
|
cli, ["convert-diff-transformer", str(config_path)]
|
||||||
|
)
|
||||||
|
assert result.exit_code == 0
|
||||||
|
|
||||||
mock_do_cli.assert_called_once()
|
mock_do_cli.assert_called_once()
|
||||||
assert mock_do_cli.call_args.kwargs["config"] == str(config_path)
|
assert mock_do_cli.call_args.kwargs["config"] == str(config_path)
|
||||||
|
|
||||||
|
def test_conversion_cli_basic(self, tmp_path: Path, base_config):
|
||||||
|
output_dir = tmp_path / "converted"
|
||||||
|
base_config["output_dir"] = str(output_dir)
|
||||||
|
|
||||||
def test_conversion_cli_basic(tmp_path: Path, base_config):
|
config_path = tmp_path / "config.yml"
|
||||||
output_dir = tmp_path / "converted"
|
with open(config_path, "w", encoding="utf-8") as file:
|
||||||
base_config["output_dir"] = str(output_dir)
|
yaml.dump(base_config, file)
|
||||||
|
|
||||||
config_path = tmp_path / "config.yml"
|
cfg = load_cfg(str(config_path))
|
||||||
with open(config_path, "w", encoding="utf-8") as file:
|
cli_args = ConvertDiffTransformerCliArgs()
|
||||||
yaml.dump(base_config, file)
|
_, debug_info = convert_diff_transformer(cfg, cli_args, str(config_path))
|
||||||
|
|
||||||
cfg = load_cfg(str(config_path))
|
assert not debug_info
|
||||||
cli_args = ConvertDiffTransformerCliArgs()
|
assert (output_dir / "model.safetensors").exists()
|
||||||
_, debug_info = convert_diff_transformer(cfg, cli_args, str(config_path))
|
assert (output_dir / "config.json").exists()
|
||||||
|
assert (output_dir / "axolotl_config.yml").exists()
|
||||||
|
|
||||||
assert not debug_info
|
def test_conversion_cli_debug(self, tmp_path: Path, base_config):
|
||||||
assert (output_dir / "model.safetensors").exists()
|
output_dir = tmp_path / "converted"
|
||||||
assert (output_dir / "config.json").exists()
|
base_config["output_dir"] = str(output_dir)
|
||||||
assert (output_dir / "axolotl_config.yml").exists()
|
|
||||||
|
|
||||||
|
config_path = tmp_path / "config.yml"
|
||||||
|
with open(config_path, "w", encoding="utf-8") as file:
|
||||||
|
yaml.dump(base_config, file)
|
||||||
|
|
||||||
def test_conversion_cli_debug(tmp_path: Path, base_config):
|
cfg = load_cfg(str(config_path))
|
||||||
output_dir = tmp_path / "converted"
|
cli_args = ConvertDiffTransformerCliArgs(debug=True)
|
||||||
base_config["output_dir"] = str(output_dir)
|
_, debug_info = convert_diff_transformer(cfg, cli_args, str(config_path))
|
||||||
|
|
||||||
config_path = tmp_path / "config.yml"
|
assert not debug_info["generations_match"]
|
||||||
with open(config_path, "w", encoding="utf-8") as file:
|
assert not debug_info["match_expected"]
|
||||||
yaml.dump(base_config, file)
|
assert (output_dir / "model.safetensors").exists()
|
||||||
|
assert (output_dir / "config.json").exists()
|
||||||
|
assert (output_dir / "axolotl_config.yml").exists()
|
||||||
|
|
||||||
cfg = load_cfg(str(config_path))
|
def test_conversion_cli_reproduce(self, tmp_path: Path, base_config):
|
||||||
cli_args = ConvertDiffTransformerCliArgs(debug=True)
|
output_dir = tmp_path / "converted"
|
||||||
_, debug_info = convert_diff_transformer(cfg, cli_args, str(config_path))
|
base_config["output_dir"] = str(output_dir)
|
||||||
|
|
||||||
assert not debug_info["generations_match"]
|
config_path = tmp_path / "config.yml"
|
||||||
assert not debug_info["match_expected"]
|
with open(config_path, "w", encoding="utf-8") as file:
|
||||||
assert (output_dir / "model.safetensors").exists()
|
yaml.dump(base_config, file)
|
||||||
assert (output_dir / "config.json").exists()
|
|
||||||
assert (output_dir / "axolotl_config.yml").exists()
|
|
||||||
|
|
||||||
|
cfg = load_cfg(str(config_path))
|
||||||
|
cli_args = ConvertDiffTransformerCliArgs(
|
||||||
|
debug=True, zero_init=True, sublayer_norm=False
|
||||||
|
)
|
||||||
|
_, debug_info = convert_diff_transformer(cfg, cli_args, str(config_path))
|
||||||
|
|
||||||
def test_conversion_cli_reproduce(tmp_path: Path, base_config):
|
assert debug_info["generations_match"] is True
|
||||||
output_dir = tmp_path / "converted"
|
assert (output_dir / "model.safetensors").exists()
|
||||||
base_config["output_dir"] = str(output_dir)
|
assert (output_dir / "config.json").exists()
|
||||||
|
assert (output_dir / "axolotl_config.yml").exists()
|
||||||
|
|
||||||
config_path = tmp_path / "config.yml"
|
@pytest.mark.parametrize(
|
||||||
with open(config_path, "w", encoding="utf-8") as file:
|
"attention", ["eager_attention", "sdp_attention", "flash_attention"]
|
||||||
yaml.dump(base_config, file)
|
|
||||||
|
|
||||||
cfg = load_cfg(str(config_path))
|
|
||||||
cli_args = ConvertDiffTransformerCliArgs(
|
|
||||||
debug=True, zero_init=True, sublayer_norm=False
|
|
||||||
)
|
)
|
||||||
_, debug_info = convert_diff_transformer(cfg, cli_args, str(config_path))
|
def test_conversion_cli_repoduce_attentions(
|
||||||
|
self, tmp_path: Path, base_config, attention: Optional[str]
|
||||||
|
):
|
||||||
|
output_dir = tmp_path / "converted"
|
||||||
|
base_config["output_dir"] = str(output_dir)
|
||||||
|
base_config[attention] = True
|
||||||
|
|
||||||
assert debug_info["generations_match"] is True
|
config_path = tmp_path / "config.yml"
|
||||||
assert (output_dir / "model.safetensors").exists()
|
with open(config_path, "w", encoding="utf-8") as file:
|
||||||
assert (output_dir / "config.json").exists()
|
yaml.dump(base_config, file)
|
||||||
assert (output_dir / "axolotl_config.yml").exists()
|
|
||||||
|
|
||||||
|
cfg = load_cfg(str(config_path))
|
||||||
|
cli_args = ConvertDiffTransformerCliArgs(
|
||||||
|
debug=True, zero_init=True, sublayer_norm=False
|
||||||
|
)
|
||||||
|
_, debug_info = convert_diff_transformer(cfg, cli_args, str(config_path))
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
assert debug_info["generations_match"] is True
|
||||||
"attention", ["eager_attention", "sdp_attention", "flash_attention"]
|
assert (output_dir / "model.safetensors").exists()
|
||||||
)
|
assert (output_dir / "config.json").exists()
|
||||||
def test_conversion_cli_repoduce_attentions(
|
assert (output_dir / "axolotl_config.yml").exists()
|
||||||
tmp_path: Path, base_config, attention: Optional[str]
|
|
||||||
):
|
|
||||||
output_dir = tmp_path / "converted"
|
|
||||||
base_config["output_dir"] = str(output_dir)
|
|
||||||
base_config[attention] = True
|
|
||||||
|
|
||||||
config_path = tmp_path / "config.yml"
|
@pytest.mark.parametrize(
|
||||||
with open(config_path, "w", encoding="utf-8") as file:
|
"attention", ["eager_attention", "sdp_attention", "flash_attention"]
|
||||||
yaml.dump(base_config, file)
|
|
||||||
|
|
||||||
cfg = load_cfg(str(config_path))
|
|
||||||
cli_args = ConvertDiffTransformerCliArgs(
|
|
||||||
debug=True, zero_init=True, sublayer_norm=False
|
|
||||||
)
|
)
|
||||||
_, debug_info = convert_diff_transformer(cfg, cli_args, str(config_path))
|
def test_conversion_cli_split_heads(
|
||||||
|
self, tmp_path: Path, base_config, attention: str
|
||||||
|
):
|
||||||
|
output_dir = tmp_path / "converted"
|
||||||
|
base_config["output_dir"] = str(output_dir)
|
||||||
|
base_config[attention] = True
|
||||||
|
|
||||||
assert debug_info["generations_match"] is True
|
config_path = tmp_path / "config.yml"
|
||||||
assert (output_dir / "model.safetensors").exists()
|
with open(config_path, "w", encoding="utf-8") as file:
|
||||||
assert (output_dir / "config.json").exists()
|
yaml.dump(base_config, file)
|
||||||
assert (output_dir / "axolotl_config.yml").exists()
|
|
||||||
|
|
||||||
|
cfg = load_cfg(str(config_path))
|
||||||
|
cli_args = ConvertDiffTransformerCliArgs(debug=True, split_heads=True)
|
||||||
|
_, debug_info = convert_diff_transformer(cfg, cli_args, str(config_path))
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
assert debug_info["generations_match"] is False
|
||||||
"attention", ["eager_attention", "sdp_attention", "flash_attention"]
|
assert (output_dir / "model.safetensors").exists()
|
||||||
)
|
assert (output_dir / "config.json").exists()
|
||||||
def test_conversion_cli_split_heads(tmp_path: Path, base_config, attention: str):
|
assert (output_dir / "axolotl_config.yml").exists()
|
||||||
output_dir = tmp_path / "converted"
|
|
||||||
base_config["output_dir"] = str(output_dir)
|
|
||||||
base_config[attention] = True
|
|
||||||
|
|
||||||
config_path = tmp_path / "config.yml"
|
|
||||||
with open(config_path, "w", encoding="utf-8") as file:
|
|
||||||
yaml.dump(base_config, file)
|
|
||||||
|
|
||||||
cfg = load_cfg(str(config_path))
|
|
||||||
cli_args = ConvertDiffTransformerCliArgs(debug=True, split_heads=True)
|
|
||||||
_, debug_info = convert_diff_transformer(cfg, cli_args, str(config_path))
|
|
||||||
|
|
||||||
assert debug_info["generations_match"] is False
|
|
||||||
assert (output_dir / "model.safetensors").exists()
|
|
||||||
assert (output_dir / "config.json").exists()
|
|
||||||
assert (output_dir / "axolotl_config.yml").exists()
|
|
||||||
|
|||||||
Reference in New Issue
Block a user