Merge branch 'main' into diff-transformer

small fixes, improvements
merging into main
2025-01-27 14:46:17 +00:00 · 2025-01-24 19:53:54 +00:00 · 2025-01-24 18:03:27 +00:00 · 2025-01-24 17:46:11 +00:00 · 2025-01-23 22:11:35 +00:00 · 2025-01-23 21:33:53 +00:00
14 changed files with 216 additions and 12 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -186,3 +186,6 @@ out/

 # vim
 *.swp
+
+# symlinked to axolotl-artifacts in docker containers
+outputs
--- a/cicd/cicd.sh
+++ b/cicd/cicd.sh
@@ -4,7 +4,6 @@ set -e
 python -c "import torch; assert '$PYTORCH_VERSION' in torch.__version__"

 pytest -v --durations=10 -n8 --ignore=tests/e2e/ --ignore=tests/patched/ /workspace/axolotl/tests/
-# pytest -v --durations=10 -n8 --dist loadfile /workspace/axolotl/tests/patched/
 pytest -v --durations=10 /workspace/axolotl/tests/e2e/patched/
 pytest -v --durations=10 -n1 /workspace/axolotl/tests/e2e/solo/
 pytest -v --durations=10 /workspace/axolotl/tests/e2e/integrations/
--- a/cicd/multigpu.py
+++ b/cicd/multigpu.py
@@ -1,6 +1,6 @@
 """
- modal application to run axolotl gpu tests in Modal
- """
+modal application to run axolotl gpu tests in Modal
+"""
 # pylint: disable=duplicate-code

 import os
--- a/src/axolotl/cli/evaluate.py
+++ b/src/axolotl/cli/evaluate.py
@@ -19,7 +19,7 @@ from axolotl.utils.dict import DictDefault
 LOG = logging.getLogger(__name__)


-def do_evaluate(cfg: DictDefault, cli_args: TrainerCliArgs) -> None:
+def do_evaluate(cfg: DictDefault, cli_args: TrainerCliArgs) -> dict[str, float]:
    """
    Evaluates a `transformers` model by first loading the dataset(s) specified in the
    `axolotl` config, and then calling `axolotl.evaluate.evaluate`, which computes
@@ -39,7 +39,7 @@ def do_evaluate(cfg: DictDefault, cli_args: TrainerCliArgs) -> None:
    else:
        dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)

-    evaluate(cfg=cfg, dataset_meta=dataset_meta)
+    return evaluate(cfg=cfg, dataset_meta=dataset_meta)


 def do_cli(config: Union[Path, str] = Path("examples/"), **kwargs) -> None:
--- a/src/axolotl/cli/main.py
+++ b/src/axolotl/cli/main.py
@@ -8,6 +8,7 @@ import click

 import axolotl
 from axolotl.cli.args import EvaluateCliArgs, PreprocessCliArgs, TrainerCliArgs
+from axolotl.cli.plugins import setup_plugin_commands
 from axolotl.cli.utils import (
    add_options_from_config,
    add_options_from_dataclass,
@@ -222,6 +223,9 @@ def fetch(directory: str, dest: Optional[str]) -> None:
    fetch_from_github(f"{directory}/", dest)


+setup_plugin_commands(cli)
+
+
 def main():
    cli()

--- a/src/axolotl/cli/plugins.py
+++ b/src/axolotl/cli/plugins.py
@@ -0,0 +1,36 @@
+"""Module for adding click CLI commands from axolotl plugins."""
+
+import logging
+
+import click
+
+from axolotl.cli.utils import add_options_from_config, add_options_from_dataclass
+from axolotl.logging_config import configure_logging
+from axolotl.utils.config.models.input.v0_4_1 import AxolotlInputConfig
+
+configure_logging()
+LOG = logging.getLogger(__name__)
+
+
+def setup_plugin_commands(cli: click.core.Group) -> None:
+    """
+    Setup CLI commands for available plugins.
+
+    Args:
+        cli: Click CLI object to add plugin CLI options to.
+    """
+    try:
+        from axolotl_diff_transformer.convert_diff_transformer import do_cli
+        from axolotl_diff_transformer.plugin.cli import ConvertDiffTransformerCliArgs
+
+        @cli.command()
+        @click.argument("config", type=click.Path(exists=True, path_type=str))
+        @add_options_from_dataclass(ConvertDiffTransformerCliArgs)
+        @add_options_from_config(AxolotlInputConfig)
+        def convert_diff_transformer(config: str, **kwargs):
+            """Convert model attention layers to differential attention layers."""
+            kwargs = {k: v for k, v in kwargs.items() if v is not None}
+            do_cli(config=config, **kwargs)
+
+    except ImportError as exc:
+        LOG.debug("axolotl-diff-transformer not found: %s", exc)
--- a/src/axolotl/cli/utils.py
+++ b/src/axolotl/cli/utils.py
@@ -157,6 +157,8 @@ def build_command(base_cmd: list[str], options: dict[str, Any]) -> list[str]:
        if isinstance(value, bool):
            if value:
                cmd.append(f"--{key}")
+            else:
+                cmd.append(f"--no{key}")
        else:
            cmd.extend([f"--{key}", str(value)])

--- a/src/axolotl/core/trainer_builder.py
+++ b/src/axolotl/core/trainer_builder.py
@@ -297,7 +297,7 @@ class AxolotlTrainingArguments(AxolotlTrainingMixins, TrainingArguments):
    """
    Training arguments for Causal trainer

-    This code is duplicated due to HF TrainingArguments not setting output_dir with a defaujlt value
+    This code is duplicated due to HF TrainingArguments not setting output_dir with a default value
    so it can't be used as a mixin.
    """

--- a/src/axolotl/evaluate.py
+++ b/src/axolotl/evaluate.py
@@ -4,7 +4,7 @@ import csv
 import os
 import sys
 from pathlib import Path
-from typing import Dict, Optional
+from typing import Optional

 import torch
 from accelerate.logging import get_logger
@@ -26,7 +26,7 @@ LOG = get_logger("axolotl.evaluate")

 def evaluate_dataset(
    trainer, dataset, dataset_type: str, flash_optimum: bool = False
-) -> Optional[Dict[str, float]]:
+) -> Optional[dict[str, float]]:
    """Helper function to evaluate a single dataset safely.

    Args:
@@ -61,7 +61,7 @@ def evaluate_dataset(
    return metrics


-def evaluate(*, cfg: DictDefault, dataset_meta: TrainDatasetMeta) -> Dict[str, float]:
+def evaluate(*, cfg: DictDefault, dataset_meta: TrainDatasetMeta) -> dict[str, float]:
    """
    Evaluate a model on training and validation datasets

--- a/src/axolotl/integrations/config.py
+++ b/src/axolotl/integrations/config.py
@@ -43,10 +43,12 @@ def merge_input_args():
    input_args: List[str] = plugin_manager.get_input_args()
    plugin_classes = []
    dynamic_input = ""
+
    for plugin_args in input_args:
        plugin_module, plugin_cls = plugin_args.rsplit(".", 1)
        dynamic_input += f"from {plugin_module} import {plugin_cls}\n"
        plugin_classes.append(plugin_cls)
+
    if dynamic_input:
        dynamic_input += f"class AxolotlConfigWCapabilities(AxolotlConfigWCapabilitiesBase, {', '.join(plugin_classes)}):\n    pass\n"
        dynamic_input += f"class AxolotlInputConfig(AxolotlInputConfigBase, {', '.join(plugin_classes)}):\n    pass\n"
@@ -62,4 +64,5 @@ def merge_input_args():
            "AxolotlConfigWCapabilities"
        ]
        return AxolotlConfigWCapabilities, AxolotlInputConfig
+
    return AxolotlConfigWCapabilitiesBase, AxolotlInputConfigBase
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -812,6 +812,7 @@ class ModelLoader:

            if self.cfg.is_multimodal:
                self.model_config.text_config = self.text_model_config
+
            self.model = self.AutoModelLoader.from_pretrained(
                self.base_model,
                config=self.model_config,
--- a/src/axolotl/utils/yaml.py
+++ b/src/axolotl/utils/yaml.py
@@ -0,0 +1,157 @@
+"""Utilities for YAML files."""
+
+from collections import OrderedDict
+from typing import Any, Dict, List, Set, Tuple, Union
+
+import yaml
+
+
+class YAMLOrderTracker:
+    """Tracks the order of keys and section breaks in YAML files."""
+
+    def __init__(self, yaml_path: str):
+        self.yaml_path = yaml_path
+        self.structure, self.needs_break = self._parse_yaml_structure()
+
+    def _get_indentation_level(self, line: str) -> int:
+        """Get the indentation level of a line."""
+        return len(line) - len(line.lstrip())
+
+    def _parse_yaml_structure(
+        self,
+    ) -> Tuple[Dict[str, Union[List[str], Dict]], Set[str]]:
+        """Parse the YAML file to extract structure and identify section breaks."""
+        with open(self.yaml_path, "r", encoding="utf-8") as file:
+            contents = file.readlines()
+
+        structure: OrderedDict = OrderedDict()
+        needs_break = set()  # Track which keys should have a break before them
+        current_path = []
+        last_indentation = -1
+        had_empty_line = False
+
+        for line in contents:
+            # Track empty lines and comments
+            if not line.strip() or line.strip().startswith("#"):
+                had_empty_line = True
+                continue
+
+            # Get indentation level and content
+            indentation = self._get_indentation_level(line)
+            content = line.strip()
+
+            # Skip lines that don't define keys
+            if ":" not in content:
+                continue
+
+            # Extract key
+            key = content.split(":")[0].strip()
+
+            # If this is a top-level key and we had an empty line, mark it
+            if indentation == 0:
+                if had_empty_line:
+                    needs_break.add(key)
+                had_empty_line = False
+
+            # Handle indentation changes
+            if indentation > last_indentation:
+                current_path.append(key)
+            elif indentation < last_indentation:
+                levels_up = (last_indentation - indentation) // 2
+                current_path = current_path[:-levels_up]
+                current_path[-1] = key
+            else:
+                if current_path:
+                    current_path[-1] = key
+
+            # Update structure
+            current_dict = structure
+            for path_key in current_path[:-1]:
+                if path_key not in current_dict:
+                    current_dict[path_key] = OrderedDict()
+                current_dict = current_dict[path_key]
+
+            if current_path:
+                if current_path[-1] not in current_dict:
+                    current_dict[current_path[-1]] = OrderedDict()
+
+            last_indentation = indentation
+
+        return structure, needs_break
+
+
+class OrderedDumper(yaml.SafeDumper):
+    """Custom YAML dumper that maintains dictionary order."""
+
+
+def represent_none(self, _):
+    """Represent None values as empty fields."""
+    return self.represent_scalar("tag:yaml.org,2002:null", "")
+
+
+def ordered_dict_representer(dumper: OrderedDumper, data: Dict) -> Any:
+    """Custom representer for dictionaries that maintains order."""
+    return dumper.represent_mapping("tag:yaml.org,2002:map", data.items())
+
+
+def reorder_dict(data: Dict, reference_structure: Dict) -> OrderedDict:
+    """Reorder a dictionary based on a reference structure."""
+    ordered = OrderedDict()
+
+    # First add keys that are in the reference order
+    for key in reference_structure:
+        if key in data:
+            if isinstance(reference_structure[key], dict) and isinstance(
+                data[key], dict
+            ):
+                ordered[key] = reorder_dict(data[key], reference_structure[key])
+            else:
+                ordered[key] = data[key]
+
+    # Then add any remaining keys that weren't in the reference
+    for key in data:
+        if key not in ordered:
+            ordered[key] = data[key]
+
+    return ordered
+
+
+def dump_yaml_preserved_order(
+    data: Dict, reference_yaml_path: str, output_path: str
+) -> None:
+    """Dump YAML file while preserving nested order and normalized spacing."""
+    # Get reference structure and spacing
+    tracker = YAMLOrderTracker(reference_yaml_path)
+
+    # Reorder the data
+    ordered_data = reorder_dict(data, tracker.structure)
+
+    # Register the custom representers
+    OrderedDumper.add_representer(type(None), represent_none)
+    OrderedDumper.add_representer(dict, ordered_dict_representer)
+    OrderedDumper.add_representer(OrderedDict, ordered_dict_representer)
+
+    # First dump to string
+    yaml_str = yaml.dump(
+        ordered_data, Dumper=OrderedDumper, sort_keys=False, default_flow_style=False
+    )
+
+    # Add spacing according to reference
+    lines = yaml_str.split("\n")
+    result_lines: List[str] = []
+    current_line = 0
+
+    while current_line < len(lines):
+        line = lines[current_line]
+        if line.strip() and ":" in line and not line.startswith(" "):  # Top-level key
+            key = line.split(":")[0].strip()
+            if key in tracker.needs_break:
+                # Add single empty line before this key
+                if result_lines and result_lines[-1] != "":
+                    result_lines.append("")
+        result_lines.append(line)
+        current_line += 1
+
+    # Write the final result
+    with open(output_path, "w", encoding="utf-8") as file:
+        file.write("\n".join(result_lines))
--- a/tests/cli/test_cli_base.py
+++ b/tests/cli/test_cli_base.py
@@ -43,14 +43,12 @@ class BaseCliTest:
            result = cli_runner.invoke(cli, [command, str(config_path)])

            assert mock.called
-            assert mock.call_args.args[0] == [
+            assert mock.call_args.args[0][:5] == [
                "accelerate",
                "launch",
                "-m",
                f"axolotl.cli.{command}",
                str(config_path),
-                "--debug-num-examples",
-                "0",
            ]
            assert mock.call_args.kwargs == {"check": True}
            assert result.exit_code == 0
--- a/tests/cli/test_cli_interface.py
+++ b/tests/cli/test_cli_interface.py
@@ -23,6 +23,7 @@ def test_build_command():
        "--batch-size",
        "8",
        "--debug",
+        "--nouse-fp16",
    ]
Author	SHA1	Message	Date
Dan Saunders	2daa94080c	Merge branch 'main' into diff-transformer	2025-01-27 14:46:17 +00:00
Dan Saunders	0e9bfa6dee	small fixes, improvements	2025-01-24 19:53:54 +00:00
Dan Saunders	ef38f10274	merging into main	2025-01-24 18:03:27 +00:00
Dan Saunders	66262c3092	moving out all diff attn code to plugin repo	2025-01-24 17:46:11 +00:00
Dan Saunders	016ba124e4	README update	2025-01-23 22:11:35 +00:00
Dan Saunders	7145d52d99	moving diff attn code to separate repo	2025-01-23 21:33:53 +00:00
Dan Saunders	28694219a5	inline comment change	2025-01-14 16:59:43 +00:00
Dan Saunders	fd8ad6fcbf	fixing negative component mixing	2025-01-13 19:21:55 +00:00
Dan Saunders	661d71a14b	adding diff attn negative component warmup (in progress)	2025-01-10 21:57:31 +00:00
Dan Saunders	6dd47edcb8	fire CLI fixes	2025-01-10 18:24:16 +00:00
Dan Saunders	7aca08ff60	adding guard statements	2025-01-10 16:39:21 +00:00
Dan Saunders	4f804f6d88	adding diff attn callback, adding documentation	2025-01-10 16:28:51 +00:00
Dan Saunders	443327c585	CLI build_command bugfix	2025-01-10 16:28:51 +00:00
Dan Saunders	70c4e6fbe6	updates and cleanup	2025-01-10 16:28:51 +00:00
Dan Saunders	2a7f139ad2	pre-commit fix	2025-01-10 16:28:51 +00:00
Dan Saunders	332ce0ae85	fixes and cleanup	2025-01-10 16:28:51 +00:00
Dan Saunders	e5fa842ff8	update	2025-01-10 16:28:51 +00:00
Dan Saunders	78e0ec0aa5	changes	2025-01-10 16:28:51 +00:00
Dan Saunders	3bc568eb27	adding registration function	2025-01-10 16:28:51 +00:00
Dan Saunders	eb6611d55f	progress on modeling code	2025-01-10 16:28:51 +00:00
Dan Saunders	4ff3328e66	updated custom modeling code	2025-01-10 16:28:51 +00:00
Dan Saunders	a3fd5074a9	fix duplicate-code warnings	2025-01-10 16:28:51 +00:00
Dan Saunders	5b90da0be3	added modeling code; cleanup + refactor	2025-01-10 16:28:51 +00:00
Dan Saunders	fcbfa86373	refactor and fixing test isolation issues	2025-01-10 16:28:51 +00:00
Dan Saunders	0d56582090	adding yaml dumper preserving input config format	2025-01-10 16:28:51 +00:00
Dan Saunders	390cb5742e	removing extra pytest xdist args	2025-01-10 16:28:51 +00:00
Dan Saunders	1d935f65c3	moving tests around for flash_attn install	2025-01-10 16:28:51 +00:00
Dan Saunders	66176b3e07	adding split_heads argument for retaining original (Q, K) dimensionanlity	2025-01-10 16:28:51 +00:00
Dan Saunders	505321ac95	isolating problematic test	2025-01-10 16:28:51 +00:00
Dan Saunders	0b382c88da	fixes post-rebase	2025-01-10 16:28:51 +00:00
Dan Saunders	ea07a7086e	plugin implementation	2025-01-10 16:28:51 +00:00
Dan Saunders	d22e1136bc	convert-differential-transformer test coverage	2025-01-10 16:28:51 +00:00
Dan Saunders	63b8e42c6b	duplicate code ignore	2025-01-10 16:28:51 +00:00
Dan Saunders	bda1eed59e	differential flash attention 2; cleanup	2025-01-10 16:28:51 +00:00
Dan Saunders	41ebd93158	moving monkeypatch	2025-01-10 16:28:51 +00:00
Dan Saunders	4c050ce807	pre-commit fix	2025-01-10 16:28:51 +00:00
Dan Saunders	6665acf63d	fix model save / load logic	2025-01-10 16:28:51 +00:00
Dan Saunders	2f9fa4c465	various improvemnents	2025-01-10 16:28:51 +00:00
Dan Saunders	849bc94112	various improvemnents	2025-01-10 16:28:51 +00:00
Dan Saunders	e484ec778d	training fixes, patching, minor cleanup	2025-01-10 16:28:51 +00:00
Dan Saunders	df1504ae14	adding CLI command for convert-diff-transformer	2025-01-10 16:28:51 +00:00
Dan Saunders	7be0d7496c	Adding script for doing conversion; fixes and updates	2025-01-10 16:28:51 +00:00
Dan Saunders	13cdffa91f	initial diff attn layer / model conversion implementation (support for llama arch)	2025-01-10 16:28:51 +00:00
Dan Saunders	7a4b296f60	Basic evaluate CLI command / codepath (#2188 ) * basic evaluate CLI command / codepath * tests for evaluate CLI command * fixes and cleanup * review comments; slightly DRYing up things --------- Co-authored-by: Dan Saunders <danjsaund@gmail.com>	2025-01-10 16:28:51 +00:00