convert-differential-transformer test coverage

Dan Saunders
2024-12-17 20:46:19 +00:00
parent 63b8e42c6b
commit d22e1136bc
8 changed files with 368 additions and 189 deletions

@@ -1,185 +0,0 @@
"""CLI to convert a transformers model's attns to diff attns."""
import logging
import warnings
from pathlib import Path
from time import time
from typing import Union
import fire
import torch
import yaml
from colorama import Fore
from dotenv import load_dotenv
from transformers import HfArgumentParser
from axolotl.cli import load_cfg, print_axolotl_text_art
from axolotl.common.cli import ConvertDiffTransformerCliArgs, load_model_and_tokenizer
from axolotl.integrations.differential_transformer.convert import (
convert_to_diff_attention,
)
LOG = logging.getLogger("axolotl.cli.convert_attention")
def test_inference(model, tokenizer, prompt="The quick brown fox"):
"""Run test inference and return generation time"""
try:
inputs = tokenizer(prompt, return_tensors="pt")
inputs = {
k: v.to(device=model.device, dtype=torch.long) for k, v in inputs.items()
}
start = time()
with torch.no_grad():
outputs = model.generate(
**inputs,
max_new_tokens=20,
num_beams=1,
do_sample=False,
pad_token_id=tokenizer.pad_token_id,
use_cache=False,
)
elapsed = time() - start
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
LOG.info("Prompt: %s", prompt)
LOG.info("Generated: %s", generated_text)
LOG.info("Generation time: %.2fs", elapsed)
return elapsed, generated_text
except Exception as exc:
LOG.error("Inference failed: %s", str(exc))
raise
def convert_diff_transformer(cfg, cli_args, config_path):
try:
# Load model and tokenizer
with warnings.catch_warnings():
warnings.simplefilter("ignore")
model, tokenizer = load_model_and_tokenizer(cfg=cfg, cli_args=cli_args)
model.to(cfg.device, dtype=cfg.torch_dtype)
# Log original model info
LOG.info(
"Original model config:\n\t- Hidden size: %d\n\t- Num attention heads: %d",
model.config.hidden_size,
model.config.num_attention_heads,
)
# Test original model
if cli_args.debug:
LOG.info("Testing original model...")
orig_time, orig_text = test_inference(model, tokenizer)
# Convert attention
LOG.info("Converting to differential attention...")
try:
model = convert_to_diff_attention(
model=model,
zero_init=cli_args.zero_init,
sublayer_norm=cli_args.sublayer_norm,
)
model.to(cfg.device, dtype=cfg.torch_dtype)
except Exception as exc:
LOG.error(Fore.RED + "Conversion failed: %s" + Fore.RESET, str(exc))
raise
# Test converted model
if cli_args.debug:
LOG.info("Testing converted model...")
conv_time, conv_text = test_inference(model, tokenizer)
# Save if requested
if cfg.output_dir:
# Save model and tokenizer
LOG.info("Saving converted model to %s", cfg.output_dir)
model.save_pretrained(cfg.output_dir)
tokenizer.save_pretrained(cfg.output_dir)
# Modify config to reflect new path / differential attention
output_config_path = Path(cfg.output_dir) / "axolotl_config.yml"
LOG.info("Saving updated config to %s", output_config_path)
with open(config_path, "r", encoding="utf-8") as file:
data = yaml.safe_load(file) or {}
data["base_model"] = cfg.output_dir
data["diff_attention"] = True
with open(output_config_path, "w", encoding="utf-8") as file:
yaml.dump(data, file)
else:
LOG.info("Not saving converted model to disk")
LOG.info("Pass --output-dir path/to/save to save model")
if cli_args.debug:
LOG.info(
Fore.GREEN
+ "Conversion successful!\n"
+ f"Original generation time: {orig_time:.2f}s\n"
+ f"Converted generation time: {conv_time:.2f}s"
+ Fore.RESET
)
if orig_text == conv_text:
LOG.info(
Fore.GREEN
+ "Generations match!\n"
+ "Model generation:\n"
+ "*" * 50
+ "\n"
+ f"{orig_text}\n"
+ "*" * 50
+ "\n"
+ Fore.RESET
)
else:
message = (
"Generations do not match.\n"
+ "Original generation:\n"
+ "*" * 50
+ "\n"
+ f"{orig_text}\n"
+ "*" * 50
+ "\n"
+ "Converted generation:\n"
+ "*" * 50
+ "\n"
+ f"{conv_text}\n"
+ "*" * 50
+ "\n"
)
if cli_args.zero_init and not cli_args.sublayer_norm:
LOG.info(Fore.RED + message + Fore.RESET)
else:
LOG.info(
Fore.YELLOW
+ message
+ "However, this is expected since --zero-init"
+ " and --no-sublayer-norm were not passed."
+ Fore.RESET
)
return model
except Exception as exc:
LOG.error(Fore.RED + "Process failed: %s" + Fore.RESET, str(exc))
raise
def do_cli(config: Union[Path, str] = Path("examples/"), **kwargs):
print_axolotl_text_art()
cfg = load_cfg(config, **kwargs)
parser = HfArgumentParser(ConvertDiffTransformerCliArgs)
cli_args, _ = parser.parse_args_into_dataclasses(return_remaining_strings=True)
convert_diff_transformer(cfg, cli_args, config)
if __name__ == "__main__":
load_dotenv()
fire.Fire(do_cli)

@@ -0,0 +1,190 @@
"""CLI to convert a transformers model's attns to diff attns."""
import logging
import warnings
from pathlib import Path
from time import time
from typing import Union
import fire
import torch
import yaml
from colorama import Fore
from dotenv import load_dotenv
from transformers import HfArgumentParser
from axolotl.cli import load_cfg, print_axolotl_text_art
from axolotl.common.cli import ConvertDiffTransformerCliArgs, load_model_and_tokenizer
from axolotl.integrations.differential_transformer.convert import (
convert_to_diff_attention,
)
LOG = logging.getLogger(__name__)
def test_inference(model, tokenizer, prompt="The quick brown fox"):
"""Run test inference and return generation time"""
try:
inputs = tokenizer(prompt, return_tensors="pt")
inputs = {
k: v.to(device=model.device, dtype=torch.long) for k, v in inputs.items()
}
start = time()
with torch.no_grad():
outputs = model.generate(
**inputs,
max_new_tokens=20,
num_beams=1,
do_sample=False,
pad_token_id=tokenizer.pad_token_id,
use_cache=False,
)
elapsed = time() - start
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
LOG.info("Prompt: %s", prompt)
LOG.info("Generated: %s", generated_text)
LOG.info("Generation time: %.2fs", elapsed)
return elapsed, generated_text
except Exception as exc:
LOG.error("Inference failed: %s", str(exc))
raise
def convert_differential_transformer(cfg, cli_args, config_path):
debug_info = {}
# Load model and tokenizer
with warnings.catch_warnings():
warnings.simplefilter("ignore")
model, tokenizer = load_model_and_tokenizer(cfg=cfg, cli_args=cli_args)
model.to(cfg.device, dtype=cfg.torch_dtype)
# Log original model info
LOG.info(
"Original model config:\n\t- Hidden size: %d\n\t- Num attention heads: %d",
model.config.hidden_size,
model.config.num_attention_heads,
)
# Test original model
if cli_args.debug:
LOG.info("Testing original model...")
debug_info["orig_time"], debug_info["orig_text"] = test_inference(
model, tokenizer
)
# Convert attention
LOG.info("Converting to differential attention...")
try:
model = convert_to_diff_attention(
model=model,
zero_init=cli_args.zero_init,
sublayer_norm=cli_args.sublayer_norm,
)
model.to(cfg.device, dtype=cfg.torch_dtype)
except Exception as exc:
LOG.error(Fore.RED + "Conversion failed: %s" + Fore.RESET, str(exc))
raise
# Test converted model
if cli_args.debug:
LOG.info("Testing converted model...")
debug_info["conv_time"], debug_info["conv_text"] = test_inference(
model, tokenizer
)
# Save if requested
if cfg.output_dir:
# Save model and tokenizer
LOG.info("Saving converted model to %s", cfg.output_dir)
model.save_pretrained(cfg.output_dir)
tokenizer.save_pretrained(cfg.output_dir)
# Modify config to reflect new path / differential attention
output_config_path = Path(cfg.output_dir) / "axolotl_config.yml"
LOG.info("Saving updated config to %s", output_config_path)
with open(config_path, "r", encoding="utf-8") as file:
data = yaml.safe_load(file) or {}
data["base_model"] = cfg.output_dir
data["diff_attention"] = True
with open(output_config_path, "w", encoding="utf-8") as file:
yaml.dump(data, file)
else:
LOG.info("Not saving converted model to disk")
LOG.info("Pass --output-dir path/to/save to save model")
if cli_args.debug:
LOG.info(
Fore.GREEN
+ "Conversion successful!\n"
+ f"Original generation time: {debug_info['orig_time']:.2f}s\n"
+ f"Converted generation time: {debug_info['conv_time']:.2f}s"
+ Fore.RESET
)
if debug_info["orig_text"] == debug_info["conv_text"]:
LOG.info(
Fore.GREEN
+ "Generations match!\n"
+ "Model generation:\n"
+ "*" * 50
+ "\n"
+ f"{debug_info['orig_text']}\n"
+ "*" * 50
+ "\n"
+ Fore.RESET
)
debug_info["generations_match"] = True
else:
message = (
"Generations do not match.\n"
+ "Original generation:\n"
+ "*" * 50
+ "\n"
+ f"{debug_info['orig_text']}\n"
+ "*" * 50
+ "\n"
+ "Converted generation:\n"
+ "*" * 50
+ "\n"
+ f"{debug_info['conv_text']}\n"
+ "*" * 50
+ "\n"
)
debug_info["generations_match"] = False
if cli_args.zero_init and not cli_args.sublayer_norm:
LOG.info(Fore.RED + message + Fore.RESET)
debug_info["match_expected"] = True
else:
LOG.info(
Fore.YELLOW
+ message
+ "However, this is expected since --zero-init"
+ " and --no-sublayer-norm were not passed."
+ Fore.RESET
)
debug_info["match_expected"] = False
return model, debug_info
def do_cli(config: Union[Path, str] = Path("examples/"), **kwargs):
print_axolotl_text_art()
cfg = load_cfg(config, **kwargs)
parser = HfArgumentParser(ConvertDiffTransformerCliArgs)
cli_args, _ = parser.parse_args_into_dataclasses(return_remaining_strings=True)
convert_differential_transformer(cfg, cli_args, config)
if __name__ == "__main__":
load_dotenv()
fire.Fire(do_cli)
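
Since the commit adds test coverage around this path, here is a minimal pytest-style sketch (not part of this diff) of how the new (model, debug_info) return value might be exercised; the config path, the output_dir override, and the dataclass keyword arguments are assumptions inferred from the code above.

from axolotl.cli import load_cfg
from axolotl.cli.integrations.convert_differential_transformer import (
    convert_differential_transformer,
)
from axolotl.common.cli import ConvertDiffTransformerCliArgs


def test_zero_init_conversion_matches_original(tmp_path):
    # Hypothetical config path; overriding output_dir via load_cfg kwargs is an
    # assumption about its API.
    cfg = load_cfg("path/to/config.yml", output_dir=str(tmp_path))

    # With zero-init and no sublayer norm, the conversion is expected to be an
    # exact functional copy of the original attention, so the debug generations
    # produced before and after conversion should match.
    cli_args = ConvertDiffTransformerCliArgs(
        debug=True, zero_init=True, sublayer_norm=False
    )
    _, debug_info = convert_differential_transformer(
        cfg, cli_args, "path/to/config.yml"
    )

    assert debug_info["generations_match"] is True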

@@ -249,11 +249,11 @@ def merge_lora(
@click.argument("config", type=click.Path(exists=True, path_type=str))
@add_options_from_dataclass(ConvertDiffTransformerCliArgs)
@add_options_from_config(AxolotlInputConfig)
def convert_diff_transformer(config: str, **kwargs):
def convert_differential_transformer(config: str, **kwargs):
"""Convert model attention layers to differential attention layers."""
kwargs = {k: v for k, v in kwargs.items() if v is not None}
from axolotl.cli.integrations.convert_diff_transformer import do_cli
from axolotl.cli.integrations.convert_differential_transformer import do_cli
do_cli(config=config, **kwargs)
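
For reference, a hypothetical invocation of the renamed subcommand; the flag spellings follow the messages in the conversion script above and are assumptions, not verified CLI output:

axolotl convert-differential-transformer path/to/config.yml --debug --zero-init --no-sublayer-norm --output-dir path/to/save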

@@ -57,7 +57,7 @@ class EvaluateCliArgs:
@dataclass
class ConvertDiffTransformerCliArgs:
"""
dataclass with arguments for convert-diff-transformer CLI
dataclass with arguments for convert-differential-transformer CLI
"""
debug: bool = field(default=False)

@@ -17,7 +17,6 @@ from .differential_attention import (
LlamaDifferentialSdpaAttention,
)
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
ATTENTION_MAPPING = {