support for latest transformers release 4.48.1 (#2256)

2025-01-23 21:17:57 -05:00
parent 8fb72cbc0b
commit 8a7a0b07dc
13 changed files with 98 additions and 363 deletions
--- a/tests/e2e/multigpu/test_llama.py
+++ b/tests/e2e/multigpu/test_llama.py
@@ -63,6 +63,7 @@ class TestMultiGPULlama:
                "lr_scheduler": "cosine",
                "flash_attention": True,
                "use_tensorboard": True,
+                "bf16": True,
            }
        )

@@ -127,6 +128,7 @@ class TestMultiGPULlama:
                "lr_scheduler": "cosine",
                "flash_attention": True,
                "use_tensorboard": True,
+                "bf16": True,
            }
        )

@@ -201,6 +203,7 @@ class TestMultiGPULlama:
                "lr_scheduler": "cosine",
                "flash_attention": True,
                "use_tensorboard": True,
+                "bf16": True,
            }
        )

@@ -223,8 +226,12 @@ class TestMultiGPULlama:
            ]
        )

+        loss_threshold = 2.3
        check_tensorboard(
-            temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss is too high"
+            temp_dir + "/runs",
+            "train/train_loss",
+            loss_threshold,
+            "Train Loss is too high",
        )

    def test_dpo_qlora_ddp(self, temp_dir):
@@ -275,6 +282,7 @@ class TestMultiGPULlama:
                "lr_scheduler": "cosine",
                "flash_attention": True,
                "use_tensorboard": True,
+                "bf16": True,
            }
        )

@@ -297,8 +305,12 @@ class TestMultiGPULlama:
            ]
        )

+        loss_threshold = 2.3
        check_tensorboard(
-            temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss is too high"
+            temp_dir + "/runs",
+            "train/train_loss",
+            loss_threshold,
+            "Train Loss is too high",
        )

    @pytest.mark.parametrize(
--- a/tests/e2e/patched/test_mixtral_samplepack.py
+++ b/tests/e2e/patched/test_mixtral_samplepack.py
@@ -102,9 +102,5 @@ class TestMixtral(unittest.TestCase):
        cli_args = TrainerCliArgs()
        dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)

-        model, _ = train(cfg=cfg, dataset_meta=dataset_meta)
-        assert (
-            "MixtralFlashAttention2"
-            in model.model.layers[0].self_attn.__class__.__name__
-        )
+        train(cfg=cfg, dataset_meta=dataset_meta)
        check_model_output_exists(temp_dir, cfg)
--- a/tests/e2e/patched/test_model_patches.py
+++ b/tests/e2e/patched/test_model_patches.py
@@ -49,12 +49,7 @@ class TestModelPatches(unittest.TestCase):
        )
        normalize_config(cfg)
        tokenizer = load_tokenizer(cfg)
-        model, _ = load_model(cfg, tokenizer, inference=False)
-
-        assert (
-            "MixtralFlashAttention2"
-            in model.model.layers[0].self_attn.__class__.__name__
-        )
+        load_model(cfg, tokenizer, inference=False)

    @with_temp_dir
    def test_mistral_multipack(self, temp_dir):
--- a/tests/e2e/patched/test_unsloth_integration.py
+++ b/tests/e2e/patched/test_unsloth_integration.py
@@ -3,8 +3,6 @@ import unittest

 import pytest

-from axolotl.monkeypatch.unsloth_ import check_self_attn_is_patchable
-

@pytest.mark.skip(
    reason="Unsloth integration will be broken going into latest transformers"
@@ -13,6 +11,8 @@ class TestUnslothIntegration(unittest.TestCase):
    """Unsloth monkeypatch integration tests."""

    def test_is_self_attn_patchable(self):
+        from axolotl.monkeypatch.unsloth_ import check_self_attn_is_patchable
+
        # ensures the current version of transformers has loss code that matches our patching code
        self.assertTrue(
            check_self_attn_is_patchable(),
--- a/tests/e2e/solo/__init__.py
+++ b/tests/e2e/solo/__init__.py
--- a/tests/e2e/solo/test_relora_llama.py
+++ b/tests/e2e/solo/test_relora_llama.py
@@ -13,7 +13,7 @@ from axolotl.train import train
 from axolotl.utils.config import normalize_config
 from axolotl.utils.dict import DictDefault

-from .utils import check_model_output_exists, check_tensorboard, with_temp_dir
+from ..utils import check_model_output_exists, check_tensorboard, with_temp_dir

 LOG = logging.getLogger("axolotl.tests.e2e")
 os.environ["WANDB_DISABLED"] = "true"
--- a/tests/patched/test_llama_trainer_ga.py
+++ b/tests/patched/test_llama_trainer_ga.py
@@ -1,25 +0,0 @@
-""""Test module for checking whether the Hugging Face Transformers is working as expected."""
-import unittest
-
-from axolotl.monkeypatch.trainer_grad_accum import (
-    check_forward_is_patchable,
-    check_training_step_is_patchable,
-)
-
-
-class TestTrainerGAIntegration(unittest.TestCase):
-    """llama monkeypatch integration tests."""
-
-    def test_train_step_patchable(self):
-        # ensures the current version of transformers has loss code that matches our patching code
-        self.assertTrue(
-            check_training_step_is_patchable(),
-            "HF transformers Trainer.training_step has changed and isn't patchable",
-        )
-
-    def test_model_forward_patchable(self):
-        # ensures the current version of transformers has loss code that matches our patching code
-        self.assertTrue(
-            check_forward_is_patchable(),
-            "HF transformers LlamaForCausalLM.forward has changed and isn't patchable",
-        )