diff --git a/requirements.txt b/requirements.txt index 1f7ac7bba..4de3d3470 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,7 +13,7 @@ liger-kernel==0.5.2 packaging==23.2 peft==0.14.0 -transformers==4.47.1 +transformers==4.48.0 tokenizers>=0.21.0 accelerate==1.2.1 datasets==3.2.0 diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py index 1cc374514..b3e97e3b2 100644 --- a/src/axolotl/utils/models.py +++ b/src/axolotl/utils/models.py @@ -387,12 +387,13 @@ class ModelLoader: self.patch_attention() if self.cfg.model_config_type == "llama": - from axolotl.monkeypatch.trainer_grad_accum import ( # patch_flash_attention_forward, + from axolotl.monkeypatch.trainer_grad_accum import ( + patch_flash_attention_forward, patch_forward_for_ga, patch_training_step_for_ga, ) - # patch_flash_attention_forward() + patch_flash_attention_forward() patch_forward_for_ga() patch_training_step_for_ga()