need to update deepspeed version in extras too (#2161) [skip ci]

* need to update deepspeed version in extras too

* fix patch import

* fix monkeypatch reloading in tests and deepspeed patch

* remove duplicated functionality fixture

* reset LlamaForCausalLM too in fixtures for cce patch

* reset llama attn too

* disable xformers patch for cce

* skip problematic test on low-usage functionality
commit ab4b32187d
parent 5d6b088997
Author: Wing Lian
Date: 2024-12-09 14:01:44 -05:00
Committed by: GitHub

10 changed files with 60 additions and 45 deletions
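
The "fix monkeypatch reloading in tests" and fixture-reset bullets above refer to undoing module-level patches between tests. A minimal sketch of that pattern, as an autouse pytest fixture (the fixture name and the exact modules reloaded are illustrative, not the repo's actual conftest):

# conftest.py -- sketch of resetting monkeypatched transformers modules
import importlib

import pytest


@pytest.fixture(autouse=True)
def reset_monkeypatched_modules():
    yield  # run the test first, then undo any in-place patches
    import transformers.models.llama.modeling_llama
    import transformers.trainer

    # Reloading restores the original LlamaForCausalLM and Trainer
    # definitions that a patch may have rewritten in place.
    importlib.reload(transformers.models.llama.modeling_llama)
    importlib.reload(transformers.trainer)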

@@ -4,7 +4,7 @@ fix for FSDP optimizer save in trainer w 4.47.0
 import inspect
 import logging
-from transformers.trainer import Trainer
+from transformers import Trainer
 from axolotl.monkeypatch.unsloth_ import detab_code

@@ -5,8 +5,7 @@ see https://github.com/huggingface/transformers/pull/35128
 import inspect
 import logging
-from transformers import LlamaForCausalLM
-from transformers.trainer import Trainer
+from transformers import LlamaForCausalLM, Trainer
 from axolotl.monkeypatch.unsloth_ import detab_code
@@ -220,7 +219,7 @@ ORIGINAL_TRAINER_CODE = """
 PATCHED_TRAINER_CODE = """
     disable_deepspeed_no_sync = (
         self.accelerator.distributed_type == DistributedType.DEEPSPEED
-        and self.accelerator.deepspeed_engine_wrapped.engine.zero_optimization_partition_gradients()
+        # and self.accelerator.deepspeed_engine_wrapped.engine.zero_optimization_partition_gradients()
     )
     context = (
         functools.partial(self.accelerator.no_sync, model=model)
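
The ORIGINAL_TRAINER_CODE/PATCHED_TRAINER_CODE pair above feeds a source-rewrite patch: grab the live source of Trainer._inner_training_loop, swap the snippet, re-exec, and rebind. A minimal sketch of that mechanism, hedged as an approximation of what patch_training_loop_for_deepspeed_0_16_x does (the real code uses detab_code and extra guards):

import inspect
import textwrap

from transformers import Trainer

ORIGINAL_SNIPPET = "and self.accelerator.deepspeed_engine_wrapped.engine.zero_optimization_partition_gradients()"
PATCHED_SNIPPET = "# and self.accelerator.deepspeed_engine_wrapped.engine.zero_optimization_partition_gradients()"


def patch_inner_training_loop():
    # Method source comes back indented from inspect; dedent before exec
    # (axolotl's detab_code serves the same purpose).
    source = textwrap.dedent(inspect.getsource(Trainer._inner_training_loop))
    assert ORIGINAL_SNIPPET in source, "upstream Trainer changed; patch no longer applies"
    source = source.replace(ORIGINAL_SNIPPET, PATCHED_SNIPPET)
    # Re-exec in the Trainer module's globals so the rewritten function
    # resolves the same names, then rebind it onto the class.
    namespace = {}
    exec(source, Trainer._inner_training_loop.__globals__, namespace)
    Trainer._inner_training_loop = namespace["_inner_training_loop"]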

@@ -386,7 +386,7 @@ class ModelLoader:
             )
             patch_training_loop_for_fsdp()
-        elif self.cfg.deepspeed:
+        elif self.cfg.deepspeed and self.cfg.gradient_accumulation_steps > 1:
            from axolotl.monkeypatch.trainer_grad_accum import (
                patch_training_loop_for_deepspeed_0_16_x,
            )
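
With this gate, the DeepSpeed grad-accum patch is applied only when gradients actually accumulate; at gradient_accumulation_steps == 1 there is no no_sync window to fix, so the Trainer is left untouched. A rough usage sketch, with a stand-in cfg object rather than axolotl's real config type:

# cfg here is a hypothetical stand-in for axolotl's config object;
# the patch function name comes from the diff above.
from types import SimpleNamespace

from axolotl.monkeypatch.trainer_grad_accum import (
    patch_training_loop_for_deepspeed_0_16_x,
)

cfg = SimpleNamespace(deepspeed="zero2.json", gradient_accumulation_steps=4)

# Patch only when DeepSpeed is active AND steps actually accumulate.
if cfg.deepspeed and cfg.gradient_accumulation_steps > 1:
    patch_training_loop_for_deepspeed_0_16_x()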