need to update deepspeed version in extras too (#2161) [skip ci]
* need to update deepspeed version in extras too
* fix patch import
* fix monkeypatch reloading in tests and deepspeed patch
* remove duplicated functionality fixture
* reset LlamaForCausalLM too in fixtures for cce patch
* reset llama attn too
* disable xformers patch for cce
* skip problematic test on low usage functionality
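The fixture changes listed above all follow one pattern: snapshot whatever a patch replaces, then restore it after each test. A minimal sketch, assuming pytest and a hypothetical fixture name (the repo's actual fixtures may track more state than just forward):

import pytest
from transformers import LlamaForCausalLM


@pytest.fixture(autouse=True)
def reset_llama_patches():
    # Snapshot the attribute a patch (e.g. the CCE patch) may replace.
    original_forward = LlamaForCausalLM.forward
    yield
    # Restore after each test so one test's monkeypatch cannot leak
    # into the next test in the session.
    LlamaForCausalLM.forward = original_forward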
@@ -4,7 +4,7 @@ fix for FSDP optimizer save in trainer w 4.47.0
 import inspect
 import logging

-from transformers.trainer import Trainer
+from transformers import Trainer

 from axolotl.monkeypatch.unsloth_ import detab_code
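A plausible reading of this import fix: a name bound with `from transformers.trainer import Trainer` is resolved once, at import time, so a later reassignment of the submodule attribute never reaches it. A minimal sketch of that pitfall (PatchedTrainer is a hypothetical stand-in, not from this commit):

import transformers
import transformers.trainer

from transformers.trainer import Trainer  # binds the class object now


class PatchedTrainer(transformers.Trainer):
    pass


# Replacing the submodule attribute afterwards...
transformers.trainer.Trainer = PatchedTrainer

# ...does not rebind the name imported above; it still points at the
# original class object.
assert Trainer is not PatchedTrainer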
@@ -5,8 +5,7 @@ see https://github.com/huggingface/transformers/pull/35128
 import inspect
 import logging

-from transformers import LlamaForCausalLM
-from transformers.trainer import Trainer
+from transformers import LlamaForCausalLM, Trainer

 from axolotl.monkeypatch.unsloth_ import detab_code
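Both files keep the same patching idiom, which is why each imports `inspect`, `Trainer`, and `detab_code`: read the method source, rewrite a known fragment, and exec the result back onto the class. A condensed sketch of that idiom, with textwrap.dedent standing in for detab_code and the fragments left as parameters (assumed wiring, not the repo's exact code):

import inspect
import textwrap

from transformers import Trainer


def apply_source_patch(original_fragment: str, patched_fragment: str) -> None:
    source = textwrap.dedent(inspect.getsource(Trainer._inner_training_loop))
    # Fail loudly if upstream transformers changed and the patch is stale.
    assert original_fragment in source, "patch target not found upstream"
    source = source.replace(original_fragment, patched_fragment)
    namespace = {}
    # Re-evaluate the rewritten source with the trainer module's globals
    # so names like DistributedType still resolve, then swap the method in.
    exec(source, Trainer._inner_training_loop.__globals__, namespace)
    Trainer._inner_training_loop = namespace["_inner_training_loop"]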
@@ -220,7 +219,7 @@ ORIGINAL_TRAINER_CODE = """
 PATCHED_TRAINER_CODE = """
 disable_deepspeed_no_sync = (
     self.accelerator.distributed_type == DistributedType.DEEPSPEED
-    and self.accelerator.deepspeed_engine_wrapped.engine.zero_optimization_partition_gradients()
+    # and self.accelerator.deepspeed_engine_wrapped.engine.zero_optimization_partition_gradients()
 )
 context = (
     functools.partial(self.accelerator.no_sync, model=model)
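The effect of commenting out the second condition: no_sync is now skipped for every DeepSpeed run, not only when the engine partitions gradients (ZeRO stage 2+). A condensed paraphrase of the patched block as a standalone helper (assumed shape, not the verbatim trainer code):

import contextlib
import functools

from accelerate.utils import DistributedType


def grad_accum_context(accelerator, model):
    # Under DeepSpeed, let the engine handle gradient accumulation itself
    # instead of suppressing gradient sync via Accelerate's no_sync.
    disable_deepspeed_no_sync = (
        accelerator.distributed_type == DistributedType.DEEPSPEED
        # previously also: engine.zero_optimization_partition_gradients()
    )
    if disable_deepspeed_no_sync:
        return contextlib.nullcontext()
    return functools.partial(accelerator.no_sync, model=model)()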
@@ -386,7 +386,7 @@ class ModelLoader:
             )

             patch_training_loop_for_fsdp()
-        elif self.cfg.deepspeed:
+        elif self.cfg.deepspeed and self.cfg.gradient_accumulation_steps > 1:
             from axolotl.monkeypatch.trainer_grad_accum import (
                 patch_training_loop_for_deepspeed_0_16_x,
             )
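The extra condition keeps the patch from being applied when it cannot matter: with gradient_accumulation_steps == 1 every step is an optimizer step, so the DeepSpeed no_sync workaround would change nothing. Condensed from the hunk above (surrounding ModelLoader context omitted):

def maybe_patch_for_deepspeed(cfg) -> None:
    # Only patch the training loop when gradients are actually accumulated;
    # otherwise the DeepSpeed 0.16.x workaround is a no-op.
    if cfg.deepspeed and cfg.gradient_accumulation_steps > 1:
        from axolotl.monkeypatch.trainer_grad_accum import (
            patch_training_loop_for_deepspeed_0_16_x,
        )

        patch_training_loop_for_deepspeed_0_16_x()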