diff --git a/requirements.txt b/requirements.txt index f8fe20bf9..60b533372 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,12 @@ --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/ packaging==23.2 peft==0.13.2 -transformers==4.46.2 +transformers==4.46.3 tokenizers>=0.20.1 bitsandbytes==0.44.1 accelerate==1.1.0 datasets==3.1.0 -deepspeed==0.15.3 +deepspeed==0.15.4 pydantic==2.6.3 addict fire diff --git a/src/axolotl/monkeypatch/trainer_fsdp_grad_accum.py b/src/axolotl/monkeypatch/trainer_fsdp_grad_accum.py deleted file mode 100644 index 6819fde11..000000000 --- a/src/axolotl/monkeypatch/trainer_fsdp_grad_accum.py +++ /dev/null @@ -1,83 +0,0 @@ -""" -fix for FSDP gradient accumulation -see https://github.com/huggingface/transformers/pull/34645 -""" -import inspect - -from accelerate.logging import get_logger -from transformers.trainer import Trainer - -from axolotl.monkeypatch.unsloth_ import detab_code - -LOG = get_logger("axolotl.monkeypatch.trainer_fsdp_grad_accumulation") - -ORIGINAL_CONTEXT_CODE = """ - context = ( - functools.partial(self.accelerator.no_sync, model=model) - if i == len(batch_samples) - 1 - else contextlib.nullcontext - ) -""" - -PATCHED_CONTEXT_CODE = """ - context = ( - functools.partial(self.accelerator.no_sync, model=model) - if i != len(batch_samples) - 1 - else contextlib.nullcontext - ) -""" - - -def get_training_loop_code() -> str: - training_loop = inspect.getsource( - Trainer._inner_training_loop # pylint: disable=protected-access - ) - return training_loop - - -def check_training_loop_is_patchable() -> bool: - train_loop = get_training_loop_code() - train_loop, _ = detab_code(train_loop) - return ORIGINAL_CONTEXT_CODE in train_loop - - -def patch_training_loop_for_fsdp_grad_accum(): - """ - monkeypatch for fixing the training loop for FSDP gradient accumulation - """ - - train_loop = get_training_loop_code() - Trainer._original_inner_training_loop = ( # pylint: disable=protected-access - train_loop - ) - train_loop, _ = detab_code(train_loop) - assert ( - ORIGINAL_CONTEXT_CODE in train_loop - ), "Original _inner_training_loop code not found" - - train_loop = train_loop.replace(ORIGINAL_CONTEXT_CODE, PATCHED_CONTEXT_CODE) - train_loop = train_loop.replace( - "def _inner_training_loop(", - "def _fixed_inner_training_loop(", - 1, - ) - - # load imports necessary - import transformers.trainer - - items_to_import = [] - for item in dir(transformers.trainer): - if item in train_loop: - items_to_import.append(item) - - exec( # pylint: disable=exec-used # nosec B102 - "from transformers.trainer import (" - + ", ".join(x for x in items_to_import) - + ")", - globals(), - ) - exec(train_loop, globals()) # pylint: disable=exec-used # nosec B102 - LOG.info("patching _inner_training_loop", main_process_only=True) - Trainer._inner_training_loop = ( # pylint: disable=protected-access - _fixed_inner_training_loop # pylint: disable=undefined-variable # noqa: F821 - ) diff --git a/src/axolotl/utils/trainer.py b/src/axolotl/utils/trainer.py index 5c9bfd663..2d3a6944f 100644 --- a/src/axolotl/utils/trainer.py +++ b/src/axolotl/utils/trainer.py @@ -16,9 +16,6 @@ from torch.utils.data import DataLoader, RandomSampler from transformers.utils import is_torch_bf16_gpu_available from axolotl.core.trainer_builder import HFCausalTrainerBuilder, HFRLTrainerBuilder -from axolotl.monkeypatch.trainer_fsdp_grad_accum import ( - patch_training_loop_for_fsdp_grad_accum, -) from axolotl.utils.distributed import reduce_and_broadcast from axolotl.utils.environment import check_cuda_p2p_ib_support from axolotl.utils.samplers import MultipackBatchSampler, get_dataset_lengths @@ -496,11 +493,6 @@ def prepare_opinionated_env(cfg): def setup_trainer( cfg, train_dataset, eval_dataset, model, tokenizer, processor, total_num_steps ): - if cfg.fsdp: - try: - patch_training_loop_for_fsdp_grad_accum() - except AssertionError: - pass if cfg.rl in ["dpo", "ipo", "orpo", "kto", "simpo"]: trainer_builder = HFRLTrainerBuilder(cfg, model[0], tokenizer, processor) trainer_builder.model_ref = model[1] diff --git a/tests/e2e/patched/test_trainer_fsdp.py b/tests/e2e/patched/test_trainer_fsdp.py deleted file mode 100644 index 1095cff3c..000000000 --- a/tests/e2e/patched/test_trainer_fsdp.py +++ /dev/null @@ -1,15 +0,0 @@ -"""Test module for checking whether the integration of Unsloth with Hugging Face Transformers is working as expected.""" -import unittest - -from axolotl.monkeypatch.trainer_fsdp_grad_accum import check_training_loop_is_patchable - - -class TestTrainerFSDPIntegration(unittest.TestCase): - """Unsloth monkeypatch integration tests.""" - - def test_train_loop_patchable(self): - # ensures the current version of transformers has loss code that matches our patching code - self.assertTrue( - check_training_loop_is_patchable(), - "HF transformers _inner_training_loop has changed and isn't patchable", - )