From 1d2aa1e4670012c3a4dea62b883abc489629790a Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Sun, 27 Jul 2025 17:05:12 -0400
Subject: [PATCH] upgrade to support latest transformers release (#2984)

* upgrade to support latest transformers release

* bump mistral common too

* Fix dependencies
---
 .github/workflows/multi-gpu-e2e.yml          |  4 ++--
 cicd/multigpu.sh                             |  6 ++++--
 requirements.txt                             |  6 +++---
 setup.py                                     | 16 +++++++++-------
 src/axolotl/core/builders/base.py            |  1 +
 .../monkeypatch/ring_attn/adapters/batch.py  | 15 ++++++++++-----
 6 files changed, 29 insertions(+), 19 deletions(-)

diff --git a/.github/workflows/multi-gpu-e2e.yml b/.github/workflows/multi-gpu-e2e.yml
index f26201ef0..308526151 100644
--- a/.github/workflows/multi-gpu-e2e.yml
+++ b/.github/workflows/multi-gpu-e2e.yml
@@ -37,14 +37,14 @@ jobs:
             cuda_version: 12.6.3
             python_version: "3.11"
             pytorch: 2.7.0
-            axolotl_extras: vllm
+            axolotl_extras:
             num_gpus: 2
             nightly_build: "true"
           - cuda: 126
             cuda_version: 12.6.3
             python_version: "3.11"
             pytorch: 2.7.1
-            axolotl_extras:
+            axolotl_extras: vllm
             num_gpus: 2
             nightly_build: "true"
     runs-on: [self-hosted, modal]
diff --git a/cicd/multigpu.sh b/cicd/multigpu.sh
index 1f74cd67d..4fd5672be 100755
--- a/cicd/multigpu.sh
+++ b/cicd/multigpu.sh
@@ -19,5 +19,7 @@ pytest -v --durations=10 -n1 /workspace/axolotl/tests/e2e/multigpu/patched/ \
   --cov-append \
   --cov-report=xml:multigpu-coverage.xml
 
-# Upload coverage to Codecov
-codecov upload-process -t "${CODECOV_TOKEN}" -f multigpu-coverage.xml -F multigpu,docker-tests,pytorch-${PYTORCH_VERSION} || true
+# Upload coverage to Codecov if CODECOV_TOKEN is available
+if [ -n "$CODECOV_TOKEN" ]; then
+  codecov upload-process -t "${CODECOV_TOKEN}" -f multigpu-coverage.xml -F multigpu,docker-tests,pytorch-${PYTORCH_VERSION} || true
+fi
diff --git a/requirements.txt b/requirements.txt
index 6e3c1097e..8e473bf6b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,13 +13,13 @@ packaging==23.2
 huggingface_hub>=0.33.0
 peft==0.16.0
-transformers==4.53.2
+transformers==4.54.0
 tokenizers>=0.21.1
 accelerate==1.9.0
 datasets==4.0.0
 deepspeed>=0.17.0
 trl==0.19.1
-hf_xet==1.1.2
+hf_xet==1.1.5
 optimum==1.16.2
 hf_transfer
@@ -68,4 +68,4 @@ schedulefree==1.4.1
 axolotl-contribs-lgpl==0.0.6
 axolotl-contribs-mit==0.0.3
-mistral-common==1.7.0
+mistral-common==1.8.3
diff --git a/setup.py b/setup.py
index df9a23154..6576c44e5 100644
--- a/setup.py
+++ b/setup.py
@@ -68,9 +68,10 @@ def parse_requirements(extras_require_map):
                 _install_requires.pop(_install_requires.index(xformers_version))
                 if patch == 0:
                     _install_requires.append("xformers==0.0.30")
+                    # vllm 0.9.x is incompatible with latest transformers
+                    extras_require_map.pop("vllm")
                 else:
-                    _install_requires.append("xformers==0.0.31.post1")
-                    extras_require_map["vllm"] = ["vllm>=0.9.0"]
+                    _install_requires.append("xformers==0.0.31")
             elif (major, minor) >= (2, 6):
                 _install_requires.pop(_install_requires.index(xformers_version))
                 _install_requires.append("xformers==0.0.29.post3")
@@ -84,7 +85,9 @@ def parse_requirements(extras_require_map):
                 else:
                     _install_requires.append("xformers>=0.0.28.post3")
                 _install_requires.pop(_install_requires.index(autoawq_version))
+                extras_require_map.pop("vllm")
             elif (major, minor) >= (2, 4):
+                extras_require_map.pop("vllm")
                 if patch == 0:
                     _install_requires.pop(_install_requires.index(xformers_version))
                     _install_requires.append("xformers>=0.0.27")
@@ -114,10 +117,10 @@ def get_package_version():
 
 
 extras_require = {
-    "flash-attn": ["flash-attn==2.8.0.post2"],
+    "flash-attn": ["flash-attn==2.8.2"],
     "ring-flash-attn": [
-        "flash-attn==2.8.0.post2",
-        "ring-flash-attn>=0.1.5",
+        "flash-attn==2.8.2",
+        "ring-flash-attn>=0.1.7",
         "yunchang==0.6.0",
     ],
     "deepspeed": [
@@ -151,13 +154,12 @@ extras_require = {
         "ray[train]",
     ],
     "vllm": [
-        "vllm==0.7.2",
+        "vllm==0.10.0",
     ],
     "llmcompressor": [
         "llmcompressor==0.5.1",
     ],
 }
-
 install_requires, dependency_links, extras_require_build = parse_requirements(
     extras_require
 )
diff --git a/src/axolotl/core/builders/base.py b/src/axolotl/core/builders/base.py
index d3a3b3242..0a37d2766 100644
--- a/src/axolotl/core/builders/base.py
+++ b/src/axolotl/core/builders/base.py
@@ -500,6 +500,7 @@ class TrainerBuilderBase(abc.ABC):
                 training_args_kwargs[arg] = getattr(self.cfg, arg)
 
         training_args_kwargs["per_device_train_batch_size"] = self.cfg.micro_batch_size
+        training_args_kwargs["average_tokens_across_devices"] = False
 
         if self.cfg.eval_batch_size:
             training_args_kwargs["per_device_eval_batch_size"] = (
diff --git a/src/axolotl/monkeypatch/ring_attn/adapters/batch.py b/src/axolotl/monkeypatch/ring_attn/adapters/batch.py
index 5e56bdd04..ebed9ebdc 100644
--- a/src/axolotl/monkeypatch/ring_attn/adapters/batch.py
+++ b/src/axolotl/monkeypatch/ring_attn/adapters/batch.py
@@ -18,10 +18,15 @@ import transformers
 import transformers.modeling_flash_attention_utils
 from ring_flash_attn import ring_flash_attn_func
 from ring_flash_attn.adapters.hf_adapter import check_params
-from transformers.modeling_flash_attention_utils import (
-    _flash_supports_window_size,
-    is_flash_attn_greater_or_equal,
-)
+from transformers.modeling_flash_attention_utils import is_flash_attn_greater_or_equal
+
+try:
+    from transformers.modeling_flash_attention_utils import _flash_supports_window
+except ImportError:
+    from transformers.modeling_flash_attention_utils import (
+        _flash_supports_window_size as _flash_supports_window,
+    )
+
 from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS
 
 from axolotl.utils.schemas.enums import RingAttnFunc
@@ -112,7 +117,7 @@ def create_flash_attn_forward_varlen_llama3(
 
         # Handle sliding window
         use_sliding_windows = (
-            _flash_supports_window_size
+            _flash_supports_window
             and sliding_window is not None
             and key_states.shape[1] > sliding_window
         )