From a531e9d946b83f04f56ac46442b5d7b3b3f8cdc0 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Wed, 21 Jan 2026 20:00:18 -0500 Subject: [PATCH] upgrade vllm to v0.14.0 (#3345) --- .github/workflows/main.yml | 2 +- .github/workflows/multi-gpu-e2e.yml | 14 +++++++++----- .github/workflows/pypi.yml | 2 +- .github/workflows/tests-nightly.yml | 2 +- .github/workflows/tests.yml | 6 +++--- README.md | 2 +- cicd/Dockerfile-uv.jinja | 2 +- cicd/Dockerfile.jinja | 2 +- cicd/multigpu.py | 6 +++++- docker/Dockerfile-base | 2 +- docker/Dockerfile-base-nightly | 2 +- examples/apertus/README.md | 2 +- examples/arcee/README.md | 2 +- examples/devstral/README.md | 2 +- examples/gemma3n/README.md | 2 +- examples/gpt-oss/README.md | 2 +- examples/granite4/README.md | 2 +- examples/hunyuan/README.md | 2 +- examples/magistral/README.md | 2 +- examples/qwen3-next/README.md | 2 +- examples/voxtral/README.md | 2 +- pyproject.toml | 2 +- requirements.txt | 4 ++-- setup.py | 5 +++++ 24 files changed, 43 insertions(+), 30 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e081f2127..0e1ccb89a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -38,7 +38,7 @@ jobs: cuda_version: 12.9.1 python_version: "3.12" pytorch: 2.9.1 - axolotl_extras: + axolotl_extras: vllm platforms: "linux/amd64,linux/arm64" - cuda: 130 cuda_version: 13.0.0 diff --git a/.github/workflows/multi-gpu-e2e.yml b/.github/workflows/multi-gpu-e2e.yml index 833dc4f29..107572ad6 100644 --- a/.github/workflows/multi-gpu-e2e.yml +++ b/.github/workflows/multi-gpu-e2e.yml @@ -35,14 +35,19 @@ jobs: pytorch: 2.8.0 axolotl_extras: fbgemm-gpu num_gpus: 2 - nightly_build: "true" - cuda: 128 cuda_version: 12.8.1 python_version: "3.11" pytorch: 2.9.1 - axolotl_extras: fbgemm-gpu + axolotl_extras: "fbgemm-gpu" num_gpus: 2 - nightly_build: "true" + - cuda: 129 + cuda_version: 12.9.1 + python_version: "3.12" + pytorch: 2.9.1 + axolotl_extras: "fbgemm-gpu,vllm" + num_gpus: 2 + 
dockerfile: "Dockerfile-uv.jinja" - cuda: 130 cuda_version: 13.0.0 python_version: "3.11" @@ -50,7 +55,6 @@ jobs: axolotl_extras: # axolotl_extras: fbgemm-gpu num_gpus: 2 - nightly_build: "true" runs-on: [self-hosted, modal] timeout-minutes: 120 steps: @@ -72,8 +76,8 @@ jobs: echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV echo "CUDA=${{ matrix.cuda }}" >> $GITHUB_ENV echo "N_GPUS=${{ matrix.num_gpus }}" >> $GITHUB_ENV - echo "NIGHTLY_BUILD=${{ matrix.nightly_build }}" >> $GITHUB_ENV echo "CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }}" >> $GITHUB_ENV + echo "E2E_DOCKERFILE=${{ matrix.dockerfile || 'Dockerfile.jinja'}}" >> $GITHUB_ENV - name: Run tests job on Modal run: | modal run -m cicd.multigpu diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index af2ad73a4..3bf66b497 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -40,7 +40,7 @@ jobs: - name: Install dependencies run: | - pip3 install wheel packaging==23.2 + pip3 install wheel packaging==26.0 pip3 install --no-build-isolation -e . 
pip3 install -r requirements-dev.txt -r requirements-tests.txt diff --git a/.github/workflows/tests-nightly.yml b/.github/workflows/tests-nightly.yml index 67b68a7e6..21446e548 100644 --- a/.github/workflows/tests-nightly.yml +++ b/.github/workflows/tests-nightly.yml @@ -48,7 +48,7 @@ jobs: - name: upgrade pip run: | pip3 install --upgrade pip - pip3 install --upgrade packaging==23.2 setuptools==75.8.0 wheel + pip3 install --upgrade packaging==26.0 setuptools==75.8.0 wheel - name: Install PyTorch run: | diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 75c70a24a..bcbb76df3 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -87,7 +87,7 @@ jobs: - name: upgrade pip run: | pip3 install --upgrade pip - pip3 install --upgrade packaging==23.2 setuptools==75.8.0 wheel + pip3 install --upgrade packaging==26.0 setuptools==75.8.0 wheel - name: Install PyTorch run: | @@ -182,7 +182,7 @@ jobs: - name: upgrade pip run: | pip3 install --upgrade pip - pip3 install --upgrade packaging==23.2 setuptools==75.8.0 setuptools_scm build wheel psutil + pip3 install --upgrade packaging==26.0 setuptools==75.8.0 setuptools_scm build wheel psutil - name: Install PyTorch run: | @@ -269,7 +269,7 @@ jobs: python_version: "3.12" pytorch: 2.9.1 num_gpus: 1 - axolotl_extras: + axolotl_extras: vllm dockerfile: "Dockerfile-uv.jinja" steps: - name: Checkout diff --git a/README.md b/README.md index 0521f7bed..b56cdf0e8 100644 --- a/README.md +++ b/README.md @@ -88,7 +88,7 @@ Features: #### Using pip ```bash -pip3 install -U packaging==23.2 setuptools==75.8.0 wheel ninja +pip3 install -U packaging==26.0 setuptools==75.8.0 wheel ninja pip3 install --no-build-isolation axolotl[flash-attn,deepspeed] # Download example axolotl configs, deepspeed configs diff --git a/cicd/Dockerfile-uv.jinja b/cicd/Dockerfile-uv.jinja index 6a4d8a7d3..9a49cfca5 100644 --- a/cicd/Dockerfile-uv.jinja +++ b/cicd/Dockerfile-uv.jinja @@ -31,7 +31,7 @@ RUN if [ "$NIGHTLY_BUILD" 
= "true" ] ; then \ sed -i 's#^datasets.*#datasets @ git+https://github.com/huggingface/datasets.git@main#' requirements.txt; \ fi -RUN uv pip install packaging==23.2 setuptools==75.8.0 +RUN uv pip install packaging==26.0 setuptools==75.8.0 RUN uv pip install torchvision RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \ uv pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \ diff --git a/cicd/Dockerfile.jinja b/cicd/Dockerfile.jinja index 81ed5453e..1c397b011 100644 --- a/cicd/Dockerfile.jinja +++ b/cicd/Dockerfile.jinja @@ -32,7 +32,7 @@ RUN if [ "$NIGHTLY_BUILD" = "true" ] ; then \ sed -i 's#^datasets.*#datasets @ git+https://github.com/huggingface/datasets.git@main#' requirements.txt; \ fi -RUN pip install packaging==23.2 setuptools==75.8.0 psutil +RUN pip install packaging==26.0 setuptools==75.8.0 psutil RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \ pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \ else \ diff --git a/cicd/multigpu.py b/cicd/multigpu.py index 5bd8d3c04..ed022c851 100644 --- a/cicd/multigpu.py +++ b/cicd/multigpu.py @@ -17,7 +17,8 @@ template_loader = jinja2.FileSystemLoader(searchpath=cicd_path) template_env = jinja2.Environment( loader=template_loader, autoescape=select_autoescape() ) -df_template = template_env.get_template("Dockerfile.jinja") +dockerfile = os.environ.get("E2E_DOCKERFILE", "Dockerfile.jinja") +df_template = template_env.get_template(dockerfile) df_args = { "AXOLOTL_EXTRAS": os.environ.get("AXOLOTL_EXTRAS", ""), @@ -27,8 +28,11 @@ df_args = { "CUDA": os.environ.get("CUDA", "126"), "GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"), "GITHUB_SHA": os.environ.get("GITHUB_SHA", ""), + "NIGHTLY_BUILD": os.environ.get("NIGHTLY_BUILD", ""), "CODECOV_TOKEN": os.environ.get("CODECOV_TOKEN", ""), "HF_HOME": "/workspace/data/huggingface-cache/hub", + "PYTHONUNBUFFERED": 
os.environ.get("PYTHONUNBUFFERED", "1"), + "DEEPSPEED_LOG_LEVEL": os.environ.get("DEEPSPEED_LOG_LEVEL", "WARNING"), } dockerfile_contents = df_template.render(**df_args) diff --git a/docker/Dockerfile-base b/docker/Dockerfile-base index 96367207f..547c45f49 100644 --- a/docker/Dockerfile-base +++ b/docker/Dockerfile-base @@ -43,7 +43,7 @@ ENV PATH="/root/miniconda3/envs/py${PYTHON_VERSION}/bin:${PATH}" WORKDIR /workspace -RUN python3 -m pip install --upgrade pip && pip3 install -U packaging==23.2 setuptools==75.8.0 wheel psutil && \ +RUN python3 -m pip install --upgrade pip && pip3 install -U packaging==26.0 setuptools==75.8.0 wheel psutil && \ python3 -m pip install --no-cache-dir -U torch==${PYTORCH_VERSION}+cu${CUDA} torchvision --extra-index-url https://download.pytorch.org/whl/cu$CUDA && \ python3 -m pip cache purge diff --git a/docker/Dockerfile-base-nightly b/docker/Dockerfile-base-nightly index cc74e6bb9..98dc9e880 100644 --- a/docker/Dockerfile-base-nightly +++ b/docker/Dockerfile-base-nightly @@ -30,7 +30,7 @@ ENV PATH="/root/miniconda3/envs/py${PYTHON_VERSION}/bin:${PATH}" WORKDIR /workspace -RUN python3 -m pip install --upgrade pip && pip3 install -U packaging==23.2 setuptools==75.8.0 wheel && \ +RUN python3 -m pip install --upgrade pip && pip3 install -U packaging==26.0 setuptools==75.8.0 wheel && \ python3 -m pip install --no-cache-dir -U torch --extra-index-url https://download.pytorch.org/whl/nightly/cu$CUDA && \ python3 -m pip install --no-cache-dir "causal_conv1d @ git+https://github.com/Dao-AILab/causal-conv1d.git@main" && \ python3 -m pip install --no-cache-dir "mamba_ssm @ git+https://github.com/state-spaces/mamba.git@main" && \ diff --git a/examples/apertus/README.md b/examples/apertus/README.md index 774286333..1cb4d413c 100644 --- a/examples/apertus/README.md +++ b/examples/apertus/README.md @@ -15,7 +15,7 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations git clone 
https://github.com/axolotl-ai-cloud/axolotl.git cd axolotl -pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja +pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja pip3 install --no-build-isolation -e '.[flash-attn]' # Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy diff --git a/examples/arcee/README.md b/examples/arcee/README.md index 23f63663e..ad554532c 100644 --- a/examples/arcee/README.md +++ b/examples/arcee/README.md @@ -17,7 +17,7 @@ Thanks to the team at Arcee.ai for using Axolotl in supervised fine-tuning the A git clone https://github.com/axolotl-ai-cloud/axolotl.git cd axolotl -pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja +pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja pip3 install --no-build-isolation -e '.[flash-attn]' # Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy diff --git a/examples/devstral/README.md b/examples/devstral/README.md index ae0860662..5a0145f10 100644 --- a/examples/devstral/README.md +++ b/examples/devstral/README.md @@ -16,7 +16,7 @@ Thanks to the team at MistralAI for giving us early access to prepare for this r ```bash # Ensure you have Pytorch installed (Pytorch 2.6.0 min) -pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja +pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0' ``` diff --git a/examples/gemma3n/README.md b/examples/gemma3n/README.md index ff3946c90..4808ed81b 100644 --- a/examples/gemma3n/README.md +++ b/examples/gemma3n/README.md @@ -10,7 +10,7 @@ Gemma-3n is a family of multimodal models from Google found on [HuggingFace](htt ```bash # Ensure you have Pytorch installed (Pytorch 2.6.0 min) -pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja +pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0' ``` diff --git a/examples/gpt-oss/README.md 
b/examples/gpt-oss/README.md index 9ab02b122..8c407540e 100644 --- a/examples/gpt-oss/README.md +++ b/examples/gpt-oss/README.md @@ -14,7 +14,7 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations ```bash # Ensure you have Pytorch installed (Pytorch 2.6.0 min) -pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja +pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0' ``` diff --git a/examples/granite4/README.md b/examples/granite4/README.md index d5efd3349..049539405 100644 --- a/examples/granite4/README.md +++ b/examples/granite4/README.md @@ -15,7 +15,7 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations git clone https://github.com/axolotl-ai-cloud/axolotl.git cd axolotl -pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja +pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja pip3 install --no-build-isolation -e '.[flash-attn]' # Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy diff --git a/examples/hunyuan/README.md b/examples/hunyuan/README.md index 96c6bbcfa..59e9a28c7 100644 --- a/examples/hunyuan/README.md +++ b/examples/hunyuan/README.md @@ -13,7 +13,7 @@ Tencent released a family of opensource models called HunYuan with varying param git clone https://github.com/axolotl-ai-cloud/axolotl.git cd axolotl -pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja +pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja pip3 install --no-build-isolation -e '.[flash-attn]' # Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy diff --git a/examples/magistral/README.md b/examples/magistral/README.md index 40a793f10..2e162df6b 100644 --- a/examples/magistral/README.md +++ b/examples/magistral/README.md @@ -14,7 +14,7 @@ Thanks to the team at MistralAI for giving us early access to prepare for these ```bash # Ensure you have Pytorch installed 
(Pytorch 2.7.0 min) -pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja +pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0' ``` diff --git a/examples/qwen3-next/README.md b/examples/qwen3-next/README.md index 678175fd4..3c3a26a76 100644 --- a/examples/qwen3-next/README.md +++ b/examples/qwen3-next/README.md @@ -15,7 +15,7 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations git clone https://github.com/axolotl-ai-cloud/axolotl.git cd axolotl -pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja +pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja pip3 install --no-build-isolation -e '.[flash-attn]' # Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy diff --git a/examples/voxtral/README.md b/examples/voxtral/README.md index b77691d72..2d3cad4e9 100644 --- a/examples/voxtral/README.md +++ b/examples/voxtral/README.md @@ -12,7 +12,7 @@ Thanks to the team at MistralAI for giving us early access to prepare for this r ```bash # Ensure you have Pytorch installed (Pytorch 2.6.0 min) -pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja +pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0' ``` diff --git a/pyproject.toml b/pyproject.toml index c5b7deb05..bca758576 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["setuptools>=64", "wheel", "setuptools_scm>=8", "packaging==23.2"] +requires = ["setuptools>=64", "wheel", "setuptools_scm>=8", "packaging==26.0"] build-backend = "setuptools.build_meta" [project] diff --git a/requirements.txt b/requirements.txt index 2b5ec0c38..2d5fa12fc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,7 +8,7 @@ xformers>=0.0.23.post1 liger-kernel==0.6.4 # END section -packaging==23.2 +packaging==26.0 huggingface_hub>=0.36.0 peft>=0.18.1 @@ -72,4 +72,4 @@ 
axolotl-contribs-mit==0.0.6 # telemetry posthog==6.7.11 -mistral-common==1.8.6 +mistral-common==1.8.8 diff --git a/setup.py b/setup.py index 101c5b8c4..00a8486e2 100644 --- a/setup.py +++ b/setup.py @@ -78,6 +78,10 @@ def parse_requirements(extras_require_map): extras_require_map["vllm"] = ["vllm==0.11.1"] if not install_xformers: _install_requires.pop(_install_requires.index(xformers_version)) + if patch == 0: + extras_require_map["vllm"] = ["vllm==0.13.0"] + else: + extras_require_map["vllm"] = ["vllm==0.14.0"] elif (major, minor) >= (2, 8): extras_require_map.pop("fbgemm-gpu") extras_require_map["fbgemm-gpu"] = ["fbgemm-gpu-genai==1.3.0"]