From a531e9d946b83f04f56ac46442b5d7b3b3f8cdc0 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Wed, 21 Jan 2026 20:00:18 -0500 Subject: [PATCH] upgrade vllm to v0.14.0 (#3345) --- .github/workflows/main.yml | 2 +- .github/workflows/multi-gpu-e2e.yml | 14 +++++++++----- .github/workflows/pypi.yml | 2 +- .github/workflows/tests-nightly.yml | 2 +- .github/workflows/tests.yml | 6 +++--- README.md | 2 +- cicd/Dockerfile-uv.jinja | 2 +- cicd/Dockerfile.jinja | 2 +- cicd/multigpu.py | 6 +++++- docker/Dockerfile-base | 2 +- docker/Dockerfile-base-nightly | 2 +- examples/apertus/README.md | 2 +- examples/arcee/README.md | 2 +- examples/devstral/README.md | 2 +- examples/gemma3n/README.md | 2 +- examples/gpt-oss/README.md | 2 +- examples/granite4/README.md | 2 +- examples/hunyuan/README.md | 2 +- examples/magistral/README.md | 2 +- examples/qwen3-next/README.md | 2 +- examples/voxtral/README.md | 2 +- pyproject.toml | 2 +- requirements.txt | 4 ++-- setup.py | 5 +++++ 24 files changed, 43 insertions(+), 30 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e081f2127..0e1ccb89a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -38,7 +38,7 @@ jobs: cuda_version: 12.9.1 python_version: "3.12" pytorch: 2.9.1 - axolotl_extras: + axolotl_extras: vllm platforms: "linux/amd64,linux/arm64" - cuda: 130 cuda_version: 13.0.0 diff --git a/.github/workflows/multi-gpu-e2e.yml b/.github/workflows/multi-gpu-e2e.yml index 833dc4f29..107572ad6 100644 --- a/.github/workflows/multi-gpu-e2e.yml +++ b/.github/workflows/multi-gpu-e2e.yml @@ -35,14 +35,19 @@ jobs: pytorch: 2.8.0 axolotl_extras: fbgemm-gpu num_gpus: 2 - nightly_build: "true" - cuda: 128 cuda_version: 12.8.1 python_version: "3.11" pytorch: 2.9.1 - axolotl_extras: fbgemm-gpu + axolotl_extras: "fbgemm-gpu" num_gpus: 2 - nightly_build: "true" + - cuda: 129 + cuda_version: 12.9.1 + python_version: "3.12" + pytorch: 2.9.1 + axolotl_extras: "fbgemm-gpu,vllm" + num_gpus: 2 + 
dockerfile: "Dockerfile-uv.jinja" - cuda: 130 cuda_version: 13.0.0 python_version: "3.11" @@ -50,7 +55,6 @@ jobs: axolotl_extras: # axolotl_extras: fbgemm-gpu num_gpus: 2 - nightly_build: "true" runs-on: [self-hosted, modal] timeout-minutes: 120 steps: @@ -72,8 +76,8 @@ jobs: echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV echo "CUDA=${{ matrix.cuda }}" >> $GITHUB_ENV echo "N_GPUS=${{ matrix.num_gpus }}" >> $GITHUB_ENV - echo "NIGHTLY_BUILD=${{ matrix.nightly_build }}" >> $GITHUB_ENV echo "CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }}" >> $GITHUB_ENV + echo "E2E_DOCKERFILE=${{ matrix.dockerfile || 'Dockerfile.jinja'}}" >> $GITHUB_ENV - name: Run tests job on Modal run: | modal run -m cicd.multigpu diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index af2ad73a4..3bf66b497 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -40,7 +40,7 @@ jobs: - name: Install dependencies run: | - pip3 install wheel packaging==23.2 + pip3 install wheel packaging==26.0 pip3 install --no-build-isolation -e . 
pip3 install -r requirements-dev.txt -r requirements-tests.txt diff --git a/.github/workflows/tests-nightly.yml b/.github/workflows/tests-nightly.yml index 67b68a7e6..21446e548 100644 --- a/.github/workflows/tests-nightly.yml +++ b/.github/workflows/tests-nightly.yml @@ -48,7 +48,7 @@ jobs: - name: upgrade pip run: | pip3 install --upgrade pip - pip3 install --upgrade packaging==23.2 setuptools==75.8.0 wheel + pip3 install --upgrade packaging==26.0 setuptools==75.8.0 wheel - name: Install PyTorch run: | diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 75c70a24a..bcbb76df3 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -87,7 +87,7 @@ jobs: - name: upgrade pip run: | pip3 install --upgrade pip - pip3 install --upgrade packaging==23.2 setuptools==75.8.0 wheel + pip3 install --upgrade packaging==26.0 setuptools==75.8.0 wheel - name: Install PyTorch run: | @@ -182,7 +182,7 @@ jobs: - name: upgrade pip run: | pip3 install --upgrade pip - pip3 install --upgrade packaging==23.2 setuptools==75.8.0 setuptools_scm build wheel psutil + pip3 install --upgrade packaging==26.0 setuptools==75.8.0 setuptools_scm build wheel psutil - name: Install PyTorch run: | @@ -269,7 +269,7 @@ jobs: python_version: "3.12" pytorch: 2.9.1 num_gpus: 1 - axolotl_extras: + axolotl_extras: vllm dockerfile: "Dockerfile-uv.jinja" steps: - name: Checkout diff --git a/README.md b/README.md index 0521f7bed..b56cdf0e8 100644 --- a/README.md +++ b/README.md @@ -88,7 +88,7 @@ Features: #### Using pip ```bash -pip3 install -U packaging==23.2 setuptools==75.8.0 wheel ninja +pip3 install -U packaging==26.0 setuptools==75.8.0 wheel ninja pip3 install --no-build-isolation axolotl[flash-attn,deepspeed] # Download example axolotl configs, deepspeed configs diff --git a/cicd/Dockerfile-uv.jinja b/cicd/Dockerfile-uv.jinja index 6a4d8a7d3..9a49cfca5 100644 --- a/cicd/Dockerfile-uv.jinja +++ b/cicd/Dockerfile-uv.jinja @@ -31,7 +31,7 @@ RUN if [ "$NIGHTLY_BUILD" 
= "true" ] ; then \ sed -i 's#^datasets.*#datasets @ git+https://github.com/huggingface/datasets.git@main#' requirements.txt; \ fi -RUN uv pip install packaging==23.2 setuptools==75.8.0 +RUN uv pip install packaging==26.0 setuptools==75.8.0 RUN uv pip install torchvision RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \ uv pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \ diff --git a/cicd/Dockerfile.jinja b/cicd/Dockerfile.jinja index 81ed5453e..1c397b011 100644 --- a/cicd/Dockerfile.jinja +++ b/cicd/Dockerfile.jinja @@ -32,7 +32,7 @@ RUN if [ "$NIGHTLY_BUILD" = "true" ] ; then \ sed -i 's#^datasets.*#datasets @ git+https://github.com/huggingface/datasets.git@main#' requirements.txt; \ fi -RUN pip install packaging==23.2 setuptools==75.8.0 psutil +RUN pip install packaging==26.0 setuptools==75.8.0 psutil RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \ pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \ else \ diff --git a/cicd/multigpu.py b/cicd/multigpu.py index 5bd8d3c04..ed022c851 100644 --- a/cicd/multigpu.py +++ b/cicd/multigpu.py @@ -17,7 +17,8 @@ template_loader = jinja2.FileSystemLoader(searchpath=cicd_path) template_env = jinja2.Environment( loader=template_loader, autoescape=select_autoescape() ) -df_template = template_env.get_template("Dockerfile.jinja") +dockerfile = os.environ.get("E2E_DOCKERFILE", "Dockerfile.jinja") +df_template = template_env.get_template(dockerfile) df_args = { "AXOLOTL_EXTRAS": os.environ.get("AXOLOTL_EXTRAS", ""), @@ -27,8 +28,11 @@ df_args = { "CUDA": os.environ.get("CUDA", "126"), "GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"), "GITHUB_SHA": os.environ.get("GITHUB_SHA", ""), + "NIGHTLY_BUILD": os.environ.get("NIGHTLY_BUILD", ""), "CODECOV_TOKEN": os.environ.get("CODECOV_TOKEN", ""), "HF_HOME": "/workspace/data/huggingface-cache/hub", + "PYTHONUNBUFFERED": 
os.environ.get("PYTHONUNBUFFERED", "1"), + "DEEPSPEED_LOG_LEVEL": os.environ.get("DEEPSPEED_LOG_LEVEL", "WARNING"), } dockerfile_contents = df_template.render(**df_args) diff --git a/docker/Dockerfile-base b/docker/Dockerfile-base index 96367207f..547c45f49 100644 --- a/docker/Dockerfile-base +++ b/docker/Dockerfile-base @@ -43,7 +43,7 @@ ENV PATH="/root/miniconda3/envs/py${PYTHON_VERSION}/bin:${PATH}" WORKDIR /workspace -RUN python3 -m pip install --upgrade pip && pip3 install -U packaging==23.2 setuptools==75.8.0 wheel psutil && \ +RUN python3 -m pip install --upgrade pip && pip3 install -U packaging==26.0 setuptools==75.8.0 wheel psutil && \ python3 -m pip install --no-cache-dir -U torch==${PYTORCH_VERSION}+cu${CUDA} torchvision --extra-index-url https://download.pytorch.org/whl/cu$CUDA && \ python3 -m pip cache purge diff --git a/docker/Dockerfile-base-nightly b/docker/Dockerfile-base-nightly index cc74e6bb9..98dc9e880 100644 --- a/docker/Dockerfile-base-nightly +++ b/docker/Dockerfile-base-nightly @@ -30,7 +30,7 @@ ENV PATH="/root/miniconda3/envs/py${PYTHON_VERSION}/bin:${PATH}" WORKDIR /workspace -RUN python3 -m pip install --upgrade pip && pip3 install -U packaging==23.2 setuptools==75.8.0 wheel && \ +RUN python3 -m pip install --upgrade pip && pip3 install -U packaging==26.0 setuptools==75.8.0 wheel && \ python3 -m pip install --no-cache-dir -U torch --extra-index-url https://download.pytorch.org/whl/nightly/cu$CUDA && \ python3 -m pip install --no-cache-dir "causal_conv1d @ git+https://github.com/Dao-AILab/causal-conv1d.git@main" && \ python3 -m pip install --no-cache-dir "mamba_ssm @ git+https://github.com/state-spaces/mamba.git@main" && \ diff --git a/examples/apertus/README.md b/examples/apertus/README.md index 774286333..1cb4d413c 100644 --- a/examples/apertus/README.md +++ b/examples/apertus/README.md @@ -15,7 +15,7 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations git clone 
https://github.com/axolotl-ai-cloud/axolotl.git cd axolotl -pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja +pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja pip3 install --no-build-isolation -e '.[flash-attn]' # Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy diff --git a/examples/arcee/README.md b/examples/arcee/README.md index 23f63663e..ad554532c 100644 --- a/examples/arcee/README.md +++ b/examples/arcee/README.md @@ -17,7 +17,7 @@ Thanks to the team at Arcee.ai for using Axolotl in supervised fine-tuning the A git clone https://github.com/axolotl-ai-cloud/axolotl.git cd axolotl -pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja +pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja pip3 install --no-build-isolation -e '.[flash-attn]' # Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy diff --git a/examples/devstral/README.md b/examples/devstral/README.md index ae0860662..5a0145f10 100644 --- a/examples/devstral/README.md +++ b/examples/devstral/README.md @@ -16,7 +16,7 @@ Thanks to the team at MistralAI for giving us early access to prepare for this r ```bash # Ensure you have Pytorch installed (Pytorch 2.6.0 min) -pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja +pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0' ``` diff --git a/examples/gemma3n/README.md b/examples/gemma3n/README.md index ff3946c90..4808ed81b 100644 --- a/examples/gemma3n/README.md +++ b/examples/gemma3n/README.md @@ -10,7 +10,7 @@ Gemma-3n is a family of multimodal models from Google found on [HuggingFace](htt ```bash # Ensure you have Pytorch installed (Pytorch 2.6.0 min) -pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja +pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0' ``` diff --git a/examples/gpt-oss/README.md 
b/examples/gpt-oss/README.md index 9ab02b122..8c407540e 100644 --- a/examples/gpt-oss/README.md +++ b/examples/gpt-oss/README.md @@ -14,7 +14,7 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations ```bash # Ensure you have Pytorch installed (Pytorch 2.6.0 min) -pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja +pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0' ``` diff --git a/examples/granite4/README.md b/examples/granite4/README.md index d5efd3349..049539405 100644 --- a/examples/granite4/README.md +++ b/examples/granite4/README.md @@ -15,7 +15,7 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations git clone https://github.com/axolotl-ai-cloud/axolotl.git cd axolotl -pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja +pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja pip3 install --no-build-isolation -e '.[flash-attn]' # Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy diff --git a/examples/hunyuan/README.md b/examples/hunyuan/README.md index 96c6bbcfa..59e9a28c7 100644 --- a/examples/hunyuan/README.md +++ b/examples/hunyuan/README.md @@ -13,7 +13,7 @@ Tencent released a family of opensource models called HunYuan with varying param git clone https://github.com/axolotl-ai-cloud/axolotl.git cd axolotl -pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja +pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja pip3 install --no-build-isolation -e '.[flash-attn]' # Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy diff --git a/examples/magistral/README.md b/examples/magistral/README.md index 40a793f10..2e162df6b 100644 --- a/examples/magistral/README.md +++ b/examples/magistral/README.md @@ -14,7 +14,7 @@ Thanks to the team at MistralAI for giving us early access to prepare for these ```bash # Ensure you have Pytorch installed 
(Pytorch 2.7.0 min) -pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja +pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0' ``` diff --git a/examples/qwen3-next/README.md b/examples/qwen3-next/README.md index 678175fd4..3c3a26a76 100644 --- a/examples/qwen3-next/README.md +++ b/examples/qwen3-next/README.md @@ -15,7 +15,7 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations git clone https://github.com/axolotl-ai-cloud/axolotl.git cd axolotl -pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja +pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja pip3 install --no-build-isolation -e '.[flash-attn]' # Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy diff --git a/examples/voxtral/README.md b/examples/voxtral/README.md index b77691d72..2d3cad4e9 100644 --- a/examples/voxtral/README.md +++ b/examples/voxtral/README.md @@ -12,7 +12,7 @@ Thanks to the team at MistralAI for giving us early access to prepare for this r ```bash # Ensure you have Pytorch installed (Pytorch 2.6.0 min) -pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja +pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0' ``` diff --git a/pyproject.toml b/pyproject.toml index c5b7deb05..bca758576 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["setuptools>=64", "wheel", "setuptools_scm>=8", "packaging==23.2"] +requires = ["setuptools>=64", "wheel", "setuptools_scm>=8", "packaging==26.0"] build-backend = "setuptools.build_meta" [project] diff --git a/requirements.txt b/requirements.txt index 2b5ec0c38..2d5fa12fc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,7 +8,7 @@ xformers>=0.0.23.post1 liger-kernel==0.6.4 # END section -packaging==23.2 +packaging==26.0 huggingface_hub>=0.36.0 peft>=0.18.1 @@ -72,4 +72,4 @@ 
axolotl-contribs-mit==0.0.6 # telemetry posthog==6.7.11 -mistral-common==1.8.6 +mistral-common==1.8.8 diff --git a/setup.py b/setup.py index 101c5b8c4..00a8486e2 100644 --- a/setup.py +++ b/setup.py @@ -78,6 +78,10 @@ def parse_requirements(extras_require_map): extras_require_map["vllm"] = ["vllm==0.11.1"] if not install_xformers: _install_requires.pop(_install_requires.index(xformers_version)) + if patch == 0: + extras_require_map["vllm"] = ["vllm==0.13.0"] + else: + extras_require_map["vllm"] = ["vllm==0.14.0"] elif (major, minor) >= (2, 8): extras_require_map.pop("fbgemm-gpu") extras_require_map["fbgemm-gpu"] = ["fbgemm-gpu-genai==1.3.0"]