From 80f7088ad18820f896a72eb4e4e40069b066b7d5 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Fri, 6 Mar 2026 14:59:25 -0500 Subject: [PATCH] update setuptools so trl can be installed from main for nightlies (#3471) * update setuptools so trl can be installed from main for nightlies * run the nightly in the PR CI on change * use range request, don't use cu129 in CI since it's not supported with AO * run multigpu ci if CCE install script changes --- .github/workflows/multi-gpu-e2e.yml | 15 ++++++++------- .github/workflows/tests-nightly.yml | 9 ++++++--- .github/workflows/tests.yml | 2 +- cicd/Dockerfile-uv.jinja | 2 +- cicd/Dockerfile.jinja | 2 +- 5 files changed, 17 insertions(+), 13 deletions(-) diff --git a/.github/workflows/multi-gpu-e2e.yml b/.github/workflows/multi-gpu-e2e.yml index c1e5c5d75..6063c24c7 100644 --- a/.github/workflows/multi-gpu-e2e.yml +++ b/.github/workflows/multi-gpu-e2e.yml @@ -8,6 +8,7 @@ on: - 'setup.py' - 'pyproject.toml' - '.github/workflows/multi-gpu-e2e.yml' + - 'scripts/cutcrossentropy_install.py' - 'src/axolotl/core/trainers/mixins/sequence_parallel.py' - 'src/axolotl/utils/distributed.py' workflow_dispatch: @@ -35,13 +36,13 @@ jobs: pytorch: 2.8.0 axolotl_extras: fbgemm-gpu num_gpus: 2 - - cuda: 129 - cuda_version: 12.9.1 - python_version: "3.12" - pytorch: 2.9.1 - axolotl_extras: "fbgemm-gpu" - num_gpus: 2 - dockerfile: "Dockerfile-uv.jinja" +# - cuda: 129 +# cuda_version: 12.9.1 +# python_version: "3.12" +# pytorch: 2.9.1 +# axolotl_extras: "fbgemm-gpu" +# num_gpus: 2 +# dockerfile: "Dockerfile-uv.jinja" - cuda: 130 cuda_version: 13.0.0 python_version: "3.11" diff --git a/.github/workflows/tests-nightly.yml b/.github/workflows/tests-nightly.yml index 45596a2e1..d5a533fbc 100644 --- a/.github/workflows/tests-nightly.yml +++ b/.github/workflows/tests-nightly.yml @@ -3,6 +3,10 @@ on: workflow_dispatch: schedule: - cron: '0 0 * * *' # Runs at 00:00 UTC every day + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + paths: + - '.github/workflows/tests-nightly.yml' jobs: pre-commit: @@ -27,7 +31,7 @@ jobs: - name: Restore Cache from S3 id: hf-cache-restore-s3 run: | - curl -L https://axolotl-ci.b-cdn.net/hf-cache.tar.zst > /dev/null + curl -v -H "Range: bytes=0-1023" -L https://axolotl-ci.b-cdn.net/hf-cache.tar.zst > /dev/null pytest: name: PyTest @@ -35,7 +39,6 @@ jobs: needs: [prime-cdn-s3-cache] strategy: fail-fast: false - max-parallel: 2 matrix: python_version: ["3.12"] # TODO include py3.14 once https://github.com/mistralai/mistral-common/pull/194 is merged pytorch_version: ["2.8.0", "2.9.1", "2.10.0"] @@ -60,7 +63,7 @@ jobs: - name: upgrade pip run: | pip3 install --upgrade pip - pip3 install --upgrade packaging==26.0 setuptools==75.8.0 wheel + pip3 install --upgrade packaging==26.0 setuptools==78.1.1 wheel - name: Install PyTorch run: | diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f8c9a37bb..23e9d39e3 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -55,7 +55,7 @@ jobs: - name: Restore Cache from S3 id: hf-cache-restore-s3 run: | - curl -L https://axolotl-ci.b-cdn.net/hf-cache.tar.zst > /dev/null + curl -v -H "Range: bytes=0-1023" -L https://axolotl-ci.b-cdn.net/hf-cache.tar.zst > /dev/null pytest: name: PyTest diff --git a/cicd/Dockerfile-uv.jinja b/cicd/Dockerfile-uv.jinja index 103f1eb99..29c2e79d5 100644 --- a/cicd/Dockerfile-uv.jinja +++ b/cicd/Dockerfile-uv.jinja @@ -31,7 +31,7 @@ RUN if [ "$NIGHTLY_BUILD" = "true" ] ; then \ sed -i 's#^datasets.*#datasets @ git+https://github.com/huggingface/datasets.git@main#' requirements.txt; \ fi -RUN uv pip install packaging==26.0 setuptools==75.8.0 +RUN uv pip install packaging==26.0 setuptools==78.1.1 RUN uv pip install torchvision RUN uv pip uninstall causal_conv1d RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \ diff --git a/cicd/Dockerfile.jinja b/cicd/Dockerfile.jinja index 13d2f4e69..4f0140fc6 100644 --- a/cicd/Dockerfile.jinja +++ b/cicd/Dockerfile.jinja @@ -32,7 +32,7 @@ RUN if [ "$NIGHTLY_BUILD" = "true" ] ; then \ sed -i 's#^datasets.*#datasets @ git+https://github.com/huggingface/datasets.git@main#' requirements.txt; \ fi -RUN pip install packaging==26.0 setuptools==75.8.0 psutil +RUN pip install packaging==26.0 setuptools==78.1.1 psutil RUN pip uninstall -y causal_conv1d RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \ pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \