feat: move to uv first (#3545)

* feat: move to uv first

* fix: update doc to uv first

* fix: merge dev/tests into uv pyproject

* fix: update docker docs to match current config

* fix: migrate examples to readme

* fix: add llmcompressor to conflict

* feat: rec uv sync with lockfile for dev/ci

* fix: update docker docs to clarify how to use uv images

* chore: docs

* fix: use system python, no venv

* fix: set backend cpu

* fix: only set for installing pytorch step

* fix: remove unsloth kernel and installs

* fix: remove U in tests

* fix: set backend in deps too

* chore: test

* chore: comments

* fix: attempt to lock torch

* fix: workaround torch cuda and not upgraded

* fix: forgot to push

* fix: missed source

* fix: nightly upstream loralinear config

* fix: nightly phi3 long rope not work

* fix: forgot commit

* fix: test phi3 template change

* fix: no more requirements

* fix: carry over changes from new requirements to pyproject

* chore: remove lockfile per discussion

* fix: set match-runtime

* fix: remove unneeded hf hub buildtime

* fix: duplicate cache delete on nightly

* fix: torchvision being overridden

* fix: migrate to uv images

* fix: leftover from merge

* fix: simplify base readme

* fix: update assertion message to be clearer

* chore: docs

* fix: change fallback for cicd script

* fix: match against main exactly

* fix: peft 0.19.1 change

* fix: e2e test

* fix: ci

* fix: e2e test
This commit is contained in:
NanoCode012
2026-04-21 21:16:03 +07:00
committed by GitHub
parent 323da791eb
commit 9de5b76336
58 changed files with 496 additions and 1520 deletions

View File

@@ -3,17 +3,15 @@ name: docker-multigpu-tests-biweekly
on:
pull_request:
paths:
- 'tests/e2e/multigpu/**.py'
- 'requirements.txt'
- 'setup.py'
- 'pyproject.toml'
- '.github/workflows/multi-gpu-e2e.yml'
- 'scripts/cutcrossentropy_install.py'
- 'src/axolotl/core/trainers/mixins/sequence_parallel.py'
- 'src/axolotl/utils/distributed.py'
- "tests/e2e/multigpu/**.py"
- "pyproject.toml"
- ".github/workflows/multi-gpu-e2e.yml"
- "scripts/cutcrossentropy_install.py"
- "src/axolotl/core/trainers/mixins/sequence_parallel.py"
- "src/axolotl/utils/distributed.py"
workflow_dispatch:
schedule:
- cron: '0 0 * * 1,4' # Runs at 00:00 UTC every monday & thursday
- cron: "0 0 * * 1,4" # Runs at 00:00 UTC every monday & thursday
# Cancel jobs on the same ref if a new one is triggered
concurrency:
@@ -33,19 +31,19 @@ jobs:
fail-fast: false
matrix:
include:
# - cuda: 129
# cuda_version: 12.9.1
# python_version: "3.12"
# pytorch: 2.9.1
# axolotl_extras: "fbgemm-gpu"
# num_gpus: 2
# dockerfile: "Dockerfile-uv.jinja"
# - cuda: 129
# cuda_version: 12.9.1
# python_version: "3.12"
# pytorch: 2.9.1
# axolotl_extras: "fbgemm-gpu"
# num_gpus: 2
# dockerfile: "Dockerfile-uv.jinja"
- cuda: 130
cuda_version: 13.0.0
python_version: "3.11"
pytorch: 2.9.1
axolotl_extras:
# axolotl_extras: fbgemm-gpu
# axolotl_extras: fbgemm-gpu
num_gpus: 2
- cuda: 128
cuda_version: 12.8.1
@@ -53,7 +51,6 @@ jobs:
pytorch: 2.10.0
axolotl_extras: "fbgemm-gpu"
num_gpus: 2
dockerfile: "Dockerfile-uv.jinja"
runs-on: [self-hosted, modal]
timeout-minutes: 120
steps:
@@ -75,7 +72,7 @@ jobs:
echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV
echo "CUDA=${{ matrix.cuda }}" >> $GITHUB_ENV
echo "N_GPUS=${{ matrix.num_gpus }}" >> $GITHUB_ENV
echo "E2E_DOCKERFILE=${{ matrix.dockerfile || 'Dockerfile.jinja'}}" >> $GITHUB_ENV
echo "E2E_DOCKERFILE=${{ matrix.dockerfile || 'Dockerfile-uv.jinja'}}" >> $GITHUB_ENV
- name: Run tests job on Modal
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}