feat: move to uv first (#3545)

* feat: move to uv first * fix: update doc to uv first * fix: merge dev/tests into uv pyproject * fix: update docker docs to match current config * fix: migrate examples to readme * fix: add llmcompressor to conflict * feat: rec uv sync with lockfile for dev/ci * fix: update docker docs to clarify how to use uv images * chore: docs * fix: use system python, no venv * fix: set backend cpu * fix: only set for installing pytorch step * fix: remove unsloth kernel and installs * fix: remove U in tests * fix: set backend in deps too * chore: test * chore: comments * fix: attempt to lock torch * fix: workaround torch cuda and not upgraded * fix: forgot to push * fix: missed source * fix: nightly upstream loralinear config * fix: nightly phi3 long rope not work * fix: forgot commit * fix: test phi3 template change * fix: no more requirements * fix: carry over changes from new requirements to pyproject * chore: remove lockfile per discussion * fix: set match-runtime * fix: remove unneeded hf hub buildtime * fix: duplicate cache delete on nightly * fix: torchvision being overridden * fix: migrate to uv images * fix: leftover from merge * fix: simplify base readme * fix: update assertion message to be clearer * chore: docs * fix: change fallback for cicd script * fix: match against main exactly * fix: peft 0.19.1 change * fix: e2e test * fix: ci * fix: e2e test
2026-04-21 21:16:03 +07:00
parent 323da791eb
commit 9de5b76336
58 changed files with 496 additions and 1520 deletions
--- a/.github/workflows/multi-gpu-e2e.yml
+++ b/.github/workflows/multi-gpu-e2e.yml
@@ -3,17 +3,15 @@ name: docker-multigpu-tests-biweekly
 on:
  pull_request:
    paths:
-      - 'tests/e2e/multigpu/**.py'
-      - 'requirements.txt'
-      - 'setup.py'
-      - 'pyproject.toml'
-      - '.github/workflows/multi-gpu-e2e.yml'
-      - 'scripts/cutcrossentropy_install.py'
-      - 'src/axolotl/core/trainers/mixins/sequence_parallel.py'
-      - 'src/axolotl/utils/distributed.py'
+      - "tests/e2e/multigpu/**.py"
+      - "pyproject.toml"
+      - ".github/workflows/multi-gpu-e2e.yml"
+      - "scripts/cutcrossentropy_install.py"
+      - "src/axolotl/core/trainers/mixins/sequence_parallel.py"
+      - "src/axolotl/utils/distributed.py"
  workflow_dispatch:
  schedule:
-    - cron: '0 0 * * 1,4'  # Runs at 00:00 UTC every monday & thursday
+    - cron: "0 0 * * 1,4" # Runs at 00:00 UTC every Monday & Thursday

 # Cancel jobs on the same ref if a new one is triggered
 concurrency:
@@ -33,19 +31,19 @@ jobs:
      fail-fast: false
      matrix:
        include:
-#          - cuda: 129
-#            cuda_version: 12.9.1
-#            python_version: "3.12"
-#            pytorch: 2.9.1
-#            axolotl_extras: "fbgemm-gpu"
-#            num_gpus: 2
-#            dockerfile: "Dockerfile-uv.jinja"
+          # - cuda: 129
+          #   cuda_version: 12.9.1
+          #   python_version: "3.12"
+          #   pytorch: 2.9.1
+          #   axolotl_extras: "fbgemm-gpu"
+          #   num_gpus: 2
+          #   dockerfile: "Dockerfile-uv.jinja"
          - cuda: 130
            cuda_version: 13.0.0
            python_version: "3.11"
            pytorch: 2.9.1
            axolotl_extras:
-#            axolotl_extras: fbgemm-gpu
+            # axolotl_extras: fbgemm-gpu
            num_gpus: 2
          - cuda: 128
            cuda_version: 12.8.1
@@ -53,7 +51,6 @@ jobs:
            pytorch: 2.10.0
            axolotl_extras: "fbgemm-gpu"
            num_gpus: 2
-            dockerfile: "Dockerfile-uv.jinja"
    runs-on: [self-hosted, modal]
    timeout-minutes: 120
    steps:
@@ -75,7 +72,7 @@ jobs:
          echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV
          echo "CUDA=${{ matrix.cuda }}" >> $GITHUB_ENV
          echo "N_GPUS=${{ matrix.num_gpus }}" >> $GITHUB_ENV
-          echo "E2E_DOCKERFILE=${{ matrix.dockerfile || 'Dockerfile.jinja'}}" >> $GITHUB_ENV
+          echo "E2E_DOCKERFILE=${{ matrix.dockerfile || 'Dockerfile-uv.jinja'}}" >> $GITHUB_ENV
      - name: Run tests job on Modal
        env:
          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}