diff --git a/.github/workflows/multi-gpu-e2e.yml b/.github/workflows/multi-gpu-e2e.yml
index 1dd019dc7..745e177bb 100644
--- a/.github/workflows/multi-gpu-e2e.yml
+++ b/.github/workflows/multi-gpu-e2e.yml
@@ -43,6 +43,13 @@ jobs:
             axolotl_extras: fbgemm-gpu
             num_gpus: 2
             nightly_build: "true"
+          - cuda: 130
+            cuda_version: 13.0.0
+            python_version: "3.11"
+            pytorch: 2.9.1
+            axolotl_extras: fbgemm-gpu
+            num_gpus: 2
+            nightly_build: "true"
     runs-on: [self-hosted, modal]
     timeout-minutes: 120
     steps:
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index ae5ba1740..10c0e9bf1 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -316,6 +316,12 @@ jobs:
             pytorch: 2.9.1
             num_gpus: 1
             axolotl_extras:
+          - cuda: 130
+            cuda_version: 13.0.0
+            python_version: "3.11"
+            pytorch: 2.9.1
+            num_gpus: 1
+            axolotl_extras:
     steps:
       - name: Checkout
         uses: actions/checkout@v4
diff --git a/docker/Dockerfile-uv-base b/docker/Dockerfile-uv-base
index 0b4dfc33f..1b54c05e6 100644
--- a/docker/Dockerfile-uv-base
+++ b/docker/Dockerfile-uv-base
@@ -39,11 +39,11 @@ RUN case "$PYTORCH_VERSION" in \
     2.9.[0-9]*) \
         if [ "$CUDA" = "128" ]; then \
             wget -nv https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.5.4/flash_attn-2.8.3+cu128torch2.9-cp311-cp311-linux_x86_64.whl; \
-            uv pip3 install --no-cache-dir flash_attn-2.8.3+cu128torch2.9-cp311-cp311-linux_x86_64.whl; \
+            uv pip install --no-cache-dir flash_attn-2.8.3+cu128torch2.9-cp311-cp311-linux_x86_64.whl; \
             rm flash_attn-2.8.3+cu128torch2.9-cp311-cp311-linux_x86_64.whl; \
         elif [ "$CUDA" = "130" ]; then \
             wget -nv https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.5.4/flash_attn-2.8.3+cu130torch2.9-cp311-cp311-linux_x86_64.whl; \
-            uv pip3 install --no-cache-dir flash_attn-2.8.3+cu130torch2.9-cp311-cp311-linux_x86_64.whl; \
+            uv pip install --no-cache-dir flash_attn-2.8.3+cu130torch2.9-cp311-cp311-linux_x86_64.whl; \
             rm flash_attn-2.8.3+cu130torch2.9-cp311-cp311-linux_x86_64.whl; \
         fi \
         ;; \