* Prepare for transformers v5 upgrade * fix hf cli * update for hf hub changes * fix tokenizer apply_chat_template args * remap include_tokens_per_second * fix tps * handle migration for warmup * use latest hf hub * Fix scan -> ls * fix import * fix for renaming of mistral common tokenizer -> backend * update for fixed tokenziation for llama * Skip phi35 tests for now * remove mistral patch fixed upstream in huggingface/transformers#41439 * use namespacing for patch * don't rely on sdist for e2e tests for now * run modal ci without waiting too * Fix dep for ci * fix imports * Fix fp8 check * fsdp2 fixes * fix version handling * update fsdp version tests for new v5 behavior * Fail multigpu tests after 3 failures * skip known v5 broken tests for now and cleanup * bump deps * unmark skipped test * re-enable test_fsdp_qlora_prequant_packed test * increase multigpu ci timeout * skip broken gemma3 test * reduce timout back to original 120min now that the hanging test is skipped * fix for un-necessary collator for pretraining with bsz=1 * fix: safe_serialization deprecated in transformers v5 rc01 (#3318) * torch_dtype deprecated * load model in float32 for consistency with tests * revert some test fixtures back * use hf cache ls instead of scan * don't strip fsdp_version more fdsp_Version fixes for v5 fix version in fsdp_config fix aliasing fix fsdp_version check check fsdp_version is 2 in both places * Transformers v5 rc2 (#3347) * bump dep * use latest fbgemm, grab model config as part of fixture, un-skip test * import AutoConfig * don't need more problematic autoconfig when specifying config.json manually * add fixtures for argilla ultrafeedback datasets * download phi4-reasoning * fix arg * update tests for phi fast tokenizer changes * use explicit model types for gemma3 --------- Co-authored-by: Wing Lian <wing@axolotl.ai> * fix: AutoModelForVision2Seq -> AutoModelForImageTextToText * chore: remove duplicate * fix: attempt fix gemma3 text mode * chore: lint * ga 
release of v5 * need property setter for name_or_path for mistral tokenizer * vllm not compatible with transformers v5 * setter for chat_template with mistral too --------- Co-authored-by: NanoCode012 <nano@axolotl.ai> Co-authored-by: salman <salman.mohammadi@outlook.com>
215 lines
7.9 KiB
YAML
# Workflow: build and publish axolotl Docker images.
# Runs on pushes to main, on version tags (v*), or via manual dispatch.
name: ci-cd

on:
  push:
    branches:
      - "main"
    tags:
      - "v*"
  workflow_dispatch:
jobs:
  # Build the core axolotl image for each CUDA / Python / PyTorch combination.
  build-axolotl:
    # Skip when the head commit opts out with '[skip docker]' or when running on a fork.
    if: ${{ ! contains(github.event.commits[0].message, '[skip docker]') && github.repository_owner == 'axolotl-ai-cloud' }}
    strategy:
      # Let remaining matrix builds finish even if one combination fails.
      fail-fast: false
      matrix:
        include:
          - cuda: 128
            cuda_version: 12.8.1
            python_version: "3.11"
            pytorch: 2.8.0
            axolotl_extras:
            platforms: "linux/amd64"
          - cuda: 128
            cuda_version: 12.8.1
            python_version: "3.11"
            pytorch: 2.9.0
            axolotl_extras:
            platforms: "linux/amd64,linux/arm64"
          - cuda: 128
            cuda_version: 12.8.1
            python_version: "3.11"
            pytorch: 2.9.1
            axolotl_extras:
            platforms: "linux/amd64,linux/arm64"
            # Only this combination also gets the '-latest' tag (see tags below).
            is_latest: true
          - cuda: 129
            cuda_version: 12.9.1
            python_version: "3.12"
            pytorch: 2.9.1
            axolotl_extras:
            platforms: "linux/amd64,linux/arm64"
          - cuda: 130
            cuda_version: 13.0.0
            python_version: "3.11"
            pytorch: 2.9.1
            axolotl_extras:
            platforms: "linux/amd64,linux/arm64"
    runs-on: axolotl-gpu-runner
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Docker metadata
        id: metadata
        uses: docker/metadata-action@v5
        with:
          images: |
            axolotlai/axolotl
          tags: |
            type=ref,event=branch
            type=pep440,pattern={{version}}
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      # guidance for testing before pushing: https://docs.docker.com/build/ci/github-actions/test-before-push/
      - name: Build and export to Docker
        uses: docker/build-push-action@v5
        with:
          context: .
          platforms: ${{ matrix.platforms }}
          build-args: |
            BASE_TAG=${{ github.ref_type == 'tag' && 'main' || github.ref_name }}-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}
            CUDA=${{ matrix.cuda }}
            PYTORCH_VERSION=${{ matrix.pytorch }}
            # NOTE(review): matrix.axolotl_args is not defined in the matrix above,
            # so this expands to an empty value — confirm that is intended.
            AXOLOTL_ARGS=${{ matrix.axolotl_args }}
            AXOLOTL_EXTRAS=${{ matrix.axolotl_extras }}
          file: ./docker/Dockerfile
          # Push only for real pushes/tags, never for pull requests.
          push: ${{ github.event_name != 'pull_request' }}
          tags: |
            ${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
            ${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}
            ${{ (matrix.is_latest) && format('{0}-latest', steps.metadata.outputs.tags) || '' }}
          labels: ${{ steps.metadata.outputs.labels }}
build-axolotl-cloud:
|
|
needs: build-axolotl
|
|
if: ${{ ! contains(github.event.commits[0].message, '[skip docker]') && github.repository_owner == 'axolotl-ai-cloud' }}
|
|
# this job needs to be run on self-hosted GPU runners...
|
|
strategy:
|
|
matrix:
|
|
include:
|
|
- cuda: 128
|
|
cuda_version: 12.8.1
|
|
python_version: "3.11"
|
|
pytorch: 2.8.0
|
|
axolotl_extras:
|
|
platforms: "linux/amd64"
|
|
- cuda: 128
|
|
cuda_version: 12.8.1
|
|
python_version: "3.11"
|
|
pytorch: 2.9.0
|
|
axolotl_extras:
|
|
platforms: "linux/amd64,linux/arm64"
|
|
- cuda: 128
|
|
cuda_version: 12.8.1
|
|
python_version: "3.11"
|
|
pytorch: 2.9.1
|
|
axolotl_extras:
|
|
is_latest: true
|
|
platforms: "linux/amd64,linux/arm64"
|
|
- cuda: 129
|
|
cuda_version: 12.9.1
|
|
python_version: "3.12"
|
|
pytorch: 2.9.1
|
|
axolotl_extras:
|
|
platforms: "linux/amd64,linux/arm64"
|
|
- cuda: 130
|
|
cuda_version: 13.0.0
|
|
python_version: "3.11"
|
|
pytorch: 2.9.1
|
|
axolotl_extras:
|
|
platforms: "linux/amd64,linux/arm64"
|
|
runs-on: axolotl-gpu-runner
|
|
steps:
|
|
- name: Checkout
|
|
uses: actions/checkout@v4
|
|
- name: Docker metadata
|
|
id: metadata
|
|
uses: docker/metadata-action@v5
|
|
with:
|
|
images: |
|
|
axolotlai/axolotl-cloud
|
|
tags: |
|
|
type=ref,event=branch
|
|
type=pep440,pattern={{version}}
|
|
- name: Login to Docker Hub
|
|
uses: docker/login-action@v3
|
|
with:
|
|
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
|
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
|
- name: Set up Docker Buildx
|
|
uses: docker/setup-buildx-action@v3
|
|
- name: Build
|
|
uses: docker/build-push-action@v5
|
|
with:
|
|
context: .
|
|
platforms: ${{ matrix.platforms }}
|
|
build-args: |
|
|
BASE_TAG=${{ github.ref_type == 'tag' && 'main' || github.ref_name }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
|
CUDA=${{ matrix.cuda }}
|
|
file: ./docker/Dockerfile-cloud
|
|
push: ${{ github.event_name != 'pull_request' }}
|
|
tags: |
|
|
${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
|
${{ (matrix.is_latest) && format('{0}-latest', steps.metadata.outputs.tags) || '' }}
|
|
labels: ${{ steps.metadata.outputs.labels }}
|
|
|
|
build-axolotl-cloud-no-tmux:
|
|
needs: build-axolotl
|
|
if: ${{ ! contains(github.event.commits[0].message, '[skip docker]') && github.repository_owner == 'axolotl-ai-cloud' }}
|
|
# this job needs to be run on self-hosted GPU runners...
|
|
strategy:
|
|
matrix:
|
|
include:
|
|
- cuda: 128
|
|
cuda_version: 12.8.1
|
|
python_version: "3.11"
|
|
pytorch: 2.9.1
|
|
axolotl_extras:
|
|
is_latest: true
|
|
- cuda: 130
|
|
cuda_version: 13.0.0
|
|
python_version: "3.11"
|
|
pytorch: 2.9.1
|
|
axolotl_extras:
|
|
is_latest:
|
|
runs-on: axolotl-gpu-runner
|
|
steps:
|
|
- name: Checkout
|
|
uses: actions/checkout@v4
|
|
- name: Docker metadata
|
|
id: metadata
|
|
uses: docker/metadata-action@v5
|
|
with:
|
|
images: |
|
|
axolotlai/axolotl-cloud-term
|
|
tags: |
|
|
type=ref,event=branch
|
|
type=pep440,pattern={{version}}
|
|
- name: Login to Docker Hub
|
|
uses: docker/login-action@v3
|
|
with:
|
|
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
|
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
|
- name: Set up Docker Buildx
|
|
uses: docker/setup-buildx-action@v3
|
|
- name: Build
|
|
uses: docker/build-push-action@v5
|
|
with:
|
|
context: .
|
|
platforms: linux/amd64,linux/arm64
|
|
build-args: |
|
|
BASE_TAG=${{ github.ref_type == 'tag' && 'main' || github.ref_name }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
|
CUDA=${{ matrix.cuda }}
|
|
file: ./docker/Dockerfile-cloud-no-tmux
|
|
push: ${{ github.event_name != 'pull_request' }}
|
|
tags: |
|
|
${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
|
${{ (matrix.is_latest) && format('{0}-latest', steps.metadata.outputs.tags) || '' }}
|
|
labels: ${{ steps.metadata.outputs.labels }}
|