# CI workflow: lint (pre-commit), unit tests against several PyTorch versions
# (editable install and source-distribution install), then GPU end-to-end
# tests dispatched to Modal from self-hosted runners.
name: Tests

on:
  # check on push/merge to main, PRs, and manual triggers
  merge_group:
  push:
    branches:
      - "main"
    paths:
      - '**.py'
      - 'requirements.txt'
      - '.github/workflows/*.yml'
      - 'requirements-tests.txt'
      - 'cicd/cicd.sh'
      - 'cicd/Dockerfile.jinja'
  pull_request:
    paths:
      - '**.py'
      - 'requirements.txt'
      - '.github/workflows/*.yml'
      - 'requirements-tests.txt'
      - 'cicd/cicd.sh'
      - 'cicd/Dockerfile.jinja'
  workflow_dispatch:

# Cancel jobs on the same ref if a new one is triggered
# (runs on main are never cancelled).
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

env:
  TRANSFORMERS_IS_CI: "yes"

defaults:
  run:
    # Naming the shell explicitly makes the runner invoke
    # `bash --noprofile --norc -eo pipefail {0}`. Without it, Linux runners
    # use `bash -e {0}` (no pipefail), so a failure on the left-hand side of
    # `python scripts/..._install.py | sh` or `curl ... | tar ...` would be
    # masked by the exit status of the last command in the pipeline.
    shell: bash

jobs:
  # Runs the repository's pre-commit hooks.
  pre-commit:
    name: pre-commit
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
          cache: 'pip'  # caching pip dependencies
      - uses: pre-commit/action@v3.0.1
        env:
          # SKIP is pre-commit's variable for disabling hooks by id.
          SKIP: no-commit-to-branch

  # Unit tests against an editable install, one matrix leg per PyTorch version.
  pytest:
    name: PyTest
    runs-on: ubuntu-latest
    # needs: [preload-cache]
    strategy:
      fail-fast: false
      matrix:
        python_version: ["3.11"]
        pytorch_version: ["2.5.1", "2.6.0", "2.7.1"]
    timeout-minutes: 20

    steps:
      - name: Check out repository code
        uses: actions/checkout@v4

      # Unpack a pre-built Hugging Face hub cache archive into the runner's
      # cache directory.
      - name: Restore Cache from S3
        id: hf-cache-restore-s3
        run: |
          mkdir -p /home/runner/.cache/huggingface/hub
          curl -L https://d1dttdx32dkk5p.cloudfront.net/hf-cache.tar.zst | tar -xf - -C /home/runner/.cache/huggingface/hub/ --use-compress-program unzstd

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python_version }}
          cache: 'pip'  # caching pip dependencies

      - name: upgrade pip
        run: |
          pip3 install --upgrade pip
          pip3 install --upgrade packaging==23.2 setuptools==75.8.0 wheel

      # Install the matrix PyTorch version before the project is installed;
      # the "wasn't clobbered" step below checks it is still in place.
      - name: Install PyTorch
        run: |
          pip3 install torch==${{ matrix.pytorch_version }}

      - name: Install dependencies
        run: |
          pip3 show torch
          pip3 install --no-build-isolation -U -e .
          python scripts/unsloth_install.py | sh
          python scripts/cutcrossentropy_install.py | sh
          pip3 install -r requirements-dev.txt -r requirements-tests.txt

      - name: Make sure PyTorch version wasn't clobbered
        run: |
          python -c "import torch; assert '${{ matrix.pytorch_version }}' in torch.__version__"

      - name: Ensure axolotl CLI was installed
        run: |
          axolotl --help

      - name: Pre-Download dataset fixture
        run: |
          huggingface-cli download --repo-type=dataset axolotl-ai-internal/axolotl-oss-dataset-fixtures

      # tests/patched/ and tests/cli/ are excluded from the parallel (-n8)
      # run and executed afterwards on their own, appending to the same
      # coverage data.
      - name: Run tests
        run: |
          pytest -v -n8 --dist loadfile --ignore=tests/e2e/ --ignore=tests/patched/ --ignore=tests/cli/ tests/ --cov=axolotl --cov-report=xml
          pytest -v tests/patched/ --cov=axolotl --cov-append --cov-report=xml
          pytest -v tests/cli/ --cov=axolotl --cov-append --cov-report=xml

      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v5
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          files: ./coverage.xml
          flags: unittests,pytorch-${{ matrix.pytorch_version }}
          fail_ci_if_error: false

      # Delete pip HTTP cache entries older than 14 days.
      - name: cleanup pip cache
        run: |
          find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \;

  # Same unit tests, but the package is built as an sdist and installed from
  # the resulting tarball instead of as an editable install. No coverage is
  # collected in this job.
  pytest-sdist:
    name: PyTest from Source Dist
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        python_version: ["3.11"]
        pytorch_version: ["2.5.1", "2.6.0", "2.7.1"]
    timeout-minutes: 20

    steps:
      - name: Check out repository code
        uses: actions/checkout@v4

      # Unpack a pre-built Hugging Face hub cache archive into the runner's
      # cache directory.
      - name: Restore Cache from S3
        id: hf-cache-restore-s3
        run: |
          mkdir -p /home/runner/.cache/huggingface/hub
          curl -L https://d1dttdx32dkk5p.cloudfront.net/hf-cache.tar.zst | tar -xf - -C /home/runner/.cache/huggingface/hub/ --use-compress-program unzstd

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python_version }}
          cache: 'pip'  # caching pip dependencies

      - name: upgrade pip
        run: |
          pip3 install --upgrade pip
          pip3 install --upgrade packaging==23.2 setuptools==75.8.0 setuptools_scm build wheel

      - name: Install PyTorch
        run: |
          pip3 install torch==${{ matrix.pytorch_version }}

      - name: Install dependencies
        run: |
          pip3 show torch
          python -m build --no-isolation --sdist
          pip3 install --no-build-isolation dist/axolotl*.tar.gz
          python scripts/unsloth_install.py | sh
          python scripts/cutcrossentropy_install.py | sh
          pip3 install -r requirements-dev.txt -r requirements-tests.txt

      - name: Make sure PyTorch version wasn't clobbered
        run: |
          python -c "import torch; assert '${{ matrix.pytorch_version }}' in torch.__version__"

      - name: Ensure axolotl CLI was installed
        run: |
          axolotl --help

      - name: Show HF cache
        run: huggingface-cli scan-cache

      - name: Run tests
        run: |
          pytest -v -n8 --dist loadfile --ignore=tests/e2e/ --ignore=tests/patched/ --ignore=tests/cli/ tests/
          pytest -v tests/patched/
          pytest -v tests/cli/

      # Delete pip HTTP cache entries older than 14 days.
      - name: cleanup pip cache
        run: |
          find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \;

  docker-e2e-tests-1st:
    # Run this job first as a gate for running the remainder of the test matrix.
    # Skipped when the first commit message of the push contains '[skip e2e]'
    # and when the repository is not owned by axolotl-ai-cloud.
    if: ${{ ! contains(github.event.commits[0].message, '[skip e2e]') && github.repository_owner == 'axolotl-ai-cloud' }}
    # this job needs to be run on self-hosted GPU runners...
    runs-on: [self-hosted, modal]
    timeout-minutes: 120
    needs: [pre-commit, pytest, pytest-sdist]

    strategy:
      fail-fast: false
      matrix:
        include:
          - cuda: 124
            cuda_version: "12.4.1"
            python_version: "3.11"
            pytorch: "2.6.0"
            num_gpus: 1
            axolotl_extras: vllm
          - cuda: 126
            cuda_version: "12.6.3"
            python_version: "3.11"
            pytorch: "2.6.0"
            num_gpus: 1
            # left empty: expands to an empty AXOLOTL_EXTRAS
            axolotl_extras:
            dockerfile: "Dockerfile-uv.jinja"
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Install Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: Install Modal
        run: |
          python -m pip install --upgrade pip
          pip install modal==1.0.2 jinja2
      # Export the matrix values as environment variables for the following
      # step. `matrix.dockerfile` is optional and falls back to
      # 'Dockerfile.jinja'.
      # NOTE(review): no matrix entry defines `axolotl_args`, so AXOLOTL_ARGS
      # is written as an empty value - confirm that is intended.
      - name: Update env vars
        run: |
          echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> "$GITHUB_ENV"
          echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> "$GITHUB_ENV"
          echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> "$GITHUB_ENV"
          echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> "$GITHUB_ENV"
          echo "CUDA=${{ matrix.cuda }}" >> "$GITHUB_ENV"
          echo "MODAL_IMAGE_BUILDER_VERSION=2024.10" >> "$GITHUB_ENV"
          echo "N_GPUS=${{ matrix.num_gpus }}" >> "$GITHUB_ENV"
          echo "CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }}" >> "$GITHUB_ENV"
          echo "E2E_DOCKERFILE=${{ matrix.dockerfile || 'Dockerfile.jinja'}}" >> "$GITHUB_ENV"
      - name: Run tests job on Modal
        run: |
          modal run cicd.e2e_tests

  docker-e2e-tests:
    if: github.repository_owner == 'axolotl-ai-cloud'
    # this job needs to be run on self-hosted GPU runners...
    runs-on: [self-hosted, modal]
    timeout-minutes: 120
    # Only run the remainder of the matrix if the first e2e check passed;
    # this is to save on wasted compute costs for known failures that get caught in the first run
    needs: [pre-commit, pytest, docker-e2e-tests-1st]

    strategy:
      fail-fast: false
      matrix:
        include:
          - cuda: 124
            cuda_version: "12.4.1"
            python_version: "3.11"
            pytorch: "2.6.0"
            num_gpus: 1
            axolotl_extras: llmcompressor
          - cuda: 124
            cuda_version: "12.4.1"
            python_version: "3.11"
            pytorch: "2.5.1"
            num_gpus: 1
            # left empty: expands to an empty AXOLOTL_EXTRAS
            axolotl_extras:
          - cuda: 126
            cuda_version: "12.6.3"
            python_version: "3.11"
            pytorch: "2.7.1"
            num_gpus: 1
            axolotl_extras:
          - cuda: 128
            cuda_version: "12.8.1"
            python_version: "3.11"
            pytorch: "2.7.1"
            num_gpus: 1
            axolotl_extras:
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Install Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: Install Modal
        run: |
          python -m pip install --upgrade pip
          pip install modal==1.0.2 jinja2
      # Export the matrix values as environment variables for the following
      # step. No entry in this matrix sets `dockerfile`, so E2E_DOCKERFILE is
      # always 'Dockerfile.jinja' here.
      - name: Update env vars
        run: |
          echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> "$GITHUB_ENV"
          echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> "$GITHUB_ENV"
          echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> "$GITHUB_ENV"
          echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> "$GITHUB_ENV"
          echo "CUDA=${{ matrix.cuda }}" >> "$GITHUB_ENV"
          echo "MODAL_IMAGE_BUILDER_VERSION=2024.10" >> "$GITHUB_ENV"
          echo "N_GPUS=${{ matrix.num_gpus }}" >> "$GITHUB_ENV"
          echo "CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }}" >> "$GITHUB_ENV"
          echo "E2E_DOCKERFILE=${{ matrix.dockerfile || 'Dockerfile.jinja'}}" >> "$GITHUB_ENV"
      - name: Run tests job on Modal
        run: |
          modal run cicd.e2e_tests

  # Runs `modal run cicd.cleanup` once the e2e matrix has finished.
  # NOTE(review): with a plain `needs:` and no `if:`, this job is skipped
  # whenever docker-e2e-tests fails or is skipped - confirm cleanup is not
  # also wanted after failed runs.
  docker-e2e-cleanup:
    runs-on: [self-hosted, modal]
    timeout-minutes: 90
    needs: [docker-e2e-tests]

    strategy:
      fail-fast: false
      matrix:
        include:
          - cuda: 124
            cuda_version: "12.4.1"
            python_version: "3.11"
            pytorch: "2.6.0"
            num_gpus: 1
            axolotl_extras: vllm
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Install Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: Install Modal
        run: |
          python -m pip install --upgrade pip
          pip install modal==1.0.2 jinja2
      # Same exports as the e2e jobs, except that E2E_DOCKERFILE is not set.
      - name: Update env vars
        run: |
          echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> "$GITHUB_ENV"
          echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> "$GITHUB_ENV"
          echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> "$GITHUB_ENV"
          echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> "$GITHUB_ENV"
          echo "CUDA=${{ matrix.cuda }}" >> "$GITHUB_ENV"
          echo "MODAL_IMAGE_BUILDER_VERSION=2024.10" >> "$GITHUB_ENV"
          echo "N_GPUS=${{ matrix.num_gpus }}" >> "$GITHUB_ENV"
          echo "CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }}" >> "$GITHUB_ENV"
      - name: Run tests job on Modal
        run: |
          modal run cicd.cleanup