From f720b6e72d486a7b6281fcf94130f817b403f449 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Tue, 6 May 2025 11:09:07 -0400 Subject: [PATCH] repop cache (#2639) * repop cache * pre-cache as a step * fix the name * add reason for pytest skipif * restore pytorch matrix * remove max-parallel now that we've optimized this a bit --- .github/workflows/tests.yml | 112 +++++++++++++++++++++++++++++------- tests/conftest.py | 58 ++++++++++--------- 2 files changed, 120 insertions(+), 50 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index c2c085fa0..2671cfd33 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -44,12 +44,98 @@ jobs: env: SKIP: no-commit-to-branch - pytest: - name: PyTest + preload-cache: + name: Preload HF cache runs-on: ubuntu-latest strategy: fail-fast: false - max-parallel: 2 + matrix: + python_version: ["3.11"] + pytorch_version: ["2.6.0"] + timeout-minutes: 20 + + env: + AXOLOTL_IS_CI_CACHE_PRELOAD: "1" + + steps: + - name: Check out repository code + uses: actions/checkout@v4 + + - name: Restore HF cache + id: hf-cache-restore + uses: actions/cache/restore@v4 + with: + path: | + /home/runner/.cache/huggingface/hub/datasets--* + /home/runner/.cache/huggingface/hub/models--* + key: ${{ runner.os }}-hf-hub-cache-v2 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python_version }} + cache: 'pip' # caching pip dependencies + + - name: upgrade pip + run: | + pip3 install --upgrade pip + pip3 install --upgrade packaging==23.2 setuptools==75.8.0 wheel + + - name: Install PyTorch + run: | + pip3 install torch==${{ matrix.pytorch_version }} + + - name: Install dependencies + run: | + pip3 show torch + pip3 install --no-build-isolation -U -e . + python scripts/unsloth_install.py | sh + python scripts/cutcrossentropy_install.py | sh + pip3 install -r requirements-dev.txt -r requirements-tests.txt + + - name: Make sure PyTorch version wasn't clobbered + run: | + python -c "import torch; assert '${{ matrix.pytorch_version }}' in torch.__version__" + + - name: Ensure axolotl CLI was installed + run: | + axolotl --help + + - name: Pre-Download dataset fixture + run: | + huggingface-cli download --repo-type=dataset axolotl-ai-internal/axolotl-oss-dataset-fixtures + + - name: Run tests + run: | + pytest -v tests/conftest.py + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v5 + with: + token: ${{ secrets.CODECOV_TOKEN }} + files: ./coverage.xml + flags: unittests,pytorch-${{ matrix.pytorch_version }} + fail_ci_if_error: false + + - name: cleanup pip cache + run: | + find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \; + + - name: Save HF cache + id: hf-cache + uses: actions/cache/save@v4 + with: + path: | + /home/runner/.cache/huggingface/hub/datasets--* + /home/runner/.cache/huggingface/hub/models--* + key: ${{ steps.hf-cache-restore.outputs.cache-primary-key }} + + pytest: + name: PyTest + runs-on: ubuntu-latest + needs: [preload-cache] + strategy: + fail-fast: false matrix: python_version: ["3.11"] pytorch_version: ["2.5.1", "2.6.0", "2.7.0"] @@ -121,21 +207,12 @@ jobs: run: | find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \; - - name: Save HF cache - id: hf-cache - uses: actions/cache/save@v4 - with: - path: | - /home/runner/.cache/huggingface/hub/datasets--* - /home/runner/.cache/huggingface/hub/models--* - key: ${{ steps.hf-cache-restore.outputs.cache-primary-key }} - pytest-sdist: name: PyTest from Source Dist runs-on: ubuntu-latest + needs: [preload-cache] strategy: fail-fast: false - max-parallel: 1 matrix: python_version: ["3.11"] pytorch_version: ["2.5.1", "2.6.0", "2.7.0"] @@ -199,15 +276,6 @@ jobs: run: | find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \; - - name: Save HF cache - id: hf-cache - uses: actions/cache/save@v4 - with: - path: | - /home/runner/.cache/huggingface/hub/datasets--* - /home/runner/.cache/huggingface/hub/models--* - key: ${{ steps.hf-cache-restore.outputs.cache-primary-key }} - docker-e2e-tests-1st: if: ${{ ! contains(github.event.commits[0].message, '[skip e2e]') && github.repository_owner == 'axolotl-ai-cloud' }} # this job needs to be run on self-hosted GPU runners... diff --git a/tests/conftest.py b/tests/conftest.py index 7fc9a62af..8ab8fd6a4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,6 +4,7 @@ shared pytest fixtures import functools import importlib +import os import shutil import sys import tempfile @@ -529,31 +530,32 @@ def dataset_fozziethebeat_alpaca_messages_2k_dpo_test_rev_ea82cff( # # pylint: disable=redefined-outer-name,unused-argument -# def test_load_fixtures( -# download_smollm2_135m_model, -# download_llama_68m_random_model, -# download_qwen_2_5_half_billion_model, -# download_tatsu_lab_alpaca_dataset, -# download_mhenrichsen_alpaca_2k_dataset, -# download_mhenrichsen_alpaca_2k_w_revision_dataset, -# download_mlabonne_finetome_100k_dataset, -# download_argilla_distilabel_capybara_dpo_7k_binarized_dataset, -# download_argilla_ultrafeedback_binarized_preferences_cleaned_dataset, -# download_fozzie_alpaca_dpo_dataset, -# download_arcee_ai_distilabel_intel_orca_dpo_pairs_dataset, -# download_argilla_dpo_pairs_dataset, -# download_tiny_shakespeare_dataset, -# download_deepseek_model_fixture, -# download_huggyllama_model_fixture, -# download_llama_1b_model_fixture, -# download_llama3_8b_model_fixture, -# download_llama3_8b_instruct_model_fixture, -# download_phi_35_mini_model_fixture, -# download_phi_3_medium_model_fixture, -# download_mistral_7b_model_fixture, -# download_gemma_2b_model_fixture, -# download_gemma2_9b_model_fixture, -# download_mlx_mistral_7b_model_fixture, -# download_llama2_model_fixture, -# ): -# pass +@pytest.mark.skipif( + os.environ.get("AXOLOTL_IS_CI_CACHE_PRELOAD", "-1") != "1", + reason="Not running in CI cache preload", +) +def test_load_fixtures( + download_smollm2_135m_model, + download_qwen_2_5_half_billion_model, + download_tatsu_lab_alpaca_dataset, + download_mhenrichsen_alpaca_2k_dataset, + download_mhenrichsen_alpaca_2k_w_revision_dataset, + download_mlabonne_finetome_100k_dataset, + download_argilla_distilabel_capybara_dpo_7k_binarized_dataset, + download_arcee_ai_distilabel_intel_orca_dpo_pairs_dataset, + download_argilla_dpo_pairs_dataset, + download_tiny_shakespeare_dataset, + download_deepseek_model_fixture, + download_huggyllama_model_fixture, + download_llama_1b_model_fixture, + download_llama3_8b_model_fixture, + download_llama3_8b_instruct_model_fixture, + download_phi_35_mini_model_fixture, + download_phi_3_medium_model_fixture, + download_mistral_7b_model_fixture, + download_gemma_2b_model_fixture, + download_gemma2_9b_model_fixture, + download_mlx_mistral_7b_model_fixture, + download_llama2_model_fixture, +): + pass