Merge branch 'print_venv' of github.com:axolotl-ai-cloud/axolotl into print_venv
This commit is contained in:
115
.github/workflows/tests-nightly.yml
vendored
115
.github/workflows/tests-nightly.yml
vendored
@@ -18,96 +18,9 @@ jobs:
|
|||||||
env:
|
env:
|
||||||
SKIP: no-commit-to-branch
|
SKIP: no-commit-to-branch
|
||||||
|
|
||||||
preload-cache:
|
|
||||||
name: Preload HF cache
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
strategy:
|
|
||||||
fail-fast: false
|
|
||||||
matrix:
|
|
||||||
python_version: ["3.11"]
|
|
||||||
pytorch_version: ["2.6.0"]
|
|
||||||
timeout-minutes: 20
|
|
||||||
|
|
||||||
env:
|
|
||||||
AXOLOTL_IS_CI_CACHE_PRELOAD: "1"
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: Check out repository code
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
|
|
||||||
- name: Restore HF cache
|
|
||||||
id: hf-cache-restore
|
|
||||||
uses: actions/cache/restore@v4
|
|
||||||
with:
|
|
||||||
path: |
|
|
||||||
/home/runner/.cache/huggingface/hub/datasets--*
|
|
||||||
/home/runner/.cache/huggingface/hub/models--*
|
|
||||||
key: ${{ runner.os }}-hf-hub-cache-v2
|
|
||||||
|
|
||||||
- name: Setup Python
|
|
||||||
uses: actions/setup-python@v5
|
|
||||||
with:
|
|
||||||
python-version: ${{ matrix.python_version }}
|
|
||||||
cache: 'pip' # caching pip dependencies
|
|
||||||
|
|
||||||
- name: upgrade pip
|
|
||||||
run: |
|
|
||||||
pip3 install --upgrade pip
|
|
||||||
pip3 install --upgrade packaging==23.2 setuptools==75.8.0 wheel
|
|
||||||
|
|
||||||
- name: Install PyTorch
|
|
||||||
run: |
|
|
||||||
pip3 install torch==${{ matrix.pytorch_version }}
|
|
||||||
|
|
||||||
- name: Install dependencies
|
|
||||||
run: |
|
|
||||||
pip3 show torch
|
|
||||||
pip3 install --no-build-isolation -U -e .
|
|
||||||
python scripts/unsloth_install.py | sh
|
|
||||||
python scripts/cutcrossentropy_install.py | sh
|
|
||||||
pip3 install -r requirements-dev.txt -r requirements-tests.txt
|
|
||||||
|
|
||||||
- name: Make sure PyTorch version wasn't clobbered
|
|
||||||
run: |
|
|
||||||
python -c "import torch; assert '${{ matrix.pytorch_version }}' in torch.__version__"
|
|
||||||
|
|
||||||
- name: Ensure axolotl CLI was installed
|
|
||||||
run: |
|
|
||||||
axolotl --help
|
|
||||||
|
|
||||||
- name: Pre-Download dataset fixture
|
|
||||||
run: |
|
|
||||||
huggingface-cli download --repo-type=dataset axolotl-ai-internal/axolotl-oss-dataset-fixtures
|
|
||||||
|
|
||||||
- name: Run tests
|
|
||||||
run: |
|
|
||||||
pytest -v tests/conftest.py
|
|
||||||
|
|
||||||
- name: Upload coverage to Codecov
|
|
||||||
uses: codecov/codecov-action@v5
|
|
||||||
with:
|
|
||||||
token: ${{ secrets.CODECOV_TOKEN }}
|
|
||||||
files: ./coverage.xml
|
|
||||||
flags: unittests,pytorch-${{ matrix.pytorch_version }}
|
|
||||||
fail_ci_if_error: false
|
|
||||||
|
|
||||||
- name: cleanup pip cache
|
|
||||||
run: |
|
|
||||||
find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \;
|
|
||||||
|
|
||||||
- name: Save HF cache
|
|
||||||
id: hf-cache
|
|
||||||
uses: actions/cache/save@v4
|
|
||||||
with:
|
|
||||||
path: |
|
|
||||||
/home/runner/.cache/huggingface/hub/datasets--*
|
|
||||||
/home/runner/.cache/huggingface/hub/models--*
|
|
||||||
key: ${{ steps.hf-cache-restore.outputs.cache-primary-key }}
|
|
||||||
|
|
||||||
pytest:
|
pytest:
|
||||||
name: PyTest
|
name: PyTest
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
needs: [preload-cache]
|
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
max-parallel: 2
|
max-parallel: 2
|
||||||
@@ -120,14 +33,11 @@ jobs:
|
|||||||
- name: Check out repository code
|
- name: Check out repository code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Restore HF cache
|
- name: Restore Cache from S3
|
||||||
id: hf-cache-restore
|
id: hf-cache-restore-s3
|
||||||
uses: actions/cache/restore@v4
|
run: |
|
||||||
with:
|
mkdir -p /home/runner/.cache/huggingface/hub
|
||||||
path: |
|
curl -L https://d1dttdx32dkk5p.cloudfront.net/hf-cache.tar.zst | tar -xf - -C /home/runner/.cache/huggingface/hub/ --use-compress-program unzstd
|
||||||
/home/runner/.cache/huggingface/hub/datasets--*
|
|
||||||
/home/runner/.cache/huggingface/hub/models--*
|
|
||||||
key: ${{ runner.os }}-hf-hub-cache-v2
|
|
||||||
|
|
||||||
- name: Setup Python
|
- name: Setup Python
|
||||||
uses: actions/setup-python@v5
|
uses: actions/setup-python@v5
|
||||||
@@ -168,10 +78,6 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
axolotl --help
|
axolotl --help
|
||||||
|
|
||||||
- name: Pre-Download dataset fixture
|
|
||||||
run: |
|
|
||||||
huggingface-cli download --repo-type=dataset axolotl-ai-internal/axolotl-oss-dataset-fixtures
|
|
||||||
|
|
||||||
- name: Run tests
|
- name: Run tests
|
||||||
run: |
|
run: |
|
||||||
pytest -v -n8 --dist loadfile --ignore=tests/e2e/ --ignore=tests/patched/ --ignore=tests/cli/ tests/
|
pytest -v -n8 --dist loadfile --ignore=tests/e2e/ --ignore=tests/patched/ --ignore=tests/cli/ tests/
|
||||||
@@ -193,15 +99,8 @@ jobs:
|
|||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
- cuda: 124
|
- cuda: 126
|
||||||
cuda_version: 12.4.1
|
cuda_version: 12.6.3
|
||||||
python_version: "3.11"
|
|
||||||
pytorch: 2.5.1
|
|
||||||
num_gpus: 1
|
|
||||||
axolotl_extras:
|
|
||||||
nightly_build: "true"
|
|
||||||
- cuda: 124
|
|
||||||
cuda_version: 12.4.1
|
|
||||||
python_version: "3.11"
|
python_version: "3.11"
|
||||||
pytorch: 2.6.0
|
pytorch: 2.6.0
|
||||||
num_gpus: 1
|
num_gpus: 1
|
||||||
|
|||||||
@@ -219,7 +219,9 @@ class TrainerBuilderBase(abc.ABC):
|
|||||||
if self.cfg.bf16 == "full":
|
if self.cfg.bf16 == "full":
|
||||||
training_args_kwargs["bf16_full_eval"] = True
|
training_args_kwargs["bf16_full_eval"] = True
|
||||||
else:
|
else:
|
||||||
training_args_kwargs["bf16"] = self.cfg.bf16 or self.cfg.bfloat16
|
bf16 = self.cfg.bf16 or self.cfg.bfloat16
|
||||||
|
bf16 = bf16 if bf16 is not None else False
|
||||||
|
training_args_kwargs["bf16"] = bf16
|
||||||
|
|
||||||
def _configure_scheduler(self, training_args_kwargs: dict):
|
def _configure_scheduler(self, training_args_kwargs: dict):
|
||||||
if self.cfg.lr_scheduler in ["one_cycle", "rex"]:
|
if self.cfg.lr_scheduler in ["one_cycle", "rex"]:
|
||||||
|
|||||||
Reference in New Issue
Block a user