From 17d715c2b35ffd4153e16cecc72b1592f556ae31 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Wed, 7 May 2025 15:06:07 -0400 Subject: [PATCH] swap tinymodels that have safetensors for some ci tests (#2641) --- .github/workflows/tests-nightly.yml | 87 +++++++++++++++++++ requirements.txt | 1 + src/axolotl/train.py | 5 +- .../utils/gradient_checkpointing/__init__.py | 21 +++++ tests/e2e/multigpu/test_llama.py | 2 +- .../lora_kernels/test_lora_kernel_patching.py | 10 ++- tests/e2e/patched/test_falcon_samplepack.py | 4 + tests/e2e/patched/test_mistral_samplepack.py | 4 +- tests/e2e/patched/test_model_patches.py | 2 +- tests/e2e/patched/test_resume.py | 4 +- tests/e2e/test_evaluate.py | 7 +- tests/e2e/test_falcon.py | 5 ++ tests/e2e/test_mistral.py | 4 +- tests/test_datasets.py | 1 - 14 files changed, 137 insertions(+), 20 deletions(-) diff --git a/.github/workflows/tests-nightly.yml b/.github/workflows/tests-nightly.yml index 23eb25f56..539f7f71b 100644 --- a/.github/workflows/tests-nightly.yml +++ b/.github/workflows/tests-nightly.yml @@ -18,9 +18,96 @@ jobs: env: SKIP: no-commit-to-branch + preload-cache: + name: Preload HF cache + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python_version: ["3.11"] + pytorch_version: ["2.6.0"] + timeout-minutes: 20 + + env: + AXOLOTL_IS_CI_CACHE_PRELOAD: "1" + + steps: + - name: Check out repository code + uses: actions/checkout@v4 + + - name: Restore HF cache + id: hf-cache-restore + uses: actions/cache/restore@v4 + with: + path: | + /home/runner/.cache/huggingface/hub/datasets--* + /home/runner/.cache/huggingface/hub/models--* + key: ${{ runner.os }}-hf-hub-cache-v2 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python_version }} + cache: 'pip' # caching pip dependencies + + - name: upgrade pip + run: | + pip3 install --upgrade pip + pip3 install --upgrade packaging==23.2 setuptools==75.8.0 wheel + + - name: Install PyTorch + run: | + pip3 install torch==${{ 
matrix.pytorch_version }} + + - name: Install dependencies + run: | + pip3 show torch + pip3 install --no-build-isolation -U -e . + python scripts/unsloth_install.py | sh + python scripts/cutcrossentropy_install.py | sh + pip3 install -r requirements-dev.txt -r requirements-tests.txt + + - name: Make sure PyTorch version wasn't clobbered + run: | + python -c "import torch; assert '${{ matrix.pytorch_version }}' in torch.__version__" + + - name: Ensure axolotl CLI was installed + run: | + axolotl --help + + - name: Pre-Download dataset fixture + run: | + huggingface-cli download --repo-type=dataset axolotl-ai-internal/axolotl-oss-dataset-fixtures + + - name: Run tests + run: | + pytest -v tests/conftest.py + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v5 + with: + token: ${{ secrets.CODECOV_TOKEN }} + files: ./coverage.xml + flags: unittests,pytorch-${{ matrix.pytorch_version }} + fail_ci_if_error: false + + - name: cleanup pip cache + run: | + find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \; + + - name: Save HF cache + id: hf-cache + uses: actions/cache/save@v4 + with: + path: | + /home/runner/.cache/huggingface/hub/datasets--* + /home/runner/.cache/huggingface/hub/models--* + key: ${{ steps.hf-cache-restore.outputs.cache-primary-key }} + pytest: name: PyTest runs-on: ubuntu-latest + needs: [preload-cache] strategy: fail-fast: false max-parallel: 2 diff --git a/requirements.txt b/requirements.txt index dc495bedd..4ae82dd49 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,6 +11,7 @@ liger-kernel==0.5.9 packaging==23.2 +huggingface_hub==0.31.0 peft==0.15.2 transformers==4.51.3 tokenizers>=0.21.1 diff --git a/src/axolotl/train.py b/src/axolotl/train.py index e58eddbff..68efc0b77 100644 --- a/src/axolotl/train.py +++ b/src/axolotl/train.py @@ -2,6 +2,7 @@ import importlib import inspect +import logging import os import signal import sys @@ -12,7 +13,6 @@ from typing import Any, Dict import torch import 
transformers.modelcard -from accelerate.logging import get_logger from accelerate.utils import save_fsdp_model from datasets import Dataset from huggingface_hub.errors import OfflineModeIsEnabled @@ -42,7 +42,7 @@ try: except ImportError: BetterTransformer = None -LOG = get_logger(__name__) +LOG = logging.getLogger(__name__) def setup_model_and_tokenizer( @@ -63,7 +63,6 @@ def setup_model_and_tokenizer( # Load tokenizer LOG.debug( f"loading tokenizer... {cfg.tokenizer_config or cfg.base_model_config}", - main_process_only=True, ) tokenizer = load_tokenizer(cfg) diff --git a/src/axolotl/utils/gradient_checkpointing/__init__.py b/src/axolotl/utils/gradient_checkpointing/__init__.py index 0da5c83a2..f84f76d80 100644 --- a/src/axolotl/utils/gradient_checkpointing/__init__.py +++ b/src/axolotl/utils/gradient_checkpointing/__init__.py @@ -1,15 +1,36 @@ """custom checkpointing utils""" +import importlib.metadata from functools import partial +from packaging import version + from axolotl.utils.gradient_checkpointing.unsloth import ( Unsloth_Offloaded_Gradient_Checkpointer, ) +transformers_version = version.parse(importlib.metadata.version("transformers")) +if transformers_version > version.parse("4.51.3"): + from transformers.modeling_layers import GradientCheckpointingLayer + + def uses_gc_layers(decoder_layer): + return isinstance(decoder_layer.func.__self__, GradientCheckpointingLayer) + +else: + + def uses_gc_layers(_): + return False + def hf_grad_checkpoint_offload_wrapper( decoder_layer, *args, use_reentrant=None ): # pylint: disable=unused-argument + if uses_gc_layers(decoder_layer): + return Unsloth_Offloaded_Gradient_Checkpointer.apply( + decoder_layer, + *args, + ) + return Unsloth_Offloaded_Gradient_Checkpointer.apply( ( decoder_layer.func.__self__ diff --git a/tests/e2e/multigpu/test_llama.py b/tests/e2e/multigpu/test_llama.py index 1ff795bd6..38e6e741a 100644 --- a/tests/e2e/multigpu/test_llama.py +++ b/tests/e2e/multigpu/test_llama.py @@ -479,7 +479,7 @@ class 
TestMultiGPULlama: "sample_packing": True, "pad_to_sequence_len": True, "sequence_len": 2048, - "val_set_size": 0.05, + "val_set_size": 0.1, "special_tokens": { "pad_token": "<|endoftext|>", }, diff --git a/tests/e2e/patched/lora_kernels/test_lora_kernel_patching.py b/tests/e2e/patched/lora_kernels/test_lora_kernel_patching.py index f3e59b373..f6b7ee9b9 100644 --- a/tests/e2e/patched/lora_kernels/test_lora_kernel_patching.py +++ b/tests/e2e/patched/lora_kernels/test_lora_kernel_patching.py @@ -29,12 +29,12 @@ from axolotl.utils.dict import DictDefault MODEL_CONFIGS = [ { - "name": "openaccess-ai-collective/tiny-mistral", + "name": "trl-internal-testing/tiny-MistralForCausalLM-0.2", "expected_activation": apply_lora_mlp_swiglu, "dtype": torch.float16, }, { - "name": "Qwen/Qwen2-7B", + "name": "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", "expected_activation": apply_lora_mlp_swiglu, "dtype": torch.float16, }, @@ -44,7 +44,7 @@ MODEL_CONFIGS = [ "dtype": torch.float32, }, { - "name": "mhenrichsen/gemma-2b", + "name": "trl-internal-testing/tiny-Gemma2ForCausalLM", "expected_activation": apply_lora_mlp_geglu, "dtype": torch.float16, }, @@ -156,7 +156,9 @@ def test_swiglu_mlp_integration(small_llama_model): def test_geglu_model_integration(): """Test GeGLU activation with Gemma model.""" model = AutoModelForCausalLM.from_pretrained( - "mhenrichsen/gemma-2b", torch_dtype=torch.float16, device_map="cuda:0" + "trl-internal-testing/tiny-Gemma2ForCausalLM", + torch_dtype=torch.float16, + device_map="cuda:0", ) peft_config = get_peft_config( { diff --git a/tests/e2e/patched/test_falcon_samplepack.py b/tests/e2e/patched/test_falcon_samplepack.py index 0034169af..667b62ffb 100644 --- a/tests/e2e/patched/test_falcon_samplepack.py +++ b/tests/e2e/patched/test_falcon_samplepack.py @@ -6,6 +6,8 @@ import logging import os import unittest +import pytest + from axolotl.cli.args import TrainerCliArgs from axolotl.common.datasets import load_datasets from axolotl.train import train 
@@ -23,6 +25,7 @@ class TestFalconPatched(unittest.TestCase): Test case for Falcon models """ + @pytest.mark.skip(reason="no tiny models for testing with safetensors") @with_temp_dir def test_qlora(self, temp_dir): # pylint: disable=duplicate-code @@ -71,6 +74,7 @@ class TestFalconPatched(unittest.TestCase): train(cfg=cfg, dataset_meta=dataset_meta) check_model_output_exists(temp_dir, cfg) + @pytest.mark.skip(reason="no tiny models for testing with safetensors") @with_temp_dir def test_ft(self, temp_dir): # pylint: disable=duplicate-code diff --git a/tests/e2e/patched/test_mistral_samplepack.py b/tests/e2e/patched/test_mistral_samplepack.py index 3bc0fcfbc..ccfeb3d63 100644 --- a/tests/e2e/patched/test_mistral_samplepack.py +++ b/tests/e2e/patched/test_mistral_samplepack.py @@ -28,7 +28,7 @@ class TestMistral(unittest.TestCase): # pylint: disable=duplicate-code cfg = DictDefault( { - "base_model": "openaccess-ai-collective/tiny-mistral", + "base_model": "trl-internal-testing/tiny-MistralForCausalLM-0.2", "flash_attention": True, "sample_packing": True, "sequence_len": 1024, @@ -76,7 +76,7 @@ class TestMistral(unittest.TestCase): # pylint: disable=duplicate-code cfg = DictDefault( { - "base_model": "openaccess-ai-collective/tiny-mistral", + "base_model": "trl-internal-testing/tiny-MistralForCausalLM-0.2", "flash_attention": True, "sample_packing": True, "sequence_len": 1024, diff --git a/tests/e2e/patched/test_model_patches.py b/tests/e2e/patched/test_model_patches.py index 8a75db52e..26090e697 100644 --- a/tests/e2e/patched/test_model_patches.py +++ b/tests/e2e/patched/test_model_patches.py @@ -56,7 +56,7 @@ class TestModelPatches(unittest.TestCase): def test_mistral_multipack(self, temp_dir): cfg = DictDefault( { - "base_model": "openaccess-ai-collective/tiny-mistral", + "base_model": "trl-internal-testing/tiny-MistralForCausalLM-0.2", "flash_attention": True, "sample_packing": True, "sequence_len": 2048, diff --git a/tests/e2e/patched/test_resume.py 
b/tests/e2e/patched/test_resume.py index a84759bae..61e4a0e03 100644 --- a/tests/e2e/patched/test_resume.py +++ b/tests/e2e/patched/test_resume.py @@ -15,7 +15,7 @@ from axolotl.train import train from axolotl.utils.config import normalize_config, validate_config from axolotl.utils.dict import DictDefault -from ..utils import check_model_output_exists, most_recent_subdir +from ..utils import check_model_output_exists, most_recent_subdir, require_torch_2_6_0 LOG = logging.getLogger("axolotl.tests.e2e") os.environ["WANDB_DISABLED"] = "true" @@ -26,6 +26,7 @@ class TestResumeLlama: Test case for resuming training of llama models """ + @require_torch_2_6_0 def test_resume_lora_packed(self, temp_dir): # pylint: disable=duplicate-code cfg = DictDefault( @@ -62,6 +63,7 @@ class TestResumeLlama: "save_total_limit": 5, "max_steps": 15, "use_tensorboard": True, + "save_safetensors": True, } ) if is_torch_bf16_gpu_available(): diff --git a/tests/e2e/test_evaluate.py b/tests/e2e/test_evaluate.py index b2d7d02ca..0278113b7 100644 --- a/tests/e2e/test_evaluate.py +++ b/tests/e2e/test_evaluate.py @@ -19,14 +19,11 @@ class TestE2eEvaluate: # pylint: disable=duplicate-code cfg = DictDefault( { - "base_model": "JackFram/llama-68m", - "tokenizer_type": "LlamaTokenizer", + "base_model": "HuggingFaceTB/SmolLM2-135M", "sequence_len": 1024, "val_set_size": 0.02, "special_tokens": { - "unk_token": "", - "bos_token": "", - "eos_token": "", + "pad_token": "<|endoftext|>", }, "datasets": [ { diff --git a/tests/e2e/test_falcon.py b/tests/e2e/test_falcon.py index a1641a997..24afab0b3 100644 --- a/tests/e2e/test_falcon.py +++ b/tests/e2e/test_falcon.py @@ -6,6 +6,8 @@ import logging import os import unittest +import pytest + from axolotl.cli.args import TrainerCliArgs from axolotl.common.datasets import load_datasets from axolotl.train import train @@ -23,6 +25,7 @@ class TestFalcon(unittest.TestCase): Test case for falcon """ + @pytest.mark.skip(reason="no tiny models for testing with 
safetensors") @with_temp_dir def test_lora(self, temp_dir): # pylint: disable=duplicate-code @@ -74,6 +77,7 @@ class TestFalcon(unittest.TestCase): train(cfg=cfg, dataset_meta=dataset_meta) check_model_output_exists(temp_dir, cfg) + @pytest.mark.skip(reason="no tiny models for testing with safetensors") @with_temp_dir def test_lora_added_vocab(self, temp_dir): # pylint: disable=duplicate-code @@ -129,6 +133,7 @@ class TestFalcon(unittest.TestCase): train(cfg=cfg, dataset_meta=dataset_meta) check_model_output_exists(temp_dir, cfg) + @pytest.mark.skip(reason="no tiny models for testing with safetensors") @with_temp_dir def test_ft(self, temp_dir): # pylint: disable=duplicate-code diff --git a/tests/e2e/test_mistral.py b/tests/e2e/test_mistral.py index 740fa6eed..ba8cf2896 100644 --- a/tests/e2e/test_mistral.py +++ b/tests/e2e/test_mistral.py @@ -30,7 +30,7 @@ class TestMistral(unittest.TestCase): # pylint: disable=duplicate-code cfg = DictDefault( { - "base_model": "openaccess-ai-collective/tiny-mistral", + "base_model": "trl-internal-testing/tiny-MistralForCausalLM-0.2", "flash_attention": True, "sequence_len": 1024, "load_in_8bit": True, @@ -77,7 +77,7 @@ class TestMistral(unittest.TestCase): # pylint: disable=duplicate-code cfg = DictDefault( { - "base_model": "openaccess-ai-collective/tiny-mistral", + "base_model": "trl-internal-testing/tiny-MistralForCausalLM-0.2", "flash_attention": True, "sequence_len": 1024, "val_set_size": 0.02, diff --git a/tests/test_datasets.py b/tests/test_datasets.py index ded82869f..88d196ad1 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -414,7 +414,6 @@ class TestDatasetPreparation: snapshot_path = snapshot_download( repo_id="mhenrichsen/alpaca_2k_test", repo_type="dataset", - local_dir=tmp_ds_path, ) shutil.copytree(snapshot_path, tmp_ds_path, dirs_exist_ok=True)