swap tinymodels that have safetensors for some ci tests (#2641)

2025-05-07 15:06:07 -04:00
parent 25e6c5f9bd
commit 0f3587174d
14 changed files with 137 additions and 20 deletions
--- a/.github/workflows/tests-nightly.yml
+++ b/.github/workflows/tests-nightly.yml
@@ -18,9 +18,96 @@ jobs:
        env:
          SKIP: no-commit-to-branch
  preload-cache:
    name: Preload HF cache
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        python_version: ["3.11"]
        pytorch_version: ["2.6.0"]
    timeout-minutes: 20
    env:
      AXOLOTL_IS_CI_CACHE_PRELOAD: "1"
    steps:
      - name: Check out repository code
        uses: actions/checkout@v4
      - name: Restore HF cache
        id: hf-cache-restore
        uses: actions/cache/restore@v4
        with:
          path: |
            /home/runner/.cache/huggingface/hub/datasets--*
            /home/runner/.cache/huggingface/hub/models--*
          key: ${{ runner.os }}-hf-hub-cache-v2
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python_version }}
          cache: 'pip' # caching pip dependencies
      - name: upgrade pip
        run: |
          pip3 install --upgrade pip
          pip3 install --upgrade packaging==23.2 setuptools==75.8.0 wheel
      - name: Install PyTorch
        run: |
          pip3 install torch==${{ matrix.pytorch_version }}
      - name: Install dependencies
        run: |
          pip3 show torch
          pip3 install --no-build-isolation -U -e .
          python scripts/unsloth_install.py | sh
          python scripts/cutcrossentropy_install.py | sh
          pip3 install -r requirements-dev.txt -r requirements-tests.txt
      - name: Make sure PyTorch version wasn't clobbered
        run: |
          python -c "import torch; assert '${{ matrix.pytorch_version }}' in torch.__version__"
      - name: Ensure axolotl CLI was installed
        run: |
          axolotl --help
      - name: Pre-Download dataset fixture
        run: |
          huggingface-cli download --repo-type=dataset axolotl-ai-internal/axolotl-oss-dataset-fixtures
      - name: Run tests
        run: |
          pytest -v tests/conftest.py
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v5
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          files: ./coverage.xml
          flags: unittests,pytorch-${{ matrix.pytorch_version }}
          fail_ci_if_error: false
      - name: cleanup pip cache
        run: |
          find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \;
      - name: Save HF cache
        id: hf-cache
        uses: actions/cache/save@v4
        with:
          path: |
            /home/runner/.cache/huggingface/hub/datasets--*
            /home/runner/.cache/huggingface/hub/models--*
          key: ${{ steps.hf-cache-restore.outputs.cache-primary-key }}
  pytest:
    name: PyTest
    runs-on: ubuntu-latest
    needs: [preload-cache]
    strategy:
      fail-fast: false
      max-parallel: 2
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,6 +11,7 @@ liger-kernel==0.5.9
 packaging==23.2
 huggingface_hub==0.31.0
 peft==0.15.2
 transformers==4.51.3
 tokenizers>=0.21.1
--- a/src/axolotl/train.py
+++ b/src/axolotl/train.py
@@ -2,6 +2,7 @@
 import importlib
 import inspect
 import logging
 import os
 import signal
 import sys
@@ -12,7 +13,6 @@ from typing import Any, Dict
 import torch
 import transformers.modelcard
 from accelerate.logging import get_logger
 from accelerate.utils import save_fsdp_model
 from datasets import Dataset
 from huggingface_hub.errors import OfflineModeIsEnabled
@@ -42,7 +42,7 @@ try:
 except ImportError:
    BetterTransformer = None
-LOG = get_logger(__name__)
+LOG = logging.getLogger(__name__)
 def setup_model_and_tokenizer(
@@ -63,7 +63,6 @@ def setup_model_and_tokenizer(
    # Load tokenizer
    LOG.debug(
        f"loading tokenizer... {cfg.tokenizer_config or cfg.base_model_config}",
        main_process_only=True,
    )
    tokenizer = load_tokenizer(cfg)
--- a/src/axolotl/utils/gradient_checkpointing/__init__.py
+++ b/src/axolotl/utils/gradient_checkpointing/__init__.py
@@ -1,15 +1,36 @@
 """custom checkpointing utils"""
 import importlib
 from functools import partial
 from packaging import version
 from axolotl.utils.gradient_checkpointing.unsloth import (
    Unsloth_Offloaded_Gradient_Checkpointer,
 )
 transformers_version = version.parse(importlib.metadata.version("transformers"))
 if transformers_version > version.parse("4.51.3"):
    from transformers.modeling_layers import GradientCheckpointingLayer
    def uses_gc_layers(decoder_layer):
        return isinstance(decoder_layer.func.__self__, GradientCheckpointingLayer)
 else:
    def uses_gc_layers(_):
        return False
 def hf_grad_checkpoint_offload_wrapper(
    decoder_layer, *args, use_reentrant=None
 ):  # pylint: disable=unused-argument
    if uses_gc_layers(decoder_layer):
        return Unsloth_Offloaded_Gradient_Checkpointer.apply(
            decoder_layer,
            *args,
        )
    return Unsloth_Offloaded_Gradient_Checkpointer.apply(
        (
            decoder_layer.func.__self__
--- a/tests/e2e/multigpu/test_llama.py
+++ b/tests/e2e/multigpu/test_llama.py
@@ -479,7 +479,7 @@ class TestMultiGPULlama:
                "sample_packing": True,
                "pad_to_sequence_len": True,
                "sequence_len": 2048,
-                "val_set_size": 0.05,
+                "val_set_size": 0.1,
                "special_tokens": {
                    "pad_token": "<|endoftext|>",
                },
--- a/tests/e2e/patched/lora_kernels/test_lora_kernel_patching.py
+++ b/tests/e2e/patched/lora_kernels/test_lora_kernel_patching.py
@@ -29,12 +29,12 @@ from axolotl.utils.dict import DictDefault
 MODEL_CONFIGS = [
    {
-        "name": "openaccess-ai-collective/tiny-mistral",
+        "name": "trl-internal-testing/tiny-MistralForCausalLM-0.2",
        "expected_activation": apply_lora_mlp_swiglu,
        "dtype": torch.float16,
    },
    {
-        "name": "Qwen/Qwen2-7B",
+        "name": "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
        "expected_activation": apply_lora_mlp_swiglu,
        "dtype": torch.float16,
    },
@@ -44,7 +44,7 @@ MODEL_CONFIGS = [
        "dtype": torch.float32,
    },
    {
-        "name": "mhenrichsen/gemma-2b",
+        "name": "trl-internal-testing/tiny-Gemma2ForCausalLM",
        "expected_activation": apply_lora_mlp_geglu,
        "dtype": torch.float16,
    },
@@ -156,7 +156,9 @@ def test_swiglu_mlp_integration(small_llama_model):
 def test_geglu_model_integration():
    """Test GeGLU activation with Gemma model."""
    model = AutoModelForCausalLM.from_pretrained(
-        "mhenrichsen/gemma-2b", torch_dtype=torch.float16, device_map="cuda:0"
+        "trl-internal-testing/tiny-Gemma2ForCausalLM",
        torch_dtype=torch.float16,
        device_map="cuda:0",
    )
    peft_config = get_peft_config(
        {
--- a/tests/e2e/patched/test_falcon_samplepack.py
+++ b/tests/e2e/patched/test_falcon_samplepack.py
@@ -6,6 +6,8 @@ import logging
 import os
 import unittest
 import pytest
 from axolotl.cli.args import TrainerCliArgs
 from axolotl.common.datasets import load_datasets
 from axolotl.train import train
@@ -23,6 +25,7 @@ class TestFalconPatched(unittest.TestCase):
    Test case for Falcon models
    """
    @pytest.mark.skip(reason="no tiny models for testing with safetensors")
    @with_temp_dir
    def test_qlora(self, temp_dir):
        # pylint: disable=duplicate-code
@@ -71,6 +74,7 @@ class TestFalconPatched(unittest.TestCase):
        train(cfg=cfg, dataset_meta=dataset_meta)
        check_model_output_exists(temp_dir, cfg)
    @pytest.mark.skip(reason="no tiny models for testing with safetensors")
    @with_temp_dir
    def test_ft(self, temp_dir):
        # pylint: disable=duplicate-code
--- a/tests/e2e/patched/test_mistral_samplepack.py
+++ b/tests/e2e/patched/test_mistral_samplepack.py
@@ -28,7 +28,7 @@ class TestMistral(unittest.TestCase):
        # pylint: disable=duplicate-code
        cfg = DictDefault(
            {
-                "base_model": "openaccess-ai-collective/tiny-mistral",
+                "base_model": "trl-internal-testing/tiny-MistralForCausalLM-0.2",
                "flash_attention": True,
                "sample_packing": True,
                "sequence_len": 1024,
@@ -76,7 +76,7 @@ class TestMistral(unittest.TestCase):
        # pylint: disable=duplicate-code
        cfg = DictDefault(
            {
-                "base_model": "openaccess-ai-collective/tiny-mistral",
+                "base_model": "trl-internal-testing/tiny-MistralForCausalLM-0.2",
                "flash_attention": True,
                "sample_packing": True,
                "sequence_len": 1024,
--- a/tests/e2e/patched/test_model_patches.py
+++ b/tests/e2e/patched/test_model_patches.py
@@ -56,7 +56,7 @@ class TestModelPatches(unittest.TestCase):
    def test_mistral_multipack(self, temp_dir):
        cfg = DictDefault(
            {
-                "base_model": "openaccess-ai-collective/tiny-mistral",
+                "base_model": "trl-internal-testing/tiny-MistralForCausalLM-0.2",
                "flash_attention": True,
                "sample_packing": True,
                "sequence_len": 2048,
--- a/tests/e2e/patched/test_resume.py
+++ b/tests/e2e/patched/test_resume.py
@@ -15,7 +15,7 @@ from axolotl.train import train
 from axolotl.utils.config import normalize_config, validate_config
 from axolotl.utils.dict import DictDefault
-from ..utils import check_model_output_exists, most_recent_subdir
+from ..utils import check_model_output_exists, most_recent_subdir, require_torch_2_6_0
 LOG = logging.getLogger("axolotl.tests.e2e")
 os.environ["WANDB_DISABLED"] = "true"
@@ -26,6 +26,7 @@ class TestResumeLlama:
    Test case for resuming training of llama models
    """
    @require_torch_2_6_0
    def test_resume_lora_packed(self, temp_dir):
        # pylint: disable=duplicate-code
        cfg = DictDefault(
@@ -62,6 +63,7 @@ class TestResumeLlama:
                "save_total_limit": 5,
                "max_steps": 15,
                "use_tensorboard": True,
                "save_safetensors": True,
            }
        )
        if is_torch_bf16_gpu_available():
--- a/tests/e2e/test_evaluate.py
+++ b/tests/e2e/test_evaluate.py
@@ -19,14 +19,11 @@ class TestE2eEvaluate:
        # pylint: disable=duplicate-code
        cfg = DictDefault(
            {
-                "base_model": "JackFram/llama-68m",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
                "tokenizer_type": "LlamaTokenizer",
                "sequence_len": 1024,
                "val_set_size": 0.02,
                "special_tokens": {
-                    "unk_token": "<unk>",
+                    "pad_token": "<|endoftext|>",
                    "bos_token": "<s>",
                    "eos_token": "</s>",
                },
                "datasets": [
                    {
--- a/tests/e2e/test_falcon.py
+++ b/tests/e2e/test_falcon.py
@@ -6,6 +6,8 @@ import logging
 import os
 import unittest
 import pytest
 from axolotl.cli.args import TrainerCliArgs
 from axolotl.common.datasets import load_datasets
 from axolotl.train import train
@@ -23,6 +25,7 @@ class TestFalcon(unittest.TestCase):
    Test case for falcon
    """
    @pytest.mark.skip(reason="no tiny models for testing with safetensors")
    @with_temp_dir
    def test_lora(self, temp_dir):
        # pylint: disable=duplicate-code
@@ -74,6 +77,7 @@ class TestFalcon(unittest.TestCase):
        train(cfg=cfg, dataset_meta=dataset_meta)
        check_model_output_exists(temp_dir, cfg)
    @pytest.mark.skip(reason="no tiny models for testing with safetensors")
    @with_temp_dir
    def test_lora_added_vocab(self, temp_dir):
        # pylint: disable=duplicate-code
@@ -129,6 +133,7 @@ class TestFalcon(unittest.TestCase):
        train(cfg=cfg, dataset_meta=dataset_meta)
        check_model_output_exists(temp_dir, cfg)
    @pytest.mark.skip(reason="no tiny models for testing with safetensors")
    @with_temp_dir
    def test_ft(self, temp_dir):
        # pylint: disable=duplicate-code
--- a/tests/e2e/test_mistral.py
+++ b/tests/e2e/test_mistral.py
@@ -30,7 +30,7 @@ class TestMistral(unittest.TestCase):
        # pylint: disable=duplicate-code
        cfg = DictDefault(
            {
-                "base_model": "openaccess-ai-collective/tiny-mistral",
+                "base_model": "trl-internal-testing/tiny-MistralForCausalLM-0.2",
                "flash_attention": True,
                "sequence_len": 1024,
                "load_in_8bit": True,
@@ -77,7 +77,7 @@ class TestMistral(unittest.TestCase):
        # pylint: disable=duplicate-code
        cfg = DictDefault(
            {
-                "base_model": "openaccess-ai-collective/tiny-mistral",
+                "base_model": "trl-internal-testing/tiny-MistralForCausalLM-0.2",
                "flash_attention": True,
                "sequence_len": 1024,
                "val_set_size": 0.02,
--- a/tests/test_datasets.py
+++ b/tests/test_datasets.py
@@ -414,7 +414,6 @@ class TestDatasetPreparation:
            snapshot_path = snapshot_download(
                repo_id="mhenrichsen/alpaca_2k_test",
                repo_type="dataset",
                local_dir=tmp_ds_path,
            )
            shutil.copytree(snapshot_path, tmp_ds_path, dirs_exist_ok=True)