bump HF versions except for trl (#2427)

2025-03-20 10:22:05 -04:00
parent 4d92a68a96
commit 38df5a36ea
10 changed files with 29 additions and 19 deletions
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -22,8 +22,8 @@ repos:
    rev: 6.1.0
    hooks:
    - id: flake8
--   repo: https://github.com/PyCQA/pylint
-    rev: v3.3.0
+-   repo: https://github.com/pylint-dev/pylint
+    rev: c8c96d20cde3552a79858c7456bb1483bf83d633
    hooks:
    - id: pylint
 -   repo: https://github.com/pre-commit/mirrors-mypy
--- a/cicd/Dockerfile.jinja
+++ b/cicd/Dockerfile.jinja
@@ -31,6 +31,7 @@ RUN if [ "$NIGHTLY_BUILD" = "true" ] ; then \
        sed -i 's#^datasets.*#datasets @ git+https://github.com/huggingface/datasets.git@main#' requirements.txt; \
    fi

+RUN pip3 install -U packaging setuptools wheel
 RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
        pip install --no-build-isolation -e .[deepspeed,flash-attn,optimizers,ray,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
    else \
--- a/docker/Dockerfile-base
+++ b/docker/Dockerfile-base
@@ -28,7 +28,7 @@ ENV PATH="/root/miniconda3/envs/py${PYTHON_VERSION}/bin:${PATH}"

 WORKDIR /workspace

-RUN python3 -m pip install --upgrade pip && pip3 install packaging && \
+RUN python3 -m pip install --upgrade pip && pip3 install -U packaging setuptools wheel && \
    python3 -m pip install --no-cache-dir -U torch==${PYTORCH_VERSION}+cu${CUDA} --extra-index-url https://download.pytorch.org/whl/cu$CUDA && \
    python3 -m pip install --no-cache-dir "causal_conv1d @ git+https://github.com/Dao-AILab/causal-conv1d.git@main" && \
    python3 -m pip install --no-cache-dir "mamba_ssm @ git+https://github.com/state-spaces/mamba.git@main"
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,6 +8,7 @@ dynamic = ["version", "dependencies", "optional-dependencies"]
 description = "LLM Trainer"
 readme = "README.md"
 requires-python = ">=3.10"
+license-files = ["LICENSE"]

 [project.scripts]
 axolotl = "axolotl.cli.main:main"
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,11 +12,11 @@ liger-kernel==0.5.3

 packaging==24.2

-peft==0.14.0
+peft==0.15.0
 transformers==4.49.0
-tokenizers>=0.21.0
-accelerate==1.3.0
-datasets==3.2.0
+tokenizers>=0.21.1
+accelerate==1.5.2
+datasets==3.4.1
 deepspeed==0.16.1
 trl==0.15.1

--- a/scripts/cutcrossentropy_install.py
+++ b/scripts/cutcrossentropy_install.py
@@ -17,12 +17,12 @@ if v < V("2.4.0"):

 cce_spec = importlib.util.find_spec("cut_cross_entropy")

-UNINSTALL_PREFIX = ""
+uninstall_prefix = ""
 if cce_spec:
    if not importlib.util.find_spec("cut_cross_entropy.transformers"):
-        UNINSTALL_PREFIX = "pip uninstall -y cut-cross-entropy && "
+        uninstall_prefix = "pip uninstall -y cut-cross-entropy && "

 print(
-    UNINSTALL_PREFIX
+    uninstall_prefix
    + 'pip install "cut-cross-entropy[transformers] @ git+https://github.com/apple/ml-cross-entropy.git@24fbe4b5dab9a6c250a014573613c1890190536c"'
 )
--- a/src/axolotl/utils/data/sft.py
+++ b/src/axolotl/utils/data/sft.py
@@ -2,6 +2,7 @@

 import functools
 import logging
+import os
 from pathlib import Path
 from typing import List, Optional, Tuple, Union

@@ -344,6 +345,7 @@ def load_tokenized_prepared_datasets(
                )
                ds_from_iter.save_to_disk(str(prepared_ds_path))
            else:
+                os.makedirs(prepared_ds_path, exist_ok=True)
                dataset.save_to_disk(str(prepared_ds_path))
            if cfg.push_dataset_to_hub:
                LOG.info(
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -108,6 +108,12 @@ def download_arcee_ai_distilabel_intel_orca_dpo_pairs_dataset():
    )


+@pytest.fixture(scope="session", autouse=True)
+def download_tiny_shakespeare_dataset():
+    # download the dataset
+    snapshot_download_w_retry("Trelis/tiny-shakespeare", repo_type="dataset")
+
+
 @pytest.fixture
 def temp_dir():
    # Create a temporary directory
--- a/tests/e2e/solo/test_relora_llama.py
+++ b/tests/e2e/solo/test_relora_llama.py
@@ -40,8 +40,8 @@ class TestReLoraLlama(unittest.TestCase):
                "lora_alpha": 16,
                "lora_dropout": 0.05,
                "lora_target_modules": ["q_proj", "v_proj"],
-                "relora_steps": 100,
-                "relora_warmup_steps": 20,
+                "relora_steps": 50,
+                "relora_warmup_steps": 10,
                "relora_anneal_steps": 10,
                "relora_prune_ratio": 0.9,
                "relora_cpu_offload": True,
@@ -60,9 +60,9 @@ class TestReLoraLlama(unittest.TestCase):
                        "message_field_content": "value",
                    },
                ],
-                "warmup_steps": 20,
+                "warmup_steps": 10,
                "num_epochs": 2,
-                "max_steps": 205,  # at least 2x relora_steps
+                "max_steps": 105,  # at least 2x relora_steps
                "micro_batch_size": 2,
                "gradient_accumulation_steps": 1,
                "output_dir": temp_dir,
--- a/tests/test_datasets.py
+++ b/tests/test_datasets.py
@@ -7,13 +7,13 @@ import tempfile
 import unittest
 from pathlib import Path

+from conftest import snapshot_download_w_retry
 from constants import (
    ALPACA_MESSAGES_CONFIG_OG,
    ALPACA_MESSAGES_CONFIG_REVISION,
    SPECIAL_TOKENS,
 )
 from datasets import Dataset
-from huggingface_hub import snapshot_download
 from transformers import AutoTokenizer

 from axolotl.utils.data import load_tokenized_prepared_datasets
@@ -69,7 +69,7 @@ class TestDatasetPreparation(unittest.TestCase):
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_ds_path = Path(tmp_dir) / "mhenrichsen/alpaca_2k_test"
            tmp_ds_path.mkdir(parents=True, exist_ok=True)
-            snapshot_download(
+            snapshot_download_w_retry(
                repo_id="mhenrichsen/alpaca_2k_test",
                repo_type="dataset",
                local_dir=tmp_ds_path,
@@ -81,7 +81,7 @@ class TestDatasetPreparation(unittest.TestCase):
            # how to load it.
            cfg = DictDefault(
                {
-                    "tokenizer_config": "huggyllama/llama-7b",
+                    "tokenizer_config": "HuggingFaceTB/SmolLM2-135M",
                    "sequence_len": 1024,
                    "datasets": [
                        {
@@ -339,7 +339,7 @@ class TestDatasetPreparation(unittest.TestCase):
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_ds_path = Path(tmp_dir) / "mhenrichsen/alpaca_2k_test"
            tmp_ds_path.mkdir(parents=True, exist_ok=True)
-            snapshot_download(
+            snapshot_download_w_retry(
                repo_id="mhenrichsen/alpaca_2k_test",
                repo_type="dataset",
                local_dir=tmp_ds_path,
@@ -381,7 +381,7 @@ class TestDatasetPreparation(unittest.TestCase):
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_ds_path = Path(tmp_dir) / "mhenrichsen/alpaca_2k_test"
            tmp_ds_path.mkdir(parents=True, exist_ok=True)
-            snapshot_download(
+            snapshot_download_w_retry(
                repo_id="mhenrichsen/alpaca_2k_test",
                repo_type="dataset",
                local_dir=tmp_ds_path,