Compare commits

..

10 Commits

Author SHA1 Message Date
Salman Mohammadi
0c36a6fea6 config fix -___- 2025-03-18 11:35:20 +00:00
Salman Mohammadi
64aca3c23c linting v2 2025-03-18 11:33:54 +00:00
Salman Mohammadi
22abfd6170 simplifying check 2025-03-18 11:26:53 +00:00
Salman Mohammadi
0658c458b7 Merge branch 'fix_kto' of github.com:axolotl-ai-cloud/axolotl into fix_kto 2025-03-18 11:23:48 +00:00
Salman Mohammadi
690908cf2f linting 2025-03-18 11:23:23 +00:00
salman
b9378e9b39 Merge branch 'main' into fix_kto 2025-03-18 11:22:00 +00:00
Salman Mohammadi
57b0ad1467 adding adapter check 2025-03-18 11:21:42 +00:00
Salman Mohammadi
ec4ead6e3e adding error 2025-03-18 11:20:34 +00:00
Salman Mohammadi
a319ac7d3e removing artifacts 2025-03-17 20:00:09 +00:00
Salman Mohammadi
09d3f2cffa WIP 2025-03-17 19:59:19 +00:00
19 changed files with 56 additions and 96 deletions

View File

@@ -40,12 +40,6 @@ jobs:
python_version: "3.11" python_version: "3.11"
pytorch: 2.6.0 pytorch: 2.6.0
torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX" torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
- cuda: "128"
cuda_version: 12.8.1
cudnn_version: ""
python_version: "3.11"
pytorch: nightly
torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
steps: steps:
- name: Checkout - name: Checkout
uses: actions/checkout@v4 uses: actions/checkout@v4
@@ -67,7 +61,7 @@ jobs:
uses: docker/build-push-action@v4 uses: docker/build-push-action@v4
with: with:
context: . context: .
file: ${{ matrix.pytorch == 'nightly' && './docker/Dockerfile-base-nightly' || './docker/Dockerfile-base' }} file: ./docker/Dockerfile-base
push: ${{ github.event_name != 'pull_request' }} push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.metadata.outputs.tags }}-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }} tags: ${{ steps.metadata.outputs.tags }}-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
labels: ${{ steps.metadata.outputs.labels }} labels: ${{ steps.metadata.outputs.labels }}

View File

@@ -40,7 +40,7 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
pip3 install wheel packaging==23.2 pip3 install wheel packaging
pip3 install --no-build-isolation -e . pip3 install --no-build-isolation -e .
pip3 install -r requirements-dev.txt -r requirements-tests.txt pip3 install -r requirements-dev.txt -r requirements-tests.txt

View File

@@ -42,7 +42,7 @@ jobs:
- name: upgrade pip - name: upgrade pip
run: | run: |
pip3 install --upgrade pip pip3 install --upgrade pip
pip3 install --upgrade packaging==23.2 setuptools==75.8.0 wheel pip3 install --upgrade packaging setuptools wheel
- name: Install PyTorch - name: Install PyTorch
run: | run: |
@@ -59,7 +59,7 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
pip3 install --upgrade pip pip3 install --upgrade pip
pip3 install --upgrade packaging==23.2 pip3 install --upgrade packaging
pip3 install --no-build-isolation -U -e . pip3 install --no-build-isolation -U -e .
python scripts/unsloth_install.py | sh python scripts/unsloth_install.py | sh
python scripts/cutcrossentropy_install.py | sh python scripts/cutcrossentropy_install.py | sh

View File

@@ -74,7 +74,7 @@ jobs:
- name: upgrade pip - name: upgrade pip
run: | run: |
pip3 install --upgrade pip pip3 install --upgrade pip
pip3 install --upgrade packaging==23.2 setuptools==75.8.0 wheel pip3 install --upgrade packaging setuptools wheel
- name: Install PyTorch - name: Install PyTorch
run: | run: |
@@ -147,7 +147,7 @@ jobs:
- name: upgrade pip - name: upgrade pip
run: | run: |
pip3 install --upgrade pip pip3 install --upgrade pip
pip3 install --upgrade packaging==23.2 setuptools==75.8.0 setuptools_scm build wheel pip3 install --upgrade packaging setuptools setuptools_scm build wheel
- name: Install PyTorch - name: Install PyTorch
run: | run: |

View File

@@ -22,8 +22,8 @@ repos:
rev: 6.1.0 rev: 6.1.0
hooks: hooks:
- id: flake8 - id: flake8
- repo: https://github.com/pylint-dev/pylint - repo: https://github.com/PyCQA/pylint
rev: c8c96d20cde3552a79858c7456bb1483bf83d633 rev: v3.3.0
hooks: hooks:
- id: pylint - id: pylint
- repo: https://github.com/pre-commit/mirrors-mypy - repo: https://github.com/pre-commit/mirrors-mypy

View File

@@ -55,7 +55,7 @@ Features:
### Installation ### Installation
```bash ```bash
pip3 install -U packaging==23.2 setuptools==75.8.0 wheel ninja pip3 install -U packaging setuptools wheel ninja
pip3 install --no-build-isolation axolotl[flash-attn,deepspeed] pip3 install --no-build-isolation axolotl[flash-attn,deepspeed]
# Download example axolotl configs, deepspeed configs # Download example axolotl configs, deepspeed configs

View File

@@ -31,7 +31,6 @@ RUN if [ "$NIGHTLY_BUILD" = "true" ] ; then \
sed -i 's#^datasets.*#datasets @ git+https://github.com/huggingface/datasets.git@main#' requirements.txt; \ sed -i 's#^datasets.*#datasets @ git+https://github.com/huggingface/datasets.git@main#' requirements.txt; \
fi fi
RUN pip install packaging==23.2 setuptools==75.8.0
RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \ RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
pip install --no-build-isolation -e .[deepspeed,flash-attn,optimizers,ray,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \ pip install --no-build-isolation -e .[deepspeed,flash-attn,optimizers,ray,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
else \ else \

View File

@@ -28,7 +28,7 @@ ENV PATH="/root/miniconda3/envs/py${PYTHON_VERSION}/bin:${PATH}"
WORKDIR /workspace WORKDIR /workspace
RUN python3 -m pip install --upgrade pip && pip3 install -U packaging==23.2 setuptools==75.8.0 wheel && \ RUN python3 -m pip install --upgrade pip && pip3 install packaging && \
python3 -m pip install --no-cache-dir -U torch==${PYTORCH_VERSION}+cu${CUDA} --extra-index-url https://download.pytorch.org/whl/cu$CUDA && \ python3 -m pip install --no-cache-dir -U torch==${PYTORCH_VERSION}+cu${CUDA} --extra-index-url https://download.pytorch.org/whl/cu$CUDA && \
python3 -m pip install --no-cache-dir "causal_conv1d @ git+https://github.com/Dao-AILab/causal-conv1d.git@main" && \ python3 -m pip install --no-cache-dir "causal_conv1d @ git+https://github.com/Dao-AILab/causal-conv1d.git@main" && \
python3 -m pip install --no-cache-dir "mamba_ssm @ git+https://github.com/state-spaces/mamba.git@main" python3 -m pip install --no-cache-dir "mamba_ssm @ git+https://github.com/state-spaces/mamba.git@main"

View File

@@ -1,39 +0,0 @@
ARG CUDA_VERSION="12.8.1"
ARG CUDNN_VERSION="8"
ARG UBUNTU_VERSION="22.04"
ARG MAX_JOBS=4
FROM nvidia/cuda:$CUDA_VERSION-cudnn$CUDNN_VERSION-devel-ubuntu$UBUNTU_VERSION AS base-builder
ENV PATH="/root/miniconda3/bin:${PATH}"
ARG PYTHON_VERSION="3.11"
ARG PYTORCH_VERSION="nightly"
ARG CUDA="128"
ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 9.0+PTX"
ENV PYTHON_VERSION=$PYTHON_VERSION
ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST
RUN apt-get update \
&& apt-get install -y wget git build-essential ninja-build git-lfs libaio-dev pkg-config && rm -rf /var/lib/apt/lists/* \
&& wget \
https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
&& mkdir /root/.conda \
&& bash Miniconda3-latest-Linux-x86_64.sh -b \
&& rm -f Miniconda3-latest-Linux-x86_64.sh \
&& conda create -n "py${PYTHON_VERSION}" python="${PYTHON_VERSION}"
ENV PATH="/root/miniconda3/envs/py${PYTHON_VERSION}/bin:${PATH}"
WORKDIR /workspace
RUN python3 -m pip install --upgrade pip && pip3 install packaging && \
python3 -m pip install --no-cache-dir -U torch --extra-index-url https://download.pytorch.org/whl/nightly/cu$CUDA && \
python3 -m pip install --no-cache-dir "causal_conv1d @ git+https://github.com/Dao-AILab/causal-conv1d.git@main" && \
python3 -m pip install --no-cache-dir "mamba_ssm @ git+https://github.com/state-spaces/mamba.git@main"
RUN git lfs install --skip-repo && \
pip3 install awscli && \
# The base image ships with `pydantic==1.8.2` which is not working
pip3 install -U --no-cache-dir pydantic==1.10.10

View File

@@ -55,7 +55,7 @@ tf32: true
gradient_checkpointing: true gradient_checkpointing: true
gradient_checkpointing_kwargs: gradient_checkpointing_kwargs:
use_reentrant: true use_reentrant: false
early_stopping_patience: early_stopping_patience:
resume_from_checkpoint: resume_from_checkpoint:
local_rank: local_rank:

View File

@@ -1,5 +1,5 @@
[build-system] [build-system]
requires = ["setuptools>=64", "wheel", "setuptools_scm>=8", "packaging==23.2"] requires = ["setuptools>=64", "wheel", "setuptools_scm>=8"]
build-backend = "setuptools.build_meta" build-backend = "setuptools.build_meta"
[project] [project]
@@ -8,7 +8,6 @@ dynamic = ["version", "dependencies", "optional-dependencies"]
description = "LLM Trainer" description = "LLM Trainer"
readme = "README.md" readme = "README.md"
requires-python = ">=3.10" requires-python = ">=3.10"
# license = "Apache-2.0"
[project.scripts] [project.scripts]
axolotl = "axolotl.cli.main:main" axolotl = "axolotl.cli.main:main"

View File

@@ -1,7 +1,7 @@
--extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/ --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/
# START section of dependencies that don't install on Darwin/MacOS # START section of dependencies that don't install on Darwin/MacOS
bitsandbytes==0.45.3 bitsandbytes==0.45.2
triton>=3.0.0 triton>=3.0.0
mamba-ssm==1.2.0.post1 mamba-ssm==1.2.0.post1
flash-attn==2.7.4.post1 flash-attn==2.7.4.post1
@@ -12,12 +12,12 @@ liger-kernel==0.5.3
packaging==23.2 packaging==23.2
peft==0.15.0 peft==0.14.0
transformers==4.49.0 transformers==4.49.0
tokenizers>=0.21.1 tokenizers>=0.21.0
accelerate==1.5.2 accelerate==1.3.0
datasets==3.4.1 datasets==3.2.0
deepspeed==0.16.4 deepspeed==0.16.1
trl==0.15.1 trl==0.15.1
optimum==1.16.2 optimum==1.16.2

View File

@@ -17,12 +17,12 @@ if v < V("2.4.0"):
cce_spec = importlib.util.find_spec("cut_cross_entropy") cce_spec = importlib.util.find_spec("cut_cross_entropy")
uninstall_prefix = "" UNINSTALL_PREFIX = ""
if cce_spec: if cce_spec:
if not importlib.util.find_spec("cut_cross_entropy.transformers"): if not importlib.util.find_spec("cut_cross_entropy.transformers"):
uninstall_prefix = "pip uninstall -y cut-cross-entropy && " UNINSTALL_PREFIX = "pip uninstall -y cut-cross-entropy && "
print( print(
uninstall_prefix UNINSTALL_PREFIX
+ 'pip install "cut-cross-entropy[transformers] @ git+https://github.com/apple/ml-cross-entropy.git@24fbe4b5dab9a6c250a014573613c1890190536c"' + 'pip install "cut-cross-entropy[transformers] @ git+https://github.com/apple/ml-cross-entropy.git@24fbe4b5dab9a6c250a014573613c1890190536c"'
) )

View File

@@ -128,7 +128,7 @@ setup(
"flash-attn==2.7.4.post1", "flash-attn==2.7.4.post1",
], ],
"deepspeed": [ "deepspeed": [
"deepspeed==0.16.4", "deepspeed==0.16.1",
"deepspeed-kernels", "deepspeed-kernels",
], ],
"mamba-ssm": [ "mamba-ssm": [

View File

@@ -507,7 +507,7 @@ class HyperparametersConfig(BaseModel):
weight_decay: Optional[float] = 0.0 weight_decay: Optional[float] = 0.0
optimizer: Optional[ optimizer: Optional[
Union[OptimizerNames, CustomSupportedOptimizers] Union[OptimizerNames, CustomSupportedOptimizers]
] = OptimizerNames.ADAMW_TORCH_FUSED ] = OptimizerNames.ADAMW_HF
optim_args: Optional[Union[str, Dict[str, Any]]] = Field( optim_args: Optional[Union[str, Dict[str, Any]]] = Field(
default=None, default=None,
json_schema_extra={"description": "Optional arguments to supply to optimizer."}, json_schema_extra={"description": "Optional arguments to supply to optimizer."},
@@ -1679,6 +1679,30 @@ class AxolotlInputConfig(
return data return data
@model_validator(mode="before")
@classmethod
def check_rl_config_gradient_checkpointing(cls, data):
# TODO: SalmanMohammadi
# Distributed RL with QLoRA + gradient checkpointing
# and use_reentrant = True is broken upstream in TRL
# pylint: disable=too-many-boolean-expressions
if (
data.get("rl")
and data.get("gradient_checkpointing")
and data.get("gradient_checkpointing_kwargs")
and data.get("gradient_checkpointing_kwargs").get("use_reentrant")
and data.get("load_in_4bit")
and data.get("adapter") == "qlora"
and data.get("capabilities")
and data.get("capabilities").get("n_gpu", 1) > 1
):
raise ValueError(
"The `use_reentrant: True` implementation of gradient checkpointing "
"is not supported for distributed RL training with QLoRA. Please set "
"`use_reentrant: False` in `gradient_checkpointing_kwargs`."
)
return data
@model_validator(mode="before") @model_validator(mode="before")
@classmethod @classmethod
def check_kto_config(cls, data): def check_kto_config(cls, data):
@@ -1689,15 +1713,6 @@ class AxolotlInputConfig(
if data.get("remove_unused_columns") is not False: if data.get("remove_unused_columns") is not False:
raise ValueError("Set `remove_unused_columns: False` when using kto") raise ValueError("Set `remove_unused_columns: False` when using kto")
if data.get("gradient_checkpointing") and not (
data.get("gradient_checkpointing_kwargs")
and isinstance(data.get("gradient_checkpointing_kwargs"), dict)
and data["gradient_checkpointing_kwargs"].get("use_reentrant")
):
raise ValueError(
"Set `gradient_checkpointing_kwargs: {use_reentrant: true}` for when kto is enabled"
)
return data return data

View File

@@ -2,7 +2,6 @@
import functools import functools
import logging import logging
import os
from pathlib import Path from pathlib import Path
from typing import List, Optional, Tuple, Union from typing import List, Optional, Tuple, Union
@@ -345,7 +344,6 @@ def load_tokenized_prepared_datasets(
) )
ds_from_iter.save_to_disk(str(prepared_ds_path)) ds_from_iter.save_to_disk(str(prepared_ds_path))
else: else:
os.makedirs(prepared_ds_path, exist_ok=True)
dataset.save_to_disk(str(prepared_ds_path)) dataset.save_to_disk(str(prepared_ds_path))
if cfg.push_dataset_to_hub: if cfg.push_dataset_to_hub:
LOG.info( LOG.info(

View File

@@ -108,12 +108,6 @@ def download_arcee_ai_distilabel_intel_orca_dpo_pairs_dataset():
) )
@pytest.fixture(scope="session", autouse=True)
def download_tiny_shakespeare_dataset():
# download the dataset
snapshot_download_w_retry("Trelis/tiny-shakespeare", repo_type="dataset")
@pytest.fixture @pytest.fixture
def temp_dir(): def temp_dir():
# Create a temporary directory # Create a temporary directory

View File

@@ -40,8 +40,8 @@ class TestReLoraLlama(unittest.TestCase):
"lora_alpha": 16, "lora_alpha": 16,
"lora_dropout": 0.05, "lora_dropout": 0.05,
"lora_target_modules": ["q_proj", "v_proj"], "lora_target_modules": ["q_proj", "v_proj"],
"relora_steps": 50, "relora_steps": 100,
"relora_warmup_steps": 10, "relora_warmup_steps": 20,
"relora_anneal_steps": 10, "relora_anneal_steps": 10,
"relora_prune_ratio": 0.9, "relora_prune_ratio": 0.9,
"relora_cpu_offload": True, "relora_cpu_offload": True,
@@ -60,9 +60,9 @@ class TestReLoraLlama(unittest.TestCase):
"message_field_content": "value", "message_field_content": "value",
}, },
], ],
"warmup_steps": 10, "warmup_steps": 20,
"num_epochs": 2, "num_epochs": 2,
"max_steps": 105, # at least 2x relora_steps "max_steps": 205, # at least 2x relora_steps
"micro_batch_size": 2, "micro_batch_size": 2,
"gradient_accumulation_steps": 1, "gradient_accumulation_steps": 1,
"output_dir": temp_dir, "output_dir": temp_dir,

View File

@@ -7,13 +7,13 @@ import tempfile
import unittest import unittest
from pathlib import Path from pathlib import Path
from conftest import snapshot_download_w_retry
from constants import ( from constants import (
ALPACA_MESSAGES_CONFIG_OG, ALPACA_MESSAGES_CONFIG_OG,
ALPACA_MESSAGES_CONFIG_REVISION, ALPACA_MESSAGES_CONFIG_REVISION,
SPECIAL_TOKENS, SPECIAL_TOKENS,
) )
from datasets import Dataset from datasets import Dataset
from huggingface_hub import snapshot_download
from transformers import AutoTokenizer from transformers import AutoTokenizer
from axolotl.utils.data import load_tokenized_prepared_datasets from axolotl.utils.data import load_tokenized_prepared_datasets
@@ -69,7 +69,7 @@ class TestDatasetPreparation(unittest.TestCase):
with tempfile.TemporaryDirectory() as tmp_dir: with tempfile.TemporaryDirectory() as tmp_dir:
tmp_ds_path = Path(tmp_dir) / "mhenrichsen/alpaca_2k_test" tmp_ds_path = Path(tmp_dir) / "mhenrichsen/alpaca_2k_test"
tmp_ds_path.mkdir(parents=True, exist_ok=True) tmp_ds_path.mkdir(parents=True, exist_ok=True)
snapshot_download_w_retry( snapshot_download(
repo_id="mhenrichsen/alpaca_2k_test", repo_id="mhenrichsen/alpaca_2k_test",
repo_type="dataset", repo_type="dataset",
local_dir=tmp_ds_path, local_dir=tmp_ds_path,
@@ -81,7 +81,7 @@ class TestDatasetPreparation(unittest.TestCase):
# how to load it. # how to load it.
cfg = DictDefault( cfg = DictDefault(
{ {
"tokenizer_config": "HuggingFaceTB/SmolLM2-135M", "tokenizer_config": "huggyllama/llama-7b",
"sequence_len": 1024, "sequence_len": 1024,
"datasets": [ "datasets": [
{ {
@@ -339,7 +339,7 @@ class TestDatasetPreparation(unittest.TestCase):
with tempfile.TemporaryDirectory() as tmp_dir: with tempfile.TemporaryDirectory() as tmp_dir:
tmp_ds_path = Path(tmp_dir) / "mhenrichsen/alpaca_2k_test" tmp_ds_path = Path(tmp_dir) / "mhenrichsen/alpaca_2k_test"
tmp_ds_path.mkdir(parents=True, exist_ok=True) tmp_ds_path.mkdir(parents=True, exist_ok=True)
snapshot_download_w_retry( snapshot_download(
repo_id="mhenrichsen/alpaca_2k_test", repo_id="mhenrichsen/alpaca_2k_test",
repo_type="dataset", repo_type="dataset",
local_dir=tmp_ds_path, local_dir=tmp_ds_path,
@@ -381,7 +381,7 @@ class TestDatasetPreparation(unittest.TestCase):
with tempfile.TemporaryDirectory() as tmp_dir: with tempfile.TemporaryDirectory() as tmp_dir:
tmp_ds_path = Path(tmp_dir) / "mhenrichsen/alpaca_2k_test" tmp_ds_path = Path(tmp_dir) / "mhenrichsen/alpaca_2k_test"
tmp_ds_path.mkdir(parents=True, exist_ok=True) tmp_ds_path.mkdir(parents=True, exist_ok=True)
snapshot_download_w_retry( snapshot_download(
repo_id="mhenrichsen/alpaca_2k_test", repo_id="mhenrichsen/alpaca_2k_test",
repo_type="dataset", repo_type="dataset",
local_dir=tmp_ds_path, local_dir=tmp_ds_path,