Compare commits
14 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0c36a6fea6 | ||
|
|
64aca3c23c | ||
|
|
22abfd6170 | ||
|
|
0658c458b7 | ||
|
|
690908cf2f | ||
|
|
b9378e9b39 | ||
|
|
57b0ad1467 | ||
|
|
ec4ead6e3e | ||
|
|
a319ac7d3e | ||
|
|
09d3f2cffa | ||
|
|
85147ec430 | ||
|
|
51cd409488 | ||
|
|
7235123d44 | ||
|
|
4f5eb42a73 |
@@ -55,6 +55,7 @@ Features:
|
||||
### Installation
|
||||
|
||||
```bash
|
||||
pip3 install -U packaging setuptools wheel ninja
|
||||
pip3 install --no-build-isolation axolotl[flash-attn,deepspeed]
|
||||
|
||||
# Download example axolotl configs, deepspeed configs
|
||||
|
||||
@@ -32,8 +32,9 @@ website:
|
||||
contents:
|
||||
- docs/getting-started.qmd
|
||||
- docs/installation.qmd
|
||||
- docs/cli.qmd
|
||||
- docs/inference.qmd
|
||||
- docs/cli.qmd
|
||||
- docs/config.qmd
|
||||
|
||||
- section: "Dataset Formats"
|
||||
contents: docs/dataset-formats/*
|
||||
@@ -74,10 +75,6 @@ website:
|
||||
- docs/debugging.qmd
|
||||
- docs/nccl.qmd
|
||||
|
||||
- section: "Reference"
|
||||
contents:
|
||||
- docs/config.qmd
|
||||
|
||||
format:
|
||||
html:
|
||||
theme: darkly
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
title: Config options
|
||||
title: Config Reference
|
||||
description: A complete list of all configuration options.
|
||||
---
|
||||
|
||||
@@ -30,6 +30,8 @@ tokenizer_legacy:
|
||||
# Resize the model embeddings when new tokens are added to multiples of 32
|
||||
# This is reported to improve training speed on some models
|
||||
resize_token_embeddings_to_32x:
|
||||
# Optional[bool] Whether to shrink the embeddings to len(tokenizer). By default, we won't shrink.
|
||||
shrink_embeddings:
|
||||
|
||||
# (Internal use only)
|
||||
# Used to identify which the model is based on
|
||||
@@ -205,10 +207,46 @@ test_datasets:
|
||||
data_files:
|
||||
- /workspace/data/eval.jsonl
|
||||
|
||||
# use RL training: 'dpo', 'ipo', 'kto'
|
||||
# use RL training: 'dpo', 'ipo', 'kto', 'simpo', 'orpo', 'grpo'
|
||||
rl:
|
||||
# whether to perform weighting if doing DPO training. Boolean.
|
||||
dpo_use_weighting:
|
||||
rl_beta: # Optional[float]. The beta parameter for the RL training.
|
||||
|
||||
# dpo
|
||||
dpo_use_weighting: # Optional[bool]. Whether to perform weighting.
|
||||
rpo_alpha: # Optional[float]. Weighting of NLL term in loss from RPO paper.
|
||||
|
||||
# orpo
|
||||
orpo_alpha: 0.1 # Parameter controlling the relative ratio loss weight in the ORPO loss. Passed to `beta` in `ORPOConfig` due to trl mapping.
|
||||
|
||||
# kto
|
||||
kto_desirable_weight: # Optional[float]. Factor for desirable loss term in KTO loss.
|
||||
kto_undesirable_weight: # Optional[float]. Factor for undesirable loss term in KTO loss.
|
||||
|
||||
# simpo
|
||||
cpo_alpha: 1.0 # Weight of the BC regularizer
|
||||
simpo_gamma: 0.5 # Target reward margin for the SimPO loss
|
||||
|
||||
# grpo
|
||||
trl:
|
||||
use_vllm: # Optional[bool]. Whether to use VLLM for RL training.
|
||||
vllm_device: # Optional[str]. Device to use for VLLM.
|
||||
vllm_gpu_memory_utilization: # Optional[float]. GPU memory utilization for VLLM.
|
||||
vllm_max_model_len: # Optional[int]. Maximum length of the model for VLLM.
|
||||
vllm_dtype: # Optional[str]. Data type for VLLM.
|
||||
|
||||
beta: # Optional[float]. Beta parameter for the RL training. Same as `rl_beta`. Use
|
||||
max_completion_length: # Optional[int]. Maximum length of the completion for RL training.
|
||||
|
||||
reward_funcs: # Optional[list[str]]. List of reward functions to load. Paths must be importable from current dir.
|
||||
reward_weights: # Optional[list[float]]. List of reward weights for the reward functions.
|
||||
|
||||
num_generations: # Optional[int]. Number of generations to sample.
|
||||
log_completions: # Optional[bool]. Whether to log completions.
|
||||
|
||||
sync_ref_model: # Optional[bool]. Whether to sync the reference model.
|
||||
ref_model_mixup_alpha: # Optional[float]. Mixup alpha for the reference model.
|
||||
ref_model_sync_steps: # Optional[int]. Sync steps for the reference model.
|
||||
|
||||
|
||||
# reward modelling: `True` or `False`
|
||||
reward_model:
|
||||
@@ -232,7 +270,7 @@ default_system_message: You are a helpful assistant. Please give a long and deta
|
||||
# subsequent training attempts load faster, relative path
|
||||
dataset_prepared_path: data/last_run_prepared
|
||||
# Push prepared dataset to hub
|
||||
push_dataset_to_hub: # repo path
|
||||
push_dataset_to_hub: # Optional[str] repo_org/repo_name
|
||||
# The maximum number of processes to use while preprocessing your input dataset. This defaults to `os.cpu_count()`
|
||||
# if not set.
|
||||
dataset_processes: # defaults to os.cpu_count() if not set
|
||||
|
||||
10
docs/faq.qmd
10
docs/faq.qmd
@@ -27,6 +27,16 @@ description: Frequently asked questions
|
||||
|
||||
> A: This is usually an issue with the GPU. This can be resolved through setting the os environment variable `CUDA_VISIBLE_DEVICES=0`. If you are on runpod, this is usually a pod issue. Starting a new pod should take care of it.
|
||||
|
||||
**Q: Received mismatch error on merge adapters / loading adapters between torch.Size of checkpoint and model.**
|
||||
|
||||
> A: This is likely due to vocab size mismatch. By default, Axolotl expands the model's embeddings if the tokenizer has more tokens than the model. Please use the `axolotl merge-lora` command to merge the adapters instead of using your own scripts.
|
||||
|
||||
> On the other hand, if the model has more tokens than the tokenizer, Axolotl does not shrink the model's embeddings unless `shrink_embeddings: true` is set in the config.
|
||||
|
||||
**Q: How to call Axolotl via custom python scripts?**
|
||||
|
||||
> A: Yes, since Axolotl is just Python, please see `src/axolotl/cli/main.py` on how each command is called.
|
||||
|
||||
### Chat templates
|
||||
|
||||
**Q: `jinja2.exceptions.UndefinedError: 'dict object' has no attribute 'content' / 'role' / ____`**
|
||||
|
||||
@@ -36,7 +36,9 @@ The YAML configuration file controls everything about your training. Here's what
|
||||
|
||||
```yaml
|
||||
base_model: NousResearch/Llama-3.2-1B
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: true
|
||||
adapter: lora
|
||||
|
||||
datasets:
|
||||
- path: teknium/GPT4-LLM-Cleaned
|
||||
@@ -44,11 +46,15 @@ datasets:
|
||||
dataset_prepared_path: last_run_prepared
|
||||
val_set_size: 0.1
|
||||
output_dir: ./outputs/lora-out
|
||||
|
||||
adapter: lora
|
||||
lora_model_dir:
|
||||
```
|
||||
|
||||
::: {.callout-tip}
|
||||
`load_in_8bit: true` and `adapter: lora` enables LoRA adapter finetuning.
|
||||
|
||||
- To perform Full finetuning, remove these two lines.
|
||||
- To perform QLoRA finetuning, replace with `load_in_4bit: true` and `adapter: qlora`.
|
||||
:::
|
||||
|
||||
See our [Config options](config.qmd) for more details.
|
||||
|
||||
### Training {#sec-training}
|
||||
@@ -56,7 +62,7 @@ See our [Config options](config.qmd) for more details.
|
||||
When you run `axolotl train`, Axolotl:
|
||||
|
||||
1. Downloads the base model
|
||||
2. (If specified) applies LoRA adapter layers
|
||||
2. (If specified) applies QLoRA/LoRA adapter layers
|
||||
3. Loads and processes the dataset
|
||||
4. Runs the training loop
|
||||
5. Saves the trained model and / or LoRA weights
|
||||
@@ -69,6 +75,8 @@ Let's modify the example for your own data:
|
||||
|
||||
```yaml
|
||||
base_model: NousResearch/Nous-Hermes-llama-1b-v1
|
||||
|
||||
load_in_8bit: true
|
||||
adapter: lora
|
||||
|
||||
# Training settings
|
||||
@@ -104,8 +112,6 @@ format):
|
||||
{"instruction": "Classify this text", "input": "Not good at all", "output": "negative"}
|
||||
```
|
||||
|
||||
Please consult the supported [Dataset Formats](dataset-formats/) for more details.
|
||||
|
||||
3. Run the training:
|
||||
|
||||
```bash
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
title: "Inference"
|
||||
title: "Inference and Merging"
|
||||
format:
|
||||
html:
|
||||
toc: true
|
||||
@@ -9,10 +9,14 @@ execute:
|
||||
enabled: false
|
||||
---
|
||||
|
||||
This guide covers how to use your trained models for inference, including model loading, interactive testing, and common troubleshooting steps.
|
||||
This guide covers how to use your trained models for inference, including model loading, interactive testing, merging adapters, and common troubleshooting steps.
|
||||
|
||||
## Quick Start {#sec-quickstart}
|
||||
|
||||
::: {.callout-tip}
|
||||
Use the same config used for training on inference/merging.
|
||||
:::
|
||||
|
||||
### Basic Inference {#sec-basic}
|
||||
|
||||
::: {.panel-tabset}
|
||||
|
||||
@@ -22,6 +22,7 @@ This guide covers all the ways you can install and set up Axolotl for your envir
|
||||
### PyPI Installation (Recommended) {#sec-pypi}
|
||||
|
||||
```{.bash}
|
||||
pip3 install -U packaging setuptools wheel ninja
|
||||
pip3 install --no-build-isolation axolotl[flash-attn,deepspeed]
|
||||
```
|
||||
|
||||
@@ -37,7 +38,7 @@ For the latest features between releases:
|
||||
```{.bash}
|
||||
git clone https://github.com/axolotl-ai-cloud/axolotl.git
|
||||
cd axolotl
|
||||
pip3 install packaging ninja
|
||||
pip3 install -U packaging setuptools wheel ninja
|
||||
pip3 install --no-build-isolation -e '.[flash-attn,deepspeed]'
|
||||
```
|
||||
|
||||
@@ -107,7 +108,7 @@ We recommend using WSL2 (Windows Subsystem for Linux) or Docker.
|
||||
2. Install PyTorch: https://pytorch.org/get-started/locally/
|
||||
3. Install Axolotl:
|
||||
```{.bash}
|
||||
pip3 install packaging
|
||||
pip3 install -U packaging setuptools wheel ninja
|
||||
pip3 install --no-build-isolation -e '.[flash-attn,deepspeed]'
|
||||
```
|
||||
4. (Optional) Login to Hugging Face:
|
||||
|
||||
@@ -66,6 +66,10 @@ logic to be compatible with more of them.
|
||||
|
||||
</details>
|
||||
|
||||
::: {.callout-tip}
|
||||
Check out our [LoRA optimizations blog](https://axolotlai.substack.com/p/accelerating-lora-fine-tuning-with).
|
||||
:::
|
||||
|
||||
## Usage
|
||||
|
||||
These optimizations can be enabled in your Axolotl config YAML file. The
|
||||
|
||||
@@ -41,6 +41,10 @@ Bradley-Terry chat templates expect single-turn conversations in the following f
|
||||
|
||||
### Process Reward Models (PRM)
|
||||
|
||||
::: {.callout-tip}
|
||||
Check out our [PRM blog](https://axolotlai.substack.com/p/process-reward-models).
|
||||
:::
|
||||
|
||||
Process reward models are trained using data which contains preference annotations for each step in a series of interactions. Typically, PRMs are trained to provide reward signals over each step of a reasoning trace and are used for downstream reinforcement learning.
|
||||
```yaml
|
||||
base_model: Qwen/Qwen2.5-3B
|
||||
|
||||
@@ -298,7 +298,7 @@ The input format is a simple JSON input with customizable fields based on the ab
|
||||
|
||||
### IPO
|
||||
|
||||
As IPO is just DPO with a different loss function, all supported options for DPO works here.
|
||||
As IPO is just DPO with a different loss function, all supported dataset formats for [DPO](#dpo) are also supported for IPO.
|
||||
|
||||
```yaml
|
||||
rl: ipo
|
||||
@@ -344,8 +344,9 @@ ORPO supports the following types with the following dataset format:
|
||||
|
||||
```yaml
|
||||
rl: kto
|
||||
rl_beta: 0.5
|
||||
kto_desirable_weight: 0.2
|
||||
rl_beta: 0.1 # default
|
||||
kto_desirable_weight: 1.0 # default
|
||||
kto_undesirable_weight: 1.0 # default
|
||||
|
||||
remove_unused_columns: false
|
||||
|
||||
@@ -497,6 +498,10 @@ The input format is a simple JSON input with customizable fields based on the ab
|
||||
|
||||
### GRPO
|
||||
|
||||
::: {.callout-tip}
|
||||
Check out our [GRPO cookbook](https://github.com/axolotl-ai-cloud/axolotl-cookbook/tree/main/grpo#training-an-r1-style-large-language-model-using-grpo).
|
||||
:::
|
||||
|
||||
GRPO uses custom reward functions and transformations. Please have them ready locally.
|
||||
|
||||
For ex, to load OpenAI's GSM8K and use a random reward for completions:
|
||||
@@ -540,6 +545,19 @@ To see other examples of custom reward functions, please see [TRL GRPO Docs](htt
|
||||
|
||||
To see description of the configs, please see [TRLConfig](https://github.com/axolotl-ai-cloud/axolotl/blob/main/src/axolotl/utils/config/models/input/v0_4_1/trl.py).
|
||||
|
||||
### SimPO
|
||||
|
||||
SimPO uses [CPOTrainer](https://huggingface.co/docs/trl/main/en/cpo_trainer) but with alternative loss function.
|
||||
|
||||
```yaml
|
||||
rl: simpo
|
||||
rl_beta: 0.1 # default in CPOTrainer
|
||||
cpo_alpha: 1.0 # default in CPOTrainer
|
||||
simpo_gamma: 0.5 # default in CPOTrainer
|
||||
```
|
||||
|
||||
This method uses the same dataset format as [DPO](#dpo).
|
||||
|
||||
### Using local dataset files
|
||||
|
||||
```yaml
|
||||
|
||||
@@ -55,7 +55,7 @@ tf32: true
|
||||
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: true
|
||||
use_reentrant: false
|
||||
early_stopping_patience:
|
||||
resume_from_checkpoint:
|
||||
local_rank:
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
"""Module with Pydantic models for configuration."""
|
||||
|
||||
# pylint: disable=too-many-lines
|
||||
|
||||
import logging
|
||||
@@ -1678,6 +1679,30 @@ class AxolotlInputConfig(
|
||||
|
||||
return data
|
||||
|
||||
@model_validator(mode="before")
|
||||
@classmethod
|
||||
def check_rl_config_gradient_checkpointing(cls, data):
|
||||
# TODO: SalmanMohammadi
|
||||
# Distributed RL with QLoRA + gradient checkpointing
|
||||
# and use_reentrant = True is broken upstream in TRL
|
||||
# pylint: disable=too-many-boolean-expressions
|
||||
if (
|
||||
data.get("rl")
|
||||
and data.get("gradient_checkpointing")
|
||||
and data.get("gradient_checkpointing_kwargs")
|
||||
and data.get("gradient_checkpointing_kwargs").get("use_reentrant")
|
||||
and data.get("load_in_4bit")
|
||||
and data.get("adapter") == "qlora"
|
||||
and data.get("capabilities")
|
||||
and data.get("capabilities").get("n_gpu", 1) > 1
|
||||
):
|
||||
raise ValueError(
|
||||
"The `use_reentrant: True` implementation of gradient checkpointing "
|
||||
"is not supported for distributed RL training with QLoRA. Please set "
|
||||
"`use_reentrant: False` in `gradient_checkpointing_kwargs`."
|
||||
)
|
||||
return data
|
||||
|
||||
@model_validator(mode="before")
|
||||
@classmethod
|
||||
def check_kto_config(cls, data):
|
||||
@@ -1688,15 +1713,6 @@ class AxolotlInputConfig(
|
||||
if data.get("remove_unused_columns") is not False:
|
||||
raise ValueError("Set `remove_unused_columns: False` when using kto")
|
||||
|
||||
if data.get("gradient_checkpointing") and not (
|
||||
data.get("gradient_checkpointing_kwargs")
|
||||
and isinstance(data.get("gradient_checkpointing_kwargs"), dict)
|
||||
and data["gradient_checkpointing_kwargs"].get("use_reentrant")
|
||||
):
|
||||
raise ValueError(
|
||||
"Set `gradient_checkpointing_kwargs: {use_reentrant: true}` for when kto is enabled"
|
||||
)
|
||||
|
||||
return data
|
||||
|
||||
|
||||
@@ -1827,6 +1843,14 @@ class AxolotlConfigWCapabilities(AxolotlInputConfig):
|
||||
data["torch_compile"] = False
|
||||
return data
|
||||
|
||||
@model_validator(mode="before")
|
||||
@classmethod
|
||||
def check_beta_and_trl_beta_match(cls, data):
|
||||
if data.get("beta") and data.get("trl", {}).get("beta"):
|
||||
if data["beta"] != data["trl"]["beta"]:
|
||||
raise ValueError("beta and trl.beta must match or one must be removed")
|
||||
return data
|
||||
|
||||
|
||||
def handle_legacy_message_fields_logic(data: dict) -> dict:
|
||||
"""
|
||||
|
||||
@@ -24,7 +24,6 @@ from peft import (
|
||||
PeftModelForCausalLM,
|
||||
prepare_model_for_kbit_training,
|
||||
)
|
||||
from peft.tuners.lora import QuantLinear
|
||||
from torch import nn
|
||||
from transformers import ( # noqa: F401
|
||||
AddedToken,
|
||||
@@ -1360,7 +1359,7 @@ def load_llama_adapter(model, cfg):
|
||||
|
||||
|
||||
def find_all_linear_names(model):
|
||||
cls = (bnb.nn.Linear4bit, bnb.nn.Linear8bitLt, torch.nn.Linear, QuantLinear)
|
||||
cls = (bnb.nn.Linear4bit, bnb.nn.Linear8bitLt, torch.nn.Linear)
|
||||
lora_module_names = set()
|
||||
for name, module in model.named_modules():
|
||||
if (
|
||||
|
||||
Reference in New Issue
Block a user