Compare commits
23 Commits
autogptq-t
...
benchmark-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c3de28942c | ||
|
|
45848a9285 | ||
|
|
d6cea18034 | ||
|
|
606846e0a5 | ||
|
|
a6c9223114 | ||
|
|
8b16ecd448 | ||
|
|
f5db88a10d | ||
|
|
99d844f215 | ||
|
|
aefd4d74fa | ||
|
|
24b0e93235 | ||
|
|
2455254b92 | ||
|
|
918e040601 | ||
|
|
ef062d8fcb | ||
|
|
d4c8b66f3d | ||
|
|
64e9824d3e | ||
|
|
1134654c98 | ||
|
|
2fc756c289 | ||
|
|
943b84c490 | ||
|
|
6f166464d8 | ||
|
|
e3b07402a7 | ||
|
|
8d3c8a3eab | ||
|
|
c30120e684 | ||
|
|
9aed60fa54 |
10
.github/workflows/main.yml
vendored
10
.github/workflows/main.yml
vendored
@@ -23,6 +23,11 @@ jobs:
|
|||||||
python_version: "3.10"
|
python_version: "3.10"
|
||||||
pytorch: 2.0.1
|
pytorch: 2.0.1
|
||||||
axolotl_extras:
|
axolotl_extras:
|
||||||
|
- cuda: 118
|
||||||
|
cuda_version: 11.8.0
|
||||||
|
python_version: "3.9"
|
||||||
|
pytorch: 2.0.1
|
||||||
|
axolotl_extras: gptq
|
||||||
runs-on: self-hosted
|
runs-on: self-hosted
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
@@ -68,6 +73,11 @@ jobs:
|
|||||||
pytorch: 2.0.1
|
pytorch: 2.0.1
|
||||||
axolotl_extras:
|
axolotl_extras:
|
||||||
is_latest: true
|
is_latest: true
|
||||||
|
- cuda: 118
|
||||||
|
cuda_version: 11.8.0
|
||||||
|
python_version: "3.9"
|
||||||
|
pytorch: 2.0.1
|
||||||
|
axolotl_extras: gptq
|
||||||
runs-on: self-hosted
|
runs-on: self-hosted
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
|
|||||||
2
.github/workflows/tests.yml
vendored
2
.github/workflows/tests.yml
vendored
@@ -24,7 +24,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
pip install -e .
|
pip install -e .[peft]
|
||||||
pip install -r requirements-tests.txt
|
pip install -r requirements-tests.txt
|
||||||
|
|
||||||
- name: Run tests
|
- name: Run tests
|
||||||
|
|||||||
19
README.md
19
README.md
@@ -328,15 +328,6 @@ See [examples](examples) for quick start. It is recommended to duplicate and mod
|
|||||||
name: enron_emails
|
name: enron_emails
|
||||||
type: completion # format from earlier
|
type: completion # format from earlier
|
||||||
|
|
||||||
# huggingface repo with multiple named configurations/subsets
|
|
||||||
datasets:
|
|
||||||
- path: bigcode/commitpackft
|
|
||||||
name:
|
|
||||||
- ruby
|
|
||||||
- python
|
|
||||||
- typescript
|
|
||||||
type: ... # unimplemented custom format
|
|
||||||
|
|
||||||
# local
|
# local
|
||||||
datasets:
|
datasets:
|
||||||
- path: data.jsonl # or json
|
- path: data.jsonl # or json
|
||||||
@@ -416,10 +407,6 @@ fp16: true
|
|||||||
# Use CUDA tf32
|
# Use CUDA tf32
|
||||||
tf32: true # require >=ampere
|
tf32: true # require >=ampere
|
||||||
|
|
||||||
# No AMP (automatic mixed precision)
|
|
||||||
bfloat16: true # require >=ampere
|
|
||||||
float16: true
|
|
||||||
|
|
||||||
# a list of one or more datasets to finetune the model with
|
# a list of one or more datasets to finetune the model with
|
||||||
datasets:
|
datasets:
|
||||||
# hf dataset repo | "json" for local dataset, make sure to fill data_files
|
# hf dataset repo | "json" for local dataset, make sure to fill data_files
|
||||||
@@ -472,9 +459,6 @@ dataset_shard_idx:
|
|||||||
# the maximum length of an input to train with, this should typically be less than 2048
|
# the maximum length of an input to train with, this should typically be less than 2048
|
||||||
# as most models have a token/context limit of 2048
|
# as most models have a token/context limit of 2048
|
||||||
sequence_len: 2048
|
sequence_len: 2048
|
||||||
# pad inputs so each step uses constant sized buffers
|
|
||||||
# this will reduce memory fragmentation and may prevent OOMs, by re-using memory more efficiently
|
|
||||||
pad_to_sequence_len:
|
|
||||||
# max sequence length to concatenate training samples together up to
|
# max sequence length to concatenate training samples together up to
|
||||||
# inspired by StackLLaMA. see https://huggingface.co/blog/stackllama#supervised-fine-tuning
|
# inspired by StackLLaMA. see https://huggingface.co/blog/stackllama#supervised-fine-tuning
|
||||||
# FutureWarning: This will soon be DEPRECATED
|
# FutureWarning: This will soon be DEPRECATED
|
||||||
@@ -626,6 +610,9 @@ deepspeed:
|
|||||||
# Path to torch distx for optim 'adamw_anyprecision'
|
# Path to torch distx for optim 'adamw_anyprecision'
|
||||||
torchdistx_path:
|
torchdistx_path:
|
||||||
|
|
||||||
|
# Set padding for data collator to 'longest'
|
||||||
|
collator_pad_to_longest:
|
||||||
|
|
||||||
# Set to HF dataset for type: 'completion' for streaming instead of pre-tokenize
|
# Set to HF dataset for type: 'completion' for streaming instead of pre-tokenize
|
||||||
pretraining_dataset:
|
pretraining_dataset:
|
||||||
|
|
||||||
|
|||||||
@@ -11,13 +11,14 @@ RUN apt-get update && \
|
|||||||
|
|
||||||
WORKDIR /workspace
|
WORKDIR /workspace
|
||||||
|
|
||||||
|
RUN pip3 install --force-reinstall "peft @ git+https://github.com/huggingface/peft.git@main"
|
||||||
RUN git clone --depth=1 https://github.com/OpenAccess-AI-Collective/axolotl.git
|
RUN git clone --depth=1 https://github.com/OpenAccess-AI-Collective/axolotl.git
|
||||||
# If AXOLOTL_EXTRAS is set, append it in brackets
|
# If AXOLOTL_EXTRAS is set, append it in brackets
|
||||||
RUN cd axolotl && \
|
RUN cd axolotl && \
|
||||||
if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
|
if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
|
||||||
pip install -e .[flash-attn,gptq,$AXOLOTL_EXTRAS]; \
|
pip install -e .[flash-attn,$AXOLOTL_EXTRAS]; \
|
||||||
else \
|
else \
|
||||||
pip install -e .[flash-attn,gptq]; \
|
pip install -e .[flash-attn]; \
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# fix so that git fetch/pull from remote works
|
# fix so that git fetch/pull from remote works
|
||||||
|
|||||||
8
examples/gptq-lora-7b/README.md
Normal file
8
examples/gptq-lora-7b/README.md
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
# LLaMa 7B using LoRA
|
||||||
|
|
||||||
|
This is a good place to start for beginners. This will run on an NVIDIA RTX4090 with no other changes needed.
|
||||||
|
|
||||||
|
```shell
|
||||||
|
accelerate launch scripts/finetune.py examples/gptq-lora-7b/config.yml
|
||||||
|
|
||||||
|
```
|
||||||
63
examples/gptq-lora-7b/config.yml
Normal file
63
examples/gptq-lora-7b/config.yml
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
base_model: Neko-Institute-of-Science/LLaMA-7B-4bit-128g
|
||||||
|
base_model_config: Neko-Institute-of-Science/LLaMA-7B-4bit-128g
|
||||||
|
model_type: LlamaForCausalLM
|
||||||
|
tokenizer_type: LlamaTokenizer
|
||||||
|
trust_remote_code:
|
||||||
|
load_in_8bit: true
|
||||||
|
gptq: true
|
||||||
|
datasets:
|
||||||
|
- path: vicgalle/alpaca-gpt4
|
||||||
|
type: alpaca
|
||||||
|
dataset_prepared_path: last_run_prepared
|
||||||
|
val_set_size: 0.02
|
||||||
|
adapter:
|
||||||
|
lora_model_dir:
|
||||||
|
sequence_len: 2048
|
||||||
|
max_packed_sequence_len:
|
||||||
|
lora_r: 8
|
||||||
|
lora_alpha: 16
|
||||||
|
lora_dropout: 0.05
|
||||||
|
lora_target_modules:
|
||||||
|
- q_proj
|
||||||
|
- v_proj
|
||||||
|
lora_fan_in_fan_out: false
|
||||||
|
wandb_project: llama-7b-lora-int4
|
||||||
|
wandb_entity:
|
||||||
|
wandb_watch:
|
||||||
|
wandb_run_id:
|
||||||
|
wandb_log_model:
|
||||||
|
output_dir: ./llama-7b-lora-int4
|
||||||
|
gradient_accumulation_steps: 1
|
||||||
|
micro_batch_size: 1
|
||||||
|
num_epochs: 3
|
||||||
|
optimizer: adamw_bnb_8bit
|
||||||
|
torchdistx_path:
|
||||||
|
lr_scheduler: cosine
|
||||||
|
learning_rate: 0.0000002
|
||||||
|
train_on_inputs: false
|
||||||
|
group_by_length: false
|
||||||
|
fp16: true
|
||||||
|
bf16: false
|
||||||
|
tf32: true
|
||||||
|
early_stopping_patience:
|
||||||
|
resume_from_checkpoint:
|
||||||
|
local_rank:
|
||||||
|
logging_steps: 5
|
||||||
|
xformers_attention:
|
||||||
|
flash_attention:
|
||||||
|
gradient_checkpointing: true
|
||||||
|
gptq_groupsize: 128
|
||||||
|
gptq_model_v1: false
|
||||||
|
warmup_steps: 20
|
||||||
|
eval_steps: 110
|
||||||
|
save_steps: 660
|
||||||
|
debug:
|
||||||
|
deepspeed:
|
||||||
|
weight_decay: 0.0001
|
||||||
|
fsdp:
|
||||||
|
fsdp_config:
|
||||||
|
tokens:
|
||||||
|
pad_token: "<pad>"
|
||||||
|
bos_token: "<s>"
|
||||||
|
eos_token: "</s>"
|
||||||
|
unk_token: "<unk>"
|
||||||
@@ -1,76 +0,0 @@
|
|||||||
base_model: TheBloke/Llama-2-7B-GPTQ
|
|
||||||
base_model_config: TheBloke/Llama-2-7B-GPTQ
|
|
||||||
is_llama_derived_model: false
|
|
||||||
gptq: true
|
|
||||||
gptq_bits: 4
|
|
||||||
model_type: AutoModelForCausalLM
|
|
||||||
tokenizer_type: LlamaTokenizer
|
|
||||||
tokenizer_use_fast: true
|
|
||||||
tokenizer_legacy: true
|
|
||||||
load_in_8bit: false
|
|
||||||
load_in_4bit: false
|
|
||||||
strict: false
|
|
||||||
push_dataset_to_hub:
|
|
||||||
hf_use_auth_token: true
|
|
||||||
datasets:
|
|
||||||
- path: mhenrichsen/alpaca_2k_test
|
|
||||||
type: alpaca
|
|
||||||
dataset_prepared_path: last_run_prepared
|
|
||||||
val_set_size: 0.01
|
|
||||||
adapter: lora
|
|
||||||
lora_model_dir:
|
|
||||||
sequence_len: 4096
|
|
||||||
sample_packing:
|
|
||||||
lora_r: 8
|
|
||||||
lora_alpha: 32
|
|
||||||
lora_dropout: 0.05
|
|
||||||
lora_target_modules:
|
|
||||||
- k_proj
|
|
||||||
- o_proj
|
|
||||||
- q_proj
|
|
||||||
- v_proj
|
|
||||||
lora_target_linear:
|
|
||||||
lora_fan_in_fan_out:
|
|
||||||
wandb_project:
|
|
||||||
wandb_watch:
|
|
||||||
wandb_run_id:
|
|
||||||
wandb_log_model:
|
|
||||||
output_dir: ./model-out
|
|
||||||
gradient_accumulation_steps: 1
|
|
||||||
micro_batch_size: 1
|
|
||||||
num_epochs: 3
|
|
||||||
optimizer: adamw_torch
|
|
||||||
adam_beta2: 0.95
|
|
||||||
adam_eps: 0.00001
|
|
||||||
max_grad_norm: 1.0
|
|
||||||
torchdistx_path:
|
|
||||||
lr_scheduler: cosine
|
|
||||||
lr_quadratic_warmup: true
|
|
||||||
learning_rate: 0.000017
|
|
||||||
train_on_inputs: false
|
|
||||||
group_by_length: false
|
|
||||||
bf16: false
|
|
||||||
fp16: false
|
|
||||||
float16: true
|
|
||||||
tf32: true
|
|
||||||
gradient_checkpointing: true
|
|
||||||
early_stopping_patience:
|
|
||||||
resume_from_checkpoint:
|
|
||||||
local_rank:
|
|
||||||
logging_steps: 1
|
|
||||||
xformers_attention:
|
|
||||||
flash_attention:
|
|
||||||
sdp_attention:
|
|
||||||
flash_optimum:
|
|
||||||
gptq_groupsize:
|
|
||||||
gptq_model_v1:
|
|
||||||
warmup_steps: 100
|
|
||||||
eval_steps:
|
|
||||||
save_steps:
|
|
||||||
debug:
|
|
||||||
deepspeed:
|
|
||||||
weight_decay: 0.1
|
|
||||||
special_tokens:
|
|
||||||
bos_token: "<s>"
|
|
||||||
eos_token: "</s>"
|
|
||||||
unk_token: "<unk>"
|
|
||||||
@@ -47,3 +47,4 @@ local_rank:
|
|||||||
gradient_checkpointing: true
|
gradient_checkpointing: true
|
||||||
fsdp:
|
fsdp:
|
||||||
fsdp_config:
|
fsdp_config:
|
||||||
|
collator_pad_to_longest: true
|
||||||
|
|||||||
@@ -1,13 +1,10 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/cu118
|
|
||||||
--extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/
|
|
||||||
torch==2.0.1
|
|
||||||
auto-gptq
|
|
||||||
packaging
|
packaging
|
||||||
peft @ git+https://github.com/huggingface/peft.git
|
peft @ git+https://github.com/huggingface/peft.git
|
||||||
transformers @ git+https://github.com/huggingface/transformers.git
|
transformers @ git+https://github.com/huggingface/transformers.git
|
||||||
bitsandbytes>=0.41.1
|
bitsandbytes>=0.41.1
|
||||||
accelerate @ git+https://github.com/huggingface/accelerate@2a289f6108e77a77a4efffb3f6316bc98538413b
|
accelerate @ git+https://github.com/huggingface/accelerate@2a289f6108e77a77a4efffb3f6316bc98538413b
|
||||||
addict
|
addict
|
||||||
|
evaluate
|
||||||
fire
|
fire
|
||||||
PyYAML>=6.0
|
PyYAML>=6.0
|
||||||
datasets
|
datasets
|
||||||
@@ -28,4 +25,3 @@ rouge-score==0.1.2
|
|||||||
scipy
|
scipy
|
||||||
scikit-learn==1.2.2
|
scikit-learn==1.2.2
|
||||||
pynvml
|
pynvml
|
||||||
art
|
|
||||||
|
|||||||
@@ -6,17 +6,14 @@ import os
|
|||||||
import random
|
import random
|
||||||
import signal
|
import signal
|
||||||
import sys
|
import sys
|
||||||
from dataclasses import dataclass, field
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, List, Optional, Union
|
from typing import Any, Dict, List, Optional, Union
|
||||||
|
|
||||||
import fire
|
import fire
|
||||||
import torch
|
import torch
|
||||||
import transformers
|
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
# add src to the pythonpath so we don't need to pip install this
|
# add src to the pythonpath so we don't need to pip install this
|
||||||
from art import text2art
|
|
||||||
from optimum.bettertransformer import BetterTransformer
|
from optimum.bettertransformer import BetterTransformer
|
||||||
from transformers import GenerationConfig, TextStreamer
|
from transformers import GenerationConfig, TextStreamer
|
||||||
|
|
||||||
@@ -25,7 +22,7 @@ from axolotl.utils.config import normalize_config, validate_config
|
|||||||
from axolotl.utils.data import prepare_dataset
|
from axolotl.utils.data import prepare_dataset
|
||||||
from axolotl.utils.dict import DictDefault
|
from axolotl.utils.dict import DictDefault
|
||||||
from axolotl.utils.distributed import is_main_process
|
from axolotl.utils.distributed import is_main_process
|
||||||
from axolotl.utils.models import load_model, load_model_config, load_tokenizer
|
from axolotl.utils.models import load_model, load_tokenizer
|
||||||
from axolotl.utils.tokenization import check_dataset_labels
|
from axolotl.utils.tokenization import check_dataset_labels
|
||||||
from axolotl.utils.trainer import setup_trainer
|
from axolotl.utils.trainer import setup_trainer
|
||||||
from axolotl.utils.wandb import setup_wandb_env_vars
|
from axolotl.utils.wandb import setup_wandb_env_vars
|
||||||
@@ -40,26 +37,16 @@ LOG = logging.getLogger("axolotl.scripts")
|
|||||||
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
|
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
def print_axolotl_text_art():
|
||||||
class TrainerCliArgs:
|
ascii_art = """
|
||||||
"""
|
dP dP dP
|
||||||
dataclass representing the various non-training arguments
|
88 88 88
|
||||||
"""
|
.d8888b. dP. .dP .d8888b. 88 .d8888b. d8888P 88
|
||||||
|
88' `88 `8bd8' 88' `88 88 88' `88 88 88
|
||||||
|
88. .88 .d88b. 88. .88 88 88. .88 88 88
|
||||||
|
`88888P8 dP' `dP `88888P' dP `88888P' dP dP
|
||||||
|
"""
|
||||||
|
|
||||||
debug: bool = field(default=False)
|
|
||||||
inference: bool = field(default=False)
|
|
||||||
merge_lora: bool = field(default=False)
|
|
||||||
prepare_ds_only: bool = field(default=False)
|
|
||||||
prompter: Optional[str] = field(default=None)
|
|
||||||
shard: bool = field(default=False)
|
|
||||||
|
|
||||||
|
|
||||||
def print_axolotl_text_art(suffix=None):
|
|
||||||
font = "nancyj"
|
|
||||||
ascii_text = " axolotl"
|
|
||||||
if suffix:
|
|
||||||
ascii_text += f" x {suffix}"
|
|
||||||
ascii_art = text2art(" axolotl", font=font)
|
|
||||||
if is_main_process():
|
if is_main_process():
|
||||||
print(ascii_art)
|
print(ascii_art)
|
||||||
|
|
||||||
@@ -74,8 +61,6 @@ def get_multi_line_input() -> Optional[str]:
|
|||||||
|
|
||||||
|
|
||||||
def do_inference(cfg, model, tokenizer, prompter: Optional[str]):
|
def do_inference(cfg, model, tokenizer, prompter: Optional[str]):
|
||||||
if prompter == "None":
|
|
||||||
prompter = None
|
|
||||||
default_tokens = {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>"}
|
default_tokens = {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>"}
|
||||||
|
|
||||||
for token, symbol in default_tokens.items():
|
for token, symbol in default_tokens.items():
|
||||||
@@ -150,10 +135,6 @@ def choose_config(path: Path):
|
|||||||
"No YAML config files found in the specified directory. Are you using a .yml extension?"
|
"No YAML config files found in the specified directory. Are you using a .yml extension?"
|
||||||
)
|
)
|
||||||
|
|
||||||
if len(yaml_files) == 1:
|
|
||||||
print(f"Using default YAML file '{yaml_files[0]}'")
|
|
||||||
return yaml_files[0]
|
|
||||||
|
|
||||||
print("Choose a YAML file:")
|
print("Choose a YAML file:")
|
||||||
for idx, file in enumerate(yaml_files):
|
for idx, file in enumerate(yaml_files):
|
||||||
print(f"{idx + 1}. {file}")
|
print(f"{idx + 1}. {file}")
|
||||||
@@ -177,20 +158,45 @@ def check_not_in(list1: List[str], list2: Union[Dict[str, Any], List[str]]) -> b
|
|||||||
|
|
||||||
|
|
||||||
def train(
|
def train(
|
||||||
*,
|
config: Path = Path("configs/"),
|
||||||
cfg: DictDefault,
|
prepare_ds_only: bool = False,
|
||||||
cli_args: TrainerCliArgs,
|
**kwargs,
|
||||||
):
|
):
|
||||||
|
print_axolotl_text_art()
|
||||||
|
if Path(config).is_dir():
|
||||||
|
config = choose_config(config)
|
||||||
|
|
||||||
|
# load the config from the yaml file
|
||||||
|
with open(config, encoding="utf-8") as file:
|
||||||
|
cfg: DictDefault = DictDefault(yaml.safe_load(file))
|
||||||
|
# if there are any options passed in the cli, if it is something that seems valid from the yaml,
|
||||||
|
# then overwrite the value
|
||||||
|
cfg_keys = cfg.keys()
|
||||||
|
for k, _ in kwargs.items():
|
||||||
|
# if not strict, allow writing to cfg even if it's not in the yml already
|
||||||
|
if k in cfg_keys or not cfg.strict:
|
||||||
|
# handle booleans
|
||||||
|
if isinstance(cfg[k], bool):
|
||||||
|
cfg[k] = bool(kwargs[k])
|
||||||
|
else:
|
||||||
|
cfg[k] = kwargs[k]
|
||||||
|
|
||||||
|
validate_config(cfg)
|
||||||
|
|
||||||
|
normalize_config(cfg)
|
||||||
|
|
||||||
|
setup_wandb_env_vars(cfg)
|
||||||
|
|
||||||
# load the tokenizer first
|
# load the tokenizer first
|
||||||
LOG.info(f"loading tokenizer... {cfg.tokenizer_config or cfg.base_model_config}")
|
LOG.info(f"loading tokenizer... {cfg.tokenizer_config or cfg.base_model_config}")
|
||||||
tokenizer = load_tokenizer(cfg)
|
tokenizer = load_tokenizer(cfg)
|
||||||
|
|
||||||
if not (
|
if (
|
||||||
cli_args.shard or cli_args.merge_lora or cli_args.inference
|
check_not_in(["shard", "merge_lora"], kwargs) and not cfg.inference
|
||||||
): # don't need to load dataset for these
|
): # don't need to load dataset for these
|
||||||
train_dataset, eval_dataset, total_num_steps = prepare_dataset(cfg, tokenizer)
|
train_dataset, eval_dataset, total_num_steps = prepare_dataset(cfg, tokenizer)
|
||||||
|
|
||||||
if cli_args.debug or cfg.debug:
|
if cfg.debug or "debug" in kwargs:
|
||||||
LOG.info("check_dataset_labels...")
|
LOG.info("check_dataset_labels...")
|
||||||
check_dataset_labels(
|
check_dataset_labels(
|
||||||
train_dataset.select(
|
train_dataset.select(
|
||||||
@@ -199,17 +205,17 @@ def train(
|
|||||||
tokenizer,
|
tokenizer,
|
||||||
)
|
)
|
||||||
|
|
||||||
if cli_args.prepare_ds_only:
|
if prepare_ds_only:
|
||||||
LOG.info("Finished preparing dataset. Exiting...")
|
LOG.info("Finished preparing dataset. Exiting...")
|
||||||
return
|
return
|
||||||
|
|
||||||
# Load the model and tokenizer
|
# Load the model and tokenizer
|
||||||
LOG.info("loading model and (optionally) peft_config...")
|
LOG.info("loading model and (optionally) peft_config...")
|
||||||
model, peft_config = load_model(cfg, tokenizer, inference=cli_args.inference)
|
model, peft_config = load_model(cfg, tokenizer)
|
||||||
|
|
||||||
safe_serialization = cfg.save_safetensors is True
|
safe_serialization = cfg.save_safetensors is True
|
||||||
|
|
||||||
if cli_args.merge_lora and cfg.adapter is not None:
|
if "merge_lora" in kwargs and cfg.adapter is not None:
|
||||||
LOG.info("running merge of LoRA with base model")
|
LOG.info("running merge of LoRA with base model")
|
||||||
model = model.merge_and_unload()
|
model = model.merge_and_unload()
|
||||||
model.to(dtype=torch.float16)
|
model.to(dtype=torch.float16)
|
||||||
@@ -223,13 +229,18 @@ def train(
|
|||||||
tokenizer.save_pretrained(str(Path(cfg.output_dir) / "merged"))
|
tokenizer.save_pretrained(str(Path(cfg.output_dir) / "merged"))
|
||||||
return
|
return
|
||||||
|
|
||||||
if cli_args.inference:
|
if cfg.inference:
|
||||||
LOG.debug("Running inference on model")
|
LOG.info("calling do_inference function")
|
||||||
do_inference(cfg, model, tokenizer, prompter=cli_args.prompter)
|
prompter: Optional[str] = "AlpacaPrompter"
|
||||||
|
if "prompter" in kwargs:
|
||||||
|
if kwargs["prompter"] == "None":
|
||||||
|
prompter = None
|
||||||
|
else:
|
||||||
|
prompter = kwargs["prompter"]
|
||||||
|
do_inference(cfg, model, tokenizer, prompter=prompter)
|
||||||
return
|
return
|
||||||
|
|
||||||
if cli_args.shard:
|
if "shard" in kwargs:
|
||||||
LOG.debug("Re-saving model w/ sharding")
|
|
||||||
model.save_pretrained(cfg.output_dir, safe_serialization=safe_serialization)
|
model.save_pretrained(cfg.output_dir, safe_serialization=safe_serialization)
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -311,51 +322,5 @@ def train(
|
|||||||
model.save_pretrained(cfg.output_dir, safe_serialization=safe_serialization)
|
model.save_pretrained(cfg.output_dir, safe_serialization=safe_serialization)
|
||||||
|
|
||||||
|
|
||||||
def load_cfg(config: Path = Path("examples/"), **kwargs):
|
|
||||||
if Path(config).is_dir():
|
|
||||||
config = choose_config(config)
|
|
||||||
|
|
||||||
# load the config from the yaml file
|
|
||||||
with open(config, encoding="utf-8") as file:
|
|
||||||
cfg: DictDefault = DictDefault(yaml.safe_load(file))
|
|
||||||
# if there are any options passed in the cli, if it is something that seems valid from the yaml,
|
|
||||||
# then overwrite the value
|
|
||||||
cfg_keys = cfg.keys()
|
|
||||||
for k, _ in kwargs.items():
|
|
||||||
# if not strict, allow writing to cfg even if it's not in the yml already
|
|
||||||
if k in cfg_keys or not cfg.strict:
|
|
||||||
# handle booleans
|
|
||||||
if isinstance(cfg[k], bool):
|
|
||||||
cfg[k] = bool(kwargs[k])
|
|
||||||
else:
|
|
||||||
cfg[k] = kwargs[k]
|
|
||||||
|
|
||||||
model_config = load_model_config(cfg)
|
|
||||||
|
|
||||||
# figure out if the model is llama
|
|
||||||
cfg.is_llama_derived_model = (
|
|
||||||
(hasattr(model_config, "model_type") and model_config.model_type == "llama")
|
|
||||||
or cfg.is_llama_derived_model
|
|
||||||
or "llama" in cfg.base_model
|
|
||||||
or (cfg.model_type and "llama" in cfg.model_type.lower())
|
|
||||||
)
|
|
||||||
validate_config(cfg)
|
|
||||||
|
|
||||||
normalize_config(cfg)
|
|
||||||
|
|
||||||
setup_wandb_env_vars(cfg)
|
|
||||||
return cfg
|
|
||||||
|
|
||||||
|
|
||||||
def do_train(config: Path = Path("examples/"), **kwargs):
|
|
||||||
print_axolotl_text_art()
|
|
||||||
parsed_cfg = load_cfg(config, **kwargs)
|
|
||||||
parser = transformers.HfArgumentParser((TrainerCliArgs))
|
|
||||||
parsed_cli_args, _ = parser.parse_args_into_dataclasses(
|
|
||||||
return_remaining_strings=True
|
|
||||||
)
|
|
||||||
train(cfg=parsed_cfg, cli_args=parsed_cli_args)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
fire.Fire(do_train)
|
fire.Fire(train)
|
||||||
|
|||||||
39
setup.py
39
setup.py
@@ -2,27 +2,15 @@
|
|||||||
|
|
||||||
from setuptools import find_packages, setup
|
from setuptools import find_packages, setup
|
||||||
|
|
||||||
|
install_requires = []
|
||||||
def parse_requirements():
|
with open("./requirements.txt", encoding="utf-8") as requirements_file:
|
||||||
_install_requires = []
|
# don't include peft yet until we check the int4
|
||||||
_dependency_links = []
|
# need to manually install peft for now...
|
||||||
with open("./requirements.txt", encoding="utf-8") as requirements_file:
|
reqs = [r.strip() for r in requirements_file.readlines() if "peft" not in r]
|
||||||
lines = [
|
reqs = [r for r in reqs if "flash-attn" not in r]
|
||||||
r.strip() for r in requirements_file.readlines() if "auto-gptq" not in r
|
reqs = [r for r in reqs if r and r[0] != "#"]
|
||||||
]
|
for r in reqs:
|
||||||
for line in lines:
|
install_requires.append(r)
|
||||||
if line.startswith("--extra-index-url"):
|
|
||||||
# Handle custom index URLs
|
|
||||||
_, url = line.split()
|
|
||||||
_dependency_links.append(url)
|
|
||||||
elif "flash-attn" not in line and line and line[0] != "#":
|
|
||||||
# Handle standard packages
|
|
||||||
_install_requires.append(line)
|
|
||||||
return _install_requires, _dependency_links
|
|
||||||
|
|
||||||
|
|
||||||
install_requires, dependency_links = parse_requirements()
|
|
||||||
|
|
||||||
|
|
||||||
setup(
|
setup(
|
||||||
name="axolotl",
|
name="axolotl",
|
||||||
@@ -31,10 +19,12 @@ setup(
|
|||||||
package_dir={"": "src"},
|
package_dir={"": "src"},
|
||||||
packages=find_packages(),
|
packages=find_packages(),
|
||||||
install_requires=install_requires,
|
install_requires=install_requires,
|
||||||
dependency_links=dependency_links,
|
|
||||||
extras_require={
|
extras_require={
|
||||||
"gptq": [
|
"gptq": [
|
||||||
"auto-gptq",
|
"alpaca_lora_4bit @ git+https://github.com/winglian/alpaca_lora_4bit.git@setup_pip",
|
||||||
|
],
|
||||||
|
"gptq_triton": [
|
||||||
|
"alpaca_lora_4bit[triton] @ git+https://github.com/winglian/alpaca_lora_4bit.git@setup_pip",
|
||||||
],
|
],
|
||||||
"flash-attn": [
|
"flash-attn": [
|
||||||
"flash-attn==2.0.8",
|
"flash-attn==2.0.8",
|
||||||
@@ -42,5 +32,8 @@ setup(
|
|||||||
"extras": [
|
"extras": [
|
||||||
"deepspeed",
|
"deepspeed",
|
||||||
],
|
],
|
||||||
|
"peft": [
|
||||||
|
"peft @ git+https://github.com/huggingface/peft.git",
|
||||||
|
],
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -1,9 +1,19 @@
|
|||||||
"""Callbacks for Trainer class"""
|
"""Callbacks for Trainer class"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
from typing import TYPE_CHECKING, Dict, List
|
||||||
|
|
||||||
|
import evaluate
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import torch
|
||||||
|
import torch.distributed as dist
|
||||||
|
from datasets import load_dataset
|
||||||
from optimum.bettertransformer import BetterTransformer
|
from optimum.bettertransformer import BetterTransformer
|
||||||
|
from tqdm import tqdm
|
||||||
from transformers import (
|
from transformers import (
|
||||||
TrainerCallback,
|
TrainerCallback,
|
||||||
TrainerControl,
|
TrainerControl,
|
||||||
@@ -13,8 +23,19 @@ from transformers import (
|
|||||||
from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR, IntervalStrategy
|
from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR, IntervalStrategy
|
||||||
|
|
||||||
from axolotl.utils.bench import log_gpu_memory_usage
|
from axolotl.utils.bench import log_gpu_memory_usage
|
||||||
|
from axolotl.utils.distributed import (
|
||||||
|
barrier,
|
||||||
|
gather_scalar_from_all_ranks,
|
||||||
|
get_world_size,
|
||||||
|
is_main_process,
|
||||||
|
zero_first,
|
||||||
|
)
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from axolotl.utils.trainer import AxolotlTrainingArguments
|
||||||
|
|
||||||
LOG = logging.getLogger("axolotl.callbacks")
|
LOG = logging.getLogger("axolotl.callbacks")
|
||||||
|
IGNORE_INDEX = -100
|
||||||
|
|
||||||
|
|
||||||
class SavePeftModelCallback(TrainerCallback): # pylint: disable=too-few-public-methods
|
class SavePeftModelCallback(TrainerCallback): # pylint: disable=too-few-public-methods
|
||||||
@@ -96,3 +117,192 @@ class GPUStatsCallback(
|
|||||||
log_gpu_memory_usage(LOG, "while training", self.cfg.device)
|
log_gpu_memory_usage(LOG, "while training", self.cfg.device)
|
||||||
self.logged = True
|
self.logged = True
|
||||||
return control
|
return control
|
||||||
|
|
||||||
|
|
||||||
|
def bench_eval_callback_factory(trainer, tokenizer):
|
||||||
|
accuracy = evaluate.load("accuracy")
|
||||||
|
abcd_idx = [
|
||||||
|
tokenizer("A", add_special_tokens=False).input_ids[0],
|
||||||
|
tokenizer("B", add_special_tokens=False).input_ids[0],
|
||||||
|
tokenizer("C", add_special_tokens=False).input_ids[0],
|
||||||
|
tokenizer("D", add_special_tokens=False).input_ids[0],
|
||||||
|
tokenizer("E", add_special_tokens=False).input_ids[0],
|
||||||
|
tokenizer("F", add_special_tokens=False).input_ids[0],
|
||||||
|
tokenizer("G", add_special_tokens=False).input_ids[0],
|
||||||
|
]
|
||||||
|
bench_split = "eval"
|
||||||
|
|
||||||
|
def transform_bench_subject(example):
|
||||||
|
# Split on ':' and trim whitespace
|
||||||
|
parts = example["subject"].split(":")
|
||||||
|
first_part = (
|
||||||
|
parts[0].strip().lower().replace("-", "_")
|
||||||
|
) # Lowercase the first part
|
||||||
|
second_part = (
|
||||||
|
parts[1].strip().replace("-", "_") if len(parts) > 1 else "all"
|
||||||
|
) # Replace hyphens with underscores
|
||||||
|
|
||||||
|
# Return the transformed values
|
||||||
|
return {"name": first_part, "subject": second_part}
|
||||||
|
|
||||||
|
if trainer.args.bench_dataset == "mmlu-zs":
|
||||||
|
bench_dataset = load_dataset(
|
||||||
|
"openaccess-ai-collective/mmlu-evals",
|
||||||
|
data_files={
|
||||||
|
"eval": "zero_shot_mmlu_val.json",
|
||||||
|
"test": "zero_shot_mmlu_test.json",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
# bench_dataset = bench_dataset.remove_columns("subject")
|
||||||
|
# MMLU Five-shot (Eval/Test only)
|
||||||
|
elif trainer.args.bench_dataset in ["mmlu", "mmlu-fs"]:
|
||||||
|
bench_dataset = load_dataset(
|
||||||
|
"openaccess-ai-collective/mmlu-evals",
|
||||||
|
data_files={
|
||||||
|
"eval": "five_shot_mmlu_val.json",
|
||||||
|
"test": "five_shot_mmlu_test.json",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
# bench_dataset = bench_dataset.remove_columns('subject')
|
||||||
|
elif "/" in trainer.args.bench_dataset:
|
||||||
|
bench_ds = trainer.args.bench_dataset
|
||||||
|
bench_ds_name = "/".join(bench_ds.split("/", 2)[:2])
|
||||||
|
bench_ds_data_file = "/".join(bench_ds.split("/", 2)[2:])
|
||||||
|
bench_dataset = load_dataset(
|
||||||
|
bench_ds_name,
|
||||||
|
data_files={
|
||||||
|
"eval": bench_ds_data_file,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
bench_dataset["eval"] = bench_dataset["eval"].map(transform_bench_subject)
|
||||||
|
else:
|
||||||
|
raise ValueError(
|
||||||
|
f"unhandled value `{trainer.args.bench_dataset}` for bench_dataset training args"
|
||||||
|
)
|
||||||
|
bench_dataset = bench_dataset[trainer.args.bench_split]
|
||||||
|
if trainer.args.max_bench_samples is not None:
|
||||||
|
bench_dataset = bench_dataset.select(range(trainer.args.max_bench_samples))
|
||||||
|
|
||||||
|
def tokenize_evals(example):
|
||||||
|
source = f"{tokenizer.bos_token}{example['input']}"
|
||||||
|
target = f"{example['output']}{tokenizer.eos_token}"
|
||||||
|
|
||||||
|
tokenized_source = tokenizer(
|
||||||
|
source,
|
||||||
|
max_length=2048,
|
||||||
|
truncation=True,
|
||||||
|
add_special_tokens=False,
|
||||||
|
)
|
||||||
|
tokenized_target = tokenizer(
|
||||||
|
target,
|
||||||
|
max_length=2048,
|
||||||
|
truncation=True,
|
||||||
|
add_special_tokens=False,
|
||||||
|
)
|
||||||
|
input_ids = tokenized_source["input_ids"] + tokenized_target["input_ids"]
|
||||||
|
labels = [IGNORE_INDEX] * len(tokenized_source["input_ids"]) + tokenized_target[
|
||||||
|
"input_ids"
|
||||||
|
]
|
||||||
|
|
||||||
|
return {
|
||||||
|
"input_ids": input_ids,
|
||||||
|
"labels": labels,
|
||||||
|
"subject": example["subject"],
|
||||||
|
}
|
||||||
|
|
||||||
|
with zero_first(is_main_process()):
|
||||||
|
bench_dataset = bench_dataset.map(tokenize_evals)
|
||||||
|
bench_dataset = bench_dataset.filter(lambda x: x["labels"][-2] in abcd_idx)
|
||||||
|
|
||||||
|
class BenchEvalCallback(TrainerCallback):
    """
    TrainerCallback that runs the MMLU evals
    """

    def on_evaluate(
        self,
        args: AxolotlTrainingArguments,
        state: TrainerState,  # pylint: disable=unused-argument
        control: TrainerControl,  # pylint: disable=unused-argument
        metrics: Dict[str, float],  # pylint: disable=unused-argument
        **kwargs,  # pylint: disable=unused-argument
    ):
        """
        Run the benchmark dataset through the model and log loss and accuracy.

        Each rank runs ``trainer.prediction_step`` over the bench dataloader,
        collects a prediction and a reference per example, and groups them by
        the dataset's ``name`` column. The per-rank results are gathered on
        the main process, which computes one accuracy per name plus their
        mean and hands everything to ``trainer.log``.
        """
        data_loader = trainer.get_bench_dataloader(
            bench_dataset.remove_columns(["input", "subject", "output", "name"])
        )
        trainer.model.eval()
        preds, refs = [], []
        loss_bench = 0
        for batch in tqdm(data_loader, total=len(data_loader)):
            (loss, logits, labels) = trainer.prediction_step(
                trainer.model,
                batch,
                prediction_loss_only=False,
            )
            # There are two tokens, the output, and eos token.
            for i, logit in enumerate(logits):
                # index of the first position whose label is not masked
                label_non_zero_id = (batch["labels"][i] != IGNORE_INDEX).nonzero()[0][0]
                # the logits one position earlier are the ones predicting that
                # token; restrict them to the candidate ids in `abcd_idx`
                logit_abcd = logit[label_non_zero_id - 1][abcd_idx]
                preds.append(torch.argmax(logit_abcd).item())
            # keep the first of the two unmasked label tokens of every row
            labels = labels[labels != IGNORE_INDEX].view(-1, 2)[:, 0]
            refs += [
                abcd_idx.index(label) if label in abcd_idx else -1
                for label in labels.tolist()
            ]
            loss_bench += loss.item()

        # Extract results by subject.
        row_names = bench_dataset["name"]
        bench_names: dict = {s: {"refs": [], "preds": []} for s in set(row_names)}
        for s, p, r in zip(row_names, preds, refs):  # pylint: disable=invalid-name
            bench_names[s]["preds"].append(p)
            bench_names[s]["refs"].append(r)
        barrier()
        local_bench_names = bench_names
        gathered_bench_names: List[Dict] = [{} for _ in range(get_world_size())]

        # Gather results from all GPUs to GPU 0
        loss_bench_ranks = gather_scalar_from_all_ranks(
            lambda: loss_bench, get_world_size()
        )
        len_data_loader_ranks = gather_scalar_from_all_ranks(
            lambda: len(data_loader), get_world_size()
        )

        if not is_main_process():
            dist.gather_object(local_bench_names, dst=0)
            return

        dist.gather_object(local_bench_names, gathered_bench_names, dst=0)
        bench_loss = sum(loss_bench_ranks) / sum(len_data_loader_ranks)
        results = {"bench_loss": bench_loss}

        # Combine results from all GPUs
        combined_bench_names: Dict[str, Dict[str, List]] = {}
        for rank_results in gathered_bench_names:
            for name, data in rank_results.items():
                merged = combined_bench_names.setdefault(
                    name, {"refs": [], "preds": []}
                )
                merged["refs"].extend(data["refs"])
                merged["preds"].extend(data["preds"])

        bench_scores = []
        for name, data in combined_bench_names.items():
            bench_score = accuracy.compute(
                references=data["refs"],
                predictions=data["preds"],
            )["accuracy"]
            # an undefined (NaN) accuracy is reported as 0.0
            if pd.isna(bench_score):
                bench_score = 0.0
            results[f"bench_{bench_split}_accuracy_{name}"] = bench_score
            bench_scores.append(bench_score)
        results[f"bench_{bench_split}_accuracy"] = np.mean(bench_scores)
        trainer.log(results)
|
||||||
|
|
||||||
|
return BenchEvalCallback
|
||||||
|
|||||||
@@ -97,7 +97,9 @@ def validate_config(cfg):
|
|||||||
"To calculate the equivalent gradient_accumulation_steps, divide batch_size / micro_batch_size / number of gpus.",
|
"To calculate the equivalent gradient_accumulation_steps, divide batch_size / micro_batch_size / number of gpus.",
|
||||||
)
|
)
|
||||||
if cfg.load_4bit:
|
if cfg.load_4bit:
|
||||||
raise ValueError("cfg.load_4bit parameter has been deprecated")
|
raise ValueError(
|
||||||
|
"cfg.load_4bit parameter has been deprecated and replaced by cfg.gptq"
|
||||||
|
)
|
||||||
|
|
||||||
if cfg.adapter == "qlora":
|
if cfg.adapter == "qlora":
|
||||||
if cfg.merge_lora:
|
if cfg.merge_lora:
|
||||||
|
|||||||
@@ -134,17 +134,8 @@ def load_tokenized_prepared_datasets(
|
|||||||
seed = 42
|
seed = 42
|
||||||
|
|
||||||
datasets = []
|
datasets = []
|
||||||
|
|
||||||
def for_d_in_datasets(dataset_configs):
|
|
||||||
for dataset in dataset_configs:
|
|
||||||
if dataset.name and isinstance(dataset.name, list):
|
|
||||||
for name in dataset.name:
|
|
||||||
yield DictDefault({**dataset, "name": name})
|
|
||||||
else:
|
|
||||||
yield dataset
|
|
||||||
|
|
||||||
# pylint: disable=invalid-name
|
# pylint: disable=invalid-name
|
||||||
for d in for_d_in_datasets(cfg.datasets):
|
for d in cfg.datasets:
|
||||||
ds: Union[Dataset, DatasetDict] = None
|
ds: Union[Dataset, DatasetDict] = None
|
||||||
ds_from_hub = False
|
ds_from_hub = False
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -1,8 +1,10 @@
|
|||||||
"""
|
"""
|
||||||
utility helpers for distributed checks
|
utility helpers for distributed checks
|
||||||
"""
|
"""
|
||||||
|
import os
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
|
|
||||||
|
import torch
|
||||||
import torch.distributed as dist
|
import torch.distributed as dist
|
||||||
from accelerate import Accelerator
|
from accelerate import Accelerator
|
||||||
|
|
||||||
@@ -43,6 +45,10 @@ def is_main_process():
|
|||||||
return dist.get_rank() == 0
|
return dist.get_rank() == 0
|
||||||
|
|
||||||
|
|
||||||
|
def get_world_size():
    """
    Return the number of processes in the job.

    The value is read from the ``WORLD_SIZE`` environment variable and
    defaults to 1 when the variable is not set.
    """
    return int(os.getenv("WORLD_SIZE", "1"))
|
||||||
|
|
||||||
|
|
||||||
@contextmanager
|
@contextmanager
|
||||||
def zero_first(is_main):
|
def zero_first(is_main):
|
||||||
"""
|
"""
|
||||||
@@ -53,3 +59,35 @@ def zero_first(is_main):
|
|||||||
yield
|
yield
|
||||||
if is_main: # then rank 0 waits after it has run the context
|
if is_main: # then rank 0 waits after it has run the context
|
||||||
barrier()
|
barrier()
|
||||||
|
|
||||||
|
|
||||||
|
def gather_scalar_from_all_ranks(fn, world_size=1):  # pylint: disable=invalid-name
    """
    Run a callable 'fn' on all ranks and gather the results on rank 0.

    Args:
    - fn (callable): A function that computes the value. This should not have any side effects.
    - world_size (int, optional): Total number of processes in the current distributed setup.

    Returns:
    - A list of computed values from all ranks if on the gathering rank, otherwise None.
      When no process group is initialised the list holds just the local value.
    """
    value_scalar = fn()

    # Without an initialised process group there is nothing to gather from, and
    # every `dist.*` call below would raise.
    if not (dist.is_available() and dist.is_initialized()):
        return [value_scalar]

    # Use the device this process is bound to rather than the global rank: the
    # global rank is not a valid device index once a job spans several nodes.
    device = torch.cuda.current_device() if torch.cuda.is_available() else "cpu"
    value_tensor = torch.tensor(value_scalar, device=device).float()

    if not is_main_process():
        dist.gather(value_tensor, dst=0)
        return None

    gathered_tensors = [torch.zeros_like(value_tensor) for _ in range(world_size)]
    dist.gather(value_tensor, gather_list=gathered_tensors, dst=0)

    # Convert tensors back to their original type (int or float)
    return [
        int(tensor.item()) if tensor == tensor.int() else float(tensor.item())
        for tensor in gathered_tensors
    ]
|
||||||
|
|||||||
@@ -4,19 +4,18 @@
|
|||||||
import logging
|
import logging
|
||||||
import math
|
import math
|
||||||
import os
|
import os
|
||||||
from typing import Optional, Tuple # noqa: F401
|
from pathlib import Path
|
||||||
|
from typing import TYPE_CHECKING, Optional, Tuple # noqa: F401
|
||||||
|
|
||||||
import bitsandbytes as bnb
|
import bitsandbytes as bnb
|
||||||
import torch
|
import torch
|
||||||
import transformers
|
import transformers
|
||||||
from optimum.bettertransformer import BetterTransformer
|
from optimum.bettertransformer import BetterTransformer
|
||||||
from peft import PeftConfig, prepare_model_for_kbit_training
|
|
||||||
from transformers import ( # noqa: F401
|
from transformers import ( # noqa: F401
|
||||||
AutoConfig,
|
AutoConfig,
|
||||||
AutoModelForCausalLM,
|
AutoModelForCausalLM,
|
||||||
AutoTokenizer,
|
AutoTokenizer,
|
||||||
BitsAndBytesConfig,
|
BitsAndBytesConfig,
|
||||||
GPTQConfig,
|
|
||||||
LlamaConfig,
|
LlamaConfig,
|
||||||
PreTrainedModel,
|
PreTrainedModel,
|
||||||
PreTrainedTokenizerBase,
|
PreTrainedTokenizerBase,
|
||||||
@@ -24,17 +23,13 @@ from transformers import ( # noqa: F401
|
|||||||
|
|
||||||
from axolotl.prompt_tokenizers import LLAMA_DEFAULT_EOS_TOKEN
|
from axolotl.prompt_tokenizers import LLAMA_DEFAULT_EOS_TOKEN
|
||||||
from axolotl.utils.bench import log_gpu_memory_usage
|
from axolotl.utils.bench import log_gpu_memory_usage
|
||||||
from axolotl.utils.dict import DictDefault
|
|
||||||
|
|
||||||
LOG = logging.getLogger("axolotl")
|
LOG = logging.getLogger("axolotl")
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from peft import PeftConfig # noqa: F401
|
||||||
|
|
||||||
def load_model_config(cfg):
|
from axolotl.utils.dict import DictDefault # noqa: F401
|
||||||
model_config_name = cfg.base_model_config or cfg.base_model
|
|
||||||
trust_remote_code: bool = False or cfg.trust_remote_code
|
|
||||||
return AutoConfig.from_pretrained(
|
|
||||||
model_config_name, trust_remote_code=trust_remote_code
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def load_tokenizer(cfg):
|
def load_tokenizer(cfg):
|
||||||
@@ -91,10 +86,8 @@ def load_tokenizer(cfg):
|
|||||||
|
|
||||||
|
|
||||||
def load_model(
|
def load_model(
|
||||||
cfg: DictDefault,
|
cfg, tokenizer
|
||||||
tokenizer: PreTrainedTokenizerBase,
|
): # type: (DictDefault, PreTrainedTokenizerBase) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
|
||||||
inference: bool = False,
|
|
||||||
) -> Tuple[PreTrainedModel, Optional[PeftConfig]]:
|
|
||||||
"""
|
"""
|
||||||
Load a model for a given configuration and tokenizer.
|
Load a model for a given configuration and tokenizer.
|
||||||
"""
|
"""
|
||||||
@@ -104,9 +97,14 @@ def load_model(
|
|||||||
|
|
||||||
# TODO refactor as a kwarg
|
# TODO refactor as a kwarg
|
||||||
load_in_8bit = cfg.load_in_8bit
|
load_in_8bit = cfg.load_in_8bit
|
||||||
|
cfg.is_llama_derived_model = (
|
||||||
|
"llama" in base_model
|
||||||
|
or (cfg.model_type and "llama" in cfg.model_type.lower())
|
||||||
|
or cfg.is_llama_derived_model
|
||||||
|
)
|
||||||
|
|
||||||
if cfg.is_llama_derived_model and cfg.flash_attention:
|
if cfg.is_llama_derived_model and cfg.flash_attention:
|
||||||
if cfg.device not in ["mps", "cpu"] and not inference:
|
if cfg.device not in ["mps", "cpu"] and not cfg.inference:
|
||||||
from axolotl.monkeypatch.llama_attn_hijack_flash import (
|
from axolotl.monkeypatch.llama_attn_hijack_flash import (
|
||||||
replace_llama_attn_with_flash_attn,
|
replace_llama_attn_with_flash_attn,
|
||||||
)
|
)
|
||||||
@@ -148,22 +146,39 @@ def load_model(
|
|||||||
if (
|
if (
|
||||||
cfg.is_llama_derived_model
|
cfg.is_llama_derived_model
|
||||||
and (cfg.max_packed_sequence_len or cfg.sample_packing)
|
and (cfg.max_packed_sequence_len or cfg.sample_packing)
|
||||||
and not inference
|
and not cfg.inference
|
||||||
):
|
):
|
||||||
from axolotl.monkeypatch.llama_expand_mask import hijack_expand_mask
|
from axolotl.monkeypatch.llama_expand_mask import hijack_expand_mask
|
||||||
|
|
||||||
LOG.info("patching _expand_mask")
|
LOG.info("patching _expand_mask")
|
||||||
hijack_expand_mask()
|
hijack_expand_mask()
|
||||||
|
|
||||||
|
try:
|
||||||
|
if cfg.gptq:
|
||||||
|
from alpaca_lora_4bit.monkeypatch.peft_tuners_lora_monkey_patch import (
|
||||||
|
replace_peft_model_with_int4_lora_model,
|
||||||
|
)
|
||||||
|
|
||||||
|
replace_peft_model_with_int4_lora_model()
|
||||||
|
except Exception as err:
|
||||||
|
LOG.exception(err)
|
||||||
|
raise err
|
||||||
|
|
||||||
|
if not cfg.gptq and (
|
||||||
|
(cfg.adapter == "lora" and load_in_8bit)
|
||||||
|
or (cfg.adapter == "qlora" and cfg.load_in_4bit)
|
||||||
|
):
|
||||||
|
try:
|
||||||
|
from peft import prepare_model_for_kbit_training
|
||||||
|
except ImportError:
|
||||||
|
# For backward compatibility
|
||||||
|
from peft import (
|
||||||
|
prepare_model_for_int8_training as prepare_model_for_kbit_training,
|
||||||
|
)
|
||||||
|
|
||||||
model_kwargs = {}
|
model_kwargs = {}
|
||||||
if cfg.model_revision:
|
if cfg.model_revision:
|
||||||
model_kwargs["revision"] = cfg.model_revision
|
model_kwargs["revision"] = cfg.model_revision
|
||||||
if cfg.gptq:
|
|
||||||
# TODO we should figure out how read the models config.json first
|
|
||||||
model_kwargs["quantization_config"] = GPTQConfig(
|
|
||||||
bits=cfg.gptq_bits,
|
|
||||||
disable_exllama=True,
|
|
||||||
)
|
|
||||||
if cfg.adapter == "qlora" and cfg.load_in_4bit:
|
if cfg.adapter == "qlora" and cfg.load_in_4bit:
|
||||||
model_kwargs["quantization_config"] = BitsAndBytesConfig(
|
model_kwargs["quantization_config"] = BitsAndBytesConfig(
|
||||||
load_in_4bit=True,
|
load_in_4bit=True,
|
||||||
@@ -174,7 +189,45 @@ def load_model(
|
|||||||
bnb_4bit_quant_type="nf4",
|
bnb_4bit_quant_type="nf4",
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
if cfg.is_llama_derived_model and not cfg.trust_remote_code and not cfg.gptq:
|
if cfg.gptq and cfg.is_llama_derived_model:
|
||||||
|
from alpaca_lora_4bit.autograd_4bit import load_llama_model_4bit_low_ram
|
||||||
|
from huggingface_hub import snapshot_download
|
||||||
|
|
||||||
|
try:
|
||||||
|
snapshot_download_kwargs = {}
|
||||||
|
if cfg.base_model_ignore_patterns:
|
||||||
|
snapshot_download_kwargs[
|
||||||
|
"ignore_patterns"
|
||||||
|
] = cfg.base_model_ignore_patterns
|
||||||
|
cache_model_path = Path(
|
||||||
|
snapshot_download(base_model, **snapshot_download_kwargs)
|
||||||
|
)
|
||||||
|
files = (
|
||||||
|
list(cache_model_path.glob("*.pt"))
|
||||||
|
+ list(cache_model_path.glob("*.safetensors"))
|
||||||
|
+ list(cache_model_path.glob("*.bin"))
|
||||||
|
)
|
||||||
|
if len(files) > 0:
|
||||||
|
model_path = str(files[0])
|
||||||
|
else:
|
||||||
|
LOG.warning(
|
||||||
|
"unable to find a cached model file, this will likely fail..."
|
||||||
|
)
|
||||||
|
model_path = str(cache_model_path)
|
||||||
|
except Exception: # pylint: disable=broad-exception-caught
|
||||||
|
model_path = cfg.base_model
|
||||||
|
model, _ = load_llama_model_4bit_low_ram(
|
||||||
|
base_model_config if base_model_config else base_model,
|
||||||
|
model_path,
|
||||||
|
device_map=cfg.device_map,
|
||||||
|
half=cfg.fp16,
|
||||||
|
groupsize=cfg.gptq_groupsize if cfg.gptq_groupsize else -1,
|
||||||
|
is_v1_model=cfg.gptq_model_v1
|
||||||
|
if cfg.gptq_model_v1 is not None
|
||||||
|
else True,
|
||||||
|
)
|
||||||
|
load_in_8bit = False
|
||||||
|
elif cfg.is_llama_derived_model and not cfg.trust_remote_code:
|
||||||
from transformers import LlamaForCausalLM
|
from transformers import LlamaForCausalLM
|
||||||
|
|
||||||
config_kwargs = {}
|
config_kwargs = {}
|
||||||
@@ -220,24 +273,15 @@ def load_model(
|
|||||||
# )
|
# )
|
||||||
# model.train() # sets to train instead of eval mode
|
# model.train() # sets to train instead of eval mode
|
||||||
elif model_type and not cfg.trust_remote_code:
|
elif model_type and not cfg.trust_remote_code:
|
||||||
if cfg.gptq:
|
model = getattr(transformers, model_type).from_pretrained(
|
||||||
model = AutoModelForCausalLM.from_pretrained(
|
base_model,
|
||||||
base_model,
|
device_map=cfg.device_map,
|
||||||
device_map=cfg.device_map,
|
load_in_8bit=cfg.load_in_8bit and cfg.adapter is not None,
|
||||||
torch_dtype=cfg.torch_dtype,
|
load_in_4bit=cfg.load_in_4bit and cfg.adapter is not None,
|
||||||
trust_remote_code=cfg.trust_remote_code or False,
|
torch_dtype=cfg.torch_dtype,
|
||||||
**model_kwargs,
|
trust_remote_code=cfg.trust_remote_code or False,
|
||||||
)
|
**model_kwargs,
|
||||||
else:
|
)
|
||||||
model = getattr(transformers, model_type).from_pretrained(
|
|
||||||
base_model,
|
|
||||||
device_map=cfg.device_map,
|
|
||||||
load_in_8bit=cfg.load_in_8bit and cfg.adapter is not None,
|
|
||||||
load_in_4bit=cfg.load_in_4bit and cfg.adapter is not None,
|
|
||||||
torch_dtype=cfg.torch_dtype,
|
|
||||||
trust_remote_code=cfg.trust_remote_code or False,
|
|
||||||
**model_kwargs,
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
config = AutoConfig.from_pretrained(
|
config = AutoConfig.from_pretrained(
|
||||||
base_model,
|
base_model,
|
||||||
@@ -313,12 +357,11 @@ def load_model(
|
|||||||
module.to(torch.float32)
|
module.to(torch.float32)
|
||||||
|
|
||||||
needs_fa2_dtype = cfg.adapter or cfg.fsdp
|
needs_fa2_dtype = cfg.adapter or cfg.fsdp
|
||||||
if (cfg.adapter == "lora" and load_in_8bit) or (
|
if not cfg.gptq and (
|
||||||
cfg.adapter == "qlora" and cfg.load_in_4bit
|
(cfg.adapter == "lora" and load_in_8bit)
|
||||||
|
or (cfg.adapter == "qlora" and cfg.load_in_4bit)
|
||||||
):
|
):
|
||||||
LOG.info("converting PEFT model w/ prepare_model_for_kbit_training")
|
LOG.info("converting PEFT model w/ prepare_model_for_kbit_training")
|
||||||
if cfg.gradient_checkpointing:
|
|
||||||
model.gradient_checkpointing_enable()
|
|
||||||
model = prepare_model_for_kbit_training(
|
model = prepare_model_for_kbit_training(
|
||||||
model, use_gradient_checkpointing=cfg.gradient_checkpointing
|
model, use_gradient_checkpointing=cfg.gradient_checkpointing
|
||||||
)
|
)
|
||||||
@@ -340,10 +383,22 @@ def load_model(
|
|||||||
if cfg.ddp and not load_in_8bit:
|
if cfg.ddp and not load_in_8bit:
|
||||||
model.to(f"cuda:{cfg.local_rank}")
|
model.to(f"cuda:{cfg.local_rank}")
|
||||||
|
|
||||||
|
if cfg.gptq:
|
||||||
|
# Scales to half
|
||||||
|
LOG.info("Fitting 4bit scales and zeros to half")
|
||||||
|
for _, module in model.named_modules():
|
||||||
|
if "Autograd4bitQuantLinear" in str(type(module)) or "Linear4bitLt" in str(
|
||||||
|
type(module)
|
||||||
|
):
|
||||||
|
if hasattr(module, "is_v1_model") and module.is_v1_model:
|
||||||
|
module.zeros = module.zeros.half()
|
||||||
|
module.scales = module.scales.half()
|
||||||
|
module.bias = module.bias.half()
|
||||||
|
|
||||||
if (
|
if (
|
||||||
torch.cuda.device_count() > 1
|
torch.cuda.device_count() > 1
|
||||||
and int(os.getenv("WORLD_SIZE", "1")) > 1
|
and int(os.getenv("WORLD_SIZE", "1")) > 1
|
||||||
and (cfg.load_in_4bit)
|
and (cfg.gptq or cfg.load_in_4bit)
|
||||||
):
|
):
|
||||||
# llama is PROBABLY model parallelizable, but the default isn't that it is
|
# llama is PROBABLY model parallelizable, but the default isn't that it is
|
||||||
# so let's only set it for the 4bit, see
|
# so let's only set it for the 4bit, see
|
||||||
@@ -369,15 +424,15 @@ def load_model(
|
|||||||
return model, lora_config
|
return model, lora_config
|
||||||
|
|
||||||
|
|
||||||
def load_adapter(model, cfg, adapter, inference=False):
|
def load_adapter(model, cfg, adapter):
|
||||||
# type: (PreTrainedModel, DictDefault, Optional[str], bool) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
|
# type: (PreTrainedModel, DictDefault, Optional[str]) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
|
||||||
|
|
||||||
if adapter is None:
|
if adapter is None:
|
||||||
return model, None
|
return model, None
|
||||||
if hasattr(model, "enable_input_require_grads"):
|
if hasattr(model, "enable_input_require_grads"):
|
||||||
model.enable_input_require_grads()
|
model.enable_input_require_grads()
|
||||||
if adapter in ["lora", "qlora"]:
|
if adapter in ["lora", "qlora"]:
|
||||||
return load_lora(model, cfg, inference=inference)
|
return load_lora(model, cfg)
|
||||||
if adapter == "llama-adapter":
|
if adapter == "llama-adapter":
|
||||||
return load_llama_adapter(model, cfg)
|
return load_llama_adapter(model, cfg)
|
||||||
|
|
||||||
@@ -409,8 +464,12 @@ def load_llama_adapter(model, cfg):
|
|||||||
return model, peft_config
|
return model, peft_config
|
||||||
|
|
||||||
|
|
||||||
def find_all_linear_names(model):
|
def find_all_linear_names(bits, model):
|
||||||
cls = (bnb.nn.Linear4bit, bnb.nn.Linear8bitLt, torch.nn.Linear)
|
cls = (
|
||||||
|
bnb.nn.Linear4bit
|
||||||
|
if bits == 4
|
||||||
|
else (bnb.nn.Linear8bitLt if bits == 8 else torch.nn.Linear)
|
||||||
|
)
|
||||||
lora_module_names = set()
|
lora_module_names = set()
|
||||||
for name, module in model.named_modules():
|
for name, module in model.named_modules():
|
||||||
if isinstance(module, cls):
|
if isinstance(module, cls):
|
||||||
@@ -423,15 +482,21 @@ def find_all_linear_names(model):
|
|||||||
return list(lora_module_names)
|
return list(lora_module_names)
|
||||||
|
|
||||||
|
|
||||||
def load_lora(model, cfg, inference=False):
|
def load_lora(model, cfg):
|
||||||
# type: (PreTrainedModel, DictDefault, bool) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
|
# type: (PreTrainedModel, DictDefault) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
|
||||||
|
|
||||||
from peft import LoraConfig, PeftModel, get_peft_model
|
from peft import LoraConfig, PeftModel, get_peft_model
|
||||||
|
|
||||||
lora_target_modules = list(cfg.lora_target_modules or [])
|
lora_target_modules = list(cfg.lora_target_modules or [])
|
||||||
|
|
||||||
if cfg.lora_target_linear:
|
if cfg.lora_target_linear:
|
||||||
linear_names = find_all_linear_names(model)
|
bits = None
|
||||||
|
if cfg.load_in_4bit:
|
||||||
|
bits = 4
|
||||||
|
elif cfg.load_in_8bit:
|
||||||
|
bits = 8
|
||||||
|
|
||||||
|
linear_names = find_all_linear_names(bits, model)
|
||||||
LOG.info(f"found linear modules: {repr(linear_names)}")
|
LOG.info(f"found linear modules: {repr(linear_names)}")
|
||||||
lora_target_modules = list(set(lora_target_modules + linear_names))
|
lora_target_modules = list(set(lora_target_modules + linear_names))
|
||||||
|
|
||||||
@@ -451,7 +516,7 @@ def load_lora(model, cfg, inference=False):
|
|||||||
model = PeftModel.from_pretrained(
|
model = PeftModel.from_pretrained(
|
||||||
model,
|
model,
|
||||||
cfg.lora_model_dir,
|
cfg.lora_model_dir,
|
||||||
is_trainable=(not inference),
|
is_trainable=not cfg.inference,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
model = get_peft_model(model, lora_config)
|
model = get_peft_model(model, lora_config)
|
||||||
|
|||||||
@@ -12,9 +12,15 @@ from typing import Optional, Union
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch.cuda
|
import torch.cuda
|
||||||
|
import transformers
|
||||||
from datasets import Dataset, set_caching_enabled
|
from datasets import Dataset, set_caching_enabled
|
||||||
from torch.optim.lr_scheduler import OneCycleLR
|
from torch.optim.lr_scheduler import OneCycleLR
|
||||||
from torch.utils.data import DataLoader, DistributedSampler, RandomSampler
|
from torch.utils.data import (
|
||||||
|
DataLoader,
|
||||||
|
DistributedSampler,
|
||||||
|
RandomSampler,
|
||||||
|
SequentialSampler,
|
||||||
|
)
|
||||||
from transformers import EarlyStoppingCallback, Trainer, TrainingArguments
|
from transformers import EarlyStoppingCallback, Trainer, TrainingArguments
|
||||||
from transformers.trainer_pt_utils import SequentialDistributedSampler
|
from transformers.trainer_pt_utils import SequentialDistributedSampler
|
||||||
|
|
||||||
@@ -23,6 +29,7 @@ from axolotl.utils.callbacks import (
|
|||||||
GPUStatsCallback,
|
GPUStatsCallback,
|
||||||
SaveBetterTransformerModelCallback,
|
SaveBetterTransformerModelCallback,
|
||||||
SavePeftModelCallback,
|
SavePeftModelCallback,
|
||||||
|
bench_eval_callback_factory,
|
||||||
)
|
)
|
||||||
from axolotl.utils.collators import DataCollatorForSeq2Seq
|
from axolotl.utils.collators import DataCollatorForSeq2Seq
|
||||||
from axolotl.utils.dataloader import MultipackDistributedDataloader
|
from axolotl.utils.dataloader import MultipackDistributedDataloader
|
||||||
@@ -127,6 +134,27 @@ class AxolotlTrainingArguments(TrainingArguments):
|
|||||||
default=None,
|
default=None,
|
||||||
metadata={"help": "how many warmup steps to take after reset for ReLoRA"},
|
metadata={"help": "how many warmup steps to take after reset for ReLoRA"},
|
||||||
)
|
)
|
||||||
|
bench_split: Optional[str] = field(
|
||||||
|
default="eval", metadata={"help": "The benchmark split to run on"}
|
||||||
|
)
|
||||||
|
bench_dataset: Optional[str] = field(
|
||||||
|
default="pharaouk/dharma-1/dharma_1_mini.json",
|
||||||
|
metadata={
|
||||||
|
"help": "Benchmark dataset to use: options are `mmlu-zs`, `mmlu-fs`, or the full path to the dataset file"
|
||||||
|
},
|
||||||
|
)
|
||||||
|
do_bench_eval: Optional[bool] = field(
|
||||||
|
default=False, metadata={"help": "Whether to run the Benchmark evaluation."}
|
||||||
|
)
|
||||||
|
max_bench_samples: Optional[int] = field(
|
||||||
|
default=None,
|
||||||
|
metadata={
|
||||||
|
"help": "If set, only evaluates on `max_bench_samples` of the benchmark dataset."
|
||||||
|
},
|
||||||
|
)
|
||||||
|
bench_source_max_len: int = field(
|
||||||
|
default=2048, metadata={"help": "Maximum source sequence length for bench."}
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class AxolotlTrainer(Trainer):
|
class AxolotlTrainer(Trainer):
|
||||||
@@ -136,6 +164,10 @@ class AxolotlTrainer(Trainer):
|
|||||||
|
|
||||||
args = None # type: AxolotlTrainingArguments
|
args = None # type: AxolotlTrainingArguments
|
||||||
|
|
||||||
|
def __init__(self, *args, bench_data_collator=None, **kwargs):
    """
    Store the collator used for benchmark batches, then defer to the parent
    constructor with the remaining positional and keyword arguments.
    """
    self.bench_data_collator = bench_data_collator
    super().__init__(*args, **kwargs)
|
||||||
|
|
||||||
def create_scheduler(
|
def create_scheduler(
|
||||||
self, num_training_steps: int, optimizer: torch.optim.Optimizer = None
|
self, num_training_steps: int, optimizer: torch.optim.Optimizer = None
|
||||||
):
|
):
|
||||||
@@ -226,6 +258,31 @@ class AxolotlTrainer(Trainer):
|
|||||||
)
|
)
|
||||||
return super().get_eval_dataloader(eval_dataset)
|
return super().get_eval_dataloader(eval_dataset)
|
||||||
|
|
||||||
|
def _get_bench_sampler(
|
||||||
|
self, bench_dataset: Dataset
|
||||||
|
) -> Optional[torch.utils.data.Sampler]:
|
||||||
|
if self.args.world_size <= 1:
|
||||||
|
return SequentialSampler(bench_dataset)
|
||||||
|
return None
|
||||||
|
|
||||||
|
def get_bench_dataloader(
    self,
    bench_dataset: Dataset,
) -> Union[DataLoader, MultipackDistributedDataloader]:
    """
    Build the ``DataLoader`` used to iterate over the benchmark dataset.

    Batch size, worker count and pin-memory setting come from ``self.args``;
    batches are collated with ``self.bench_data_collator``. For datasets that
    are not ``IterableDataset`` instances, the sampler from
    ``_get_bench_sampler`` and the ``dataloader_drop_last`` setting are
    applied as well.
    """
    dataloader_params = {
        "batch_size": self.args.eval_batch_size,
        "collate_fn": self.bench_data_collator,
        "num_workers": self.args.dataloader_num_workers,
        "pin_memory": self.args.dataloader_pin_memory,
    }

    # `sampler` and `drop_last` are only set for non-iterable datasets
    if not isinstance(bench_dataset, torch.utils.data.IterableDataset):
        dataloader_params["sampler"] = self._get_bench_sampler(bench_dataset)
        dataloader_params["drop_last"] = self.args.dataloader_drop_last

    # NOTE(review): the loader is returned as-is and is not passed through
    # `self.accelerator.prepare(...)`; confirm this is intentional.
    return DataLoader(bench_dataset, **dataloader_params)
|
||||||
|
|
||||||
def compute_loss(self, model, inputs, return_outputs=False):
|
def compute_loss(self, model, inputs, return_outputs=False):
|
||||||
# use one's weighted cross entropy loss calc
|
# use one's weighted cross entropy loss calc
|
||||||
# if self.args.sample_packing:
|
# if self.args.sample_packing:
|
||||||
@@ -447,7 +504,23 @@ def setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer, total_num_
|
|||||||
training_arguments_kwargs["seed"] = cfg.seed
|
training_arguments_kwargs["seed"] = cfg.seed
|
||||||
|
|
||||||
if cfg.gradient_checkpointing:
|
if cfg.gradient_checkpointing:
|
||||||
training_arguments_kwargs["gradient_checkpointing"] = cfg.gradient_checkpointing
|
if cfg.gptq:
|
||||||
|
from alpaca_lora_4bit.gradient_checkpointing import (
|
||||||
|
apply_gradient_checkpointing,
|
||||||
|
)
|
||||||
|
|
||||||
|
gradient_checkpointing_ratio = (
|
||||||
|
cfg.gradient_checkpointing_ratio
|
||||||
|
if cfg.gradient_checkpointing_ratio
|
||||||
|
else 1.0
|
||||||
|
)
|
||||||
|
apply_gradient_checkpointing(
|
||||||
|
model, checkpoint_ratio=gradient_checkpointing_ratio
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
training_arguments_kwargs[
|
||||||
|
"gradient_checkpointing"
|
||||||
|
] = cfg.gradient_checkpointing
|
||||||
if cfg.fsdp:
|
if cfg.fsdp:
|
||||||
training_arguments_kwargs["fsdp"] = cfg.fsdp
|
training_arguments_kwargs["fsdp"] = cfg.fsdp
|
||||||
if cfg.fsdp_config:
|
if cfg.fsdp_config:
|
||||||
@@ -501,6 +574,11 @@ def setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer, total_num_
|
|||||||
"steps" if cfg.save_steps else "epoch"
|
"steps" if cfg.save_steps else "epoch"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if cfg.do_bench_eval:
|
||||||
|
training_arguments_kwargs["do_bench_eval"] = cfg.do_bench_eval
|
||||||
|
if cfg.bench_dataset:
|
||||||
|
training_arguments_kwargs["bench_dataset"] = cfg.bench_dataset
|
||||||
|
|
||||||
training_args = AxolotlTrainingArguments( # pylint: disable=unexpected-keyword-arg
|
training_args = AxolotlTrainingArguments( # pylint: disable=unexpected-keyword-arg
|
||||||
max_steps=total_num_steps if cfg.max_steps else -1,
|
max_steps=total_num_steps if cfg.max_steps else -1,
|
||||||
max_seq_length=cfg.sequence_len,
|
max_seq_length=cfg.sequence_len,
|
||||||
@@ -569,12 +647,10 @@ def setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer, total_num_
|
|||||||
callbacks.append(SaveBetterTransformerModelCallback)
|
callbacks.append(SaveBetterTransformerModelCallback)
|
||||||
|
|
||||||
data_collator_kwargs = {
|
data_collator_kwargs = {
|
||||||
"padding": True, # True/"longest" is the default
|
"padding": True,
|
||||||
}
|
}
|
||||||
if cfg.pad_to_sequence_len:
|
if cfg.collator_pad_to_longest:
|
||||||
data_collator_kwargs["pad_to_multiple_of"] = 64 * math.ceil(
|
data_collator_kwargs["padding"] = "longest"
|
||||||
cfg.sequence_len / 64
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
# A100 is best at 64, while others at 8. Let's use the larger so we don't have to check
|
# A100 is best at 64, while others at 8. Let's use the larger so we don't have to check
|
||||||
# https://docs.nvidia.com/deeplearning/performance/dl-performance-matrix-multiplication/index.html
|
# https://docs.nvidia.com/deeplearning/performance/dl-performance-matrix-multiplication/index.html
|
||||||
@@ -613,8 +689,16 @@ def setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer, total_num_
|
|||||||
return_tensors="pt",
|
return_tensors="pt",
|
||||||
**data_collator_kwargs,
|
**data_collator_kwargs,
|
||||||
),
|
),
|
||||||
|
bench_data_collator=transformers.DataCollatorForSeq2Seq(
|
||||||
|
tokenizer,
|
||||||
|
return_tensors="pt",
|
||||||
|
**data_collator_kwargs,
|
||||||
|
),
|
||||||
callbacks=callbacks,
|
callbacks=callbacks,
|
||||||
**trainer_kwargs,
|
**trainer_kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if cfg.do_bench_eval:
|
||||||
|
trainer.add_callback(bench_eval_callback_factory(trainer, tokenizer))
|
||||||
|
|
||||||
return trainer
|
return trainer
|
||||||
|
|||||||
Reference in New Issue
Block a user