update README w deepspeed info (#605)
This commit is contained in:
30
README.md
30
README.md
@@ -31,6 +31,7 @@ Features:
|
|||||||
- [How to Use Custom Pretokenized Dataset](#how-to-use-your-custom-pretokenized-dataset)
|
- [How to Use Custom Pretokenized Dataset](#how-to-use-your-custom-pretokenized-dataset)
|
||||||
- [Config](#config)
|
- [Config](#config)
|
||||||
- [Train](#train)
|
- [Train](#train)
|
||||||
|
- [Training w/ Deepspeed](#training-with-deepspeed)
|
||||||
- [Inference](#inference)
|
- [Inference](#inference)
|
||||||
- [Merge LORA to Base](#merge-lora-to-base)
|
- [Merge LORA to Base](#merge-lora-to-base)
|
||||||
- [Common Errors](#common-errors-)
|
- [Common Errors](#common-errors-)
|
||||||
@@ -86,7 +87,7 @@ git clone https://github.com/OpenAccess-AI-Collective/axolotl
|
|||||||
cd axolotl
|
cd axolotl
|
||||||
|
|
||||||
pip3 install packaging
|
pip3 install packaging
|
||||||
pip3 install -e .[flash-attn]
|
pip3 install -e .[flash-attn,deepspeed]
|
||||||
pip3 install -U git+https://github.com/huggingface/peft.git
|
pip3 install -U git+https://github.com/huggingface/peft.git
|
||||||
|
|
||||||
# finetune lora
|
# finetune lora
|
||||||
@@ -121,7 +122,7 @@ accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
|
|||||||
3. Install axolotl along with python dependencies
|
3. Install axolotl along with python dependencies
|
||||||
```bash
|
```bash
|
||||||
pip3 install packaging
|
pip3 install packaging
|
||||||
pip3 install -e .[flash-attn]
|
pip3 install -e .[flash-attn,deepspeed]
|
||||||
```
|
```
|
||||||
|
|
||||||
- LambdaLabs
|
- LambdaLabs
|
||||||
@@ -157,7 +158,7 @@ accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
|
|||||||
cd axolotl
|
cd axolotl
|
||||||
|
|
||||||
pip3 install packaging
|
pip3 install packaging
|
||||||
pip3 install -e .[flash-attn]
|
pip3 install -e .[flash-attn,deepspeed]
|
||||||
pip3 install protobuf==3.20.3
|
pip3 install protobuf==3.20.3
|
||||||
pip3 install -U --ignore-installed requests Pillow psutil scipy
|
pip3 install -U --ignore-installed requests Pillow psutil scipy
|
||||||
```
|
```
|
||||||
@@ -715,11 +716,6 @@ fsdp_config:
|
|||||||
fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer
|
fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer
|
||||||
```
|
```
|
||||||
|
|
||||||
- llama Deepspeed
|
|
||||||
```yaml
|
|
||||||
deepspeed: deepspeed/zero3.json
|
|
||||||
```
|
|
||||||
|
|
||||||
##### Weights & Biases Logging
|
##### Weights & Biases Logging
|
||||||
|
|
||||||
- wandb options
|
- wandb options
|
||||||
@@ -732,6 +728,24 @@ wandb_run_id:
|
|||||||
wandb_log_model:
|
wandb_log_model:
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Training with Deepspeed
|
||||||
|
|
||||||
|
DeepSpeed is an optimization suite for multi-GPU systems that allows you to train much larger models than you
|
||||||
|
might typically be able to fit into your GPU's VRAM. More information about the various optimization types
|
||||||
|
for DeepSpeed is available at https://huggingface.co/docs/accelerate/main/en/usage_guides/deepspeed#what-is-integrated
|
||||||
|
|
||||||
|
We provide several default DeepSpeed JSON configurations for ZeRO stages 1, 2, and 3.
|
||||||
|
|
||||||
|
```shell
|
||||||
|
accelerate launch -m axolotl.cli.train examples/llama-2/config.yml --deepspeed deepspeed/zero1.json
|
||||||
|
```
|
||||||
|
|
||||||
|
or
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
deepspeed: deepspeed/zero1.json
|
||||||
|
```
|
||||||
|
|
||||||
### Inference
|
### Inference
|
||||||
|
|
||||||
Pass the appropriate flag to the train command:
|
Pass the appropriate flag to the train command:
|
||||||
|
|||||||
@@ -1,39 +1,41 @@
|
|||||||
{
|
{
|
||||||
"zero_optimization": {
|
"zero_optimization": {
|
||||||
"stage": 1,
|
"stage": 1,
|
||||||
"overlap_comm": true
|
"overlap_comm": true
|
||||||
},
|
},
|
||||||
"bf16": {
|
"bf16": {
|
||||||
"enabled": "auto"
|
"enabled": "auto"
|
||||||
},
|
},
|
||||||
"fp16": {
|
"fp16": {
|
||||||
"enabled": "auto",
|
"enabled": "auto",
|
||||||
"auto_cast": false,
|
"auto_cast": false,
|
||||||
"loss_scale": 0,
|
"loss_scale": 0,
|
||||||
"initial_scale_power": 32,
|
"initial_scale_power": 32,
|
||||||
"loss_scale_window": 1000,
|
"loss_scale_window": 1000,
|
||||||
"hysteresis": 2,
|
"hysteresis": 2,
|
||||||
"min_loss_scale": 1
|
"min_loss_scale": 1
|
||||||
},
|
},
|
||||||
"optimizer": {
|
"optimizer": {
|
||||||
"type": "AdamW",
|
"type": "AdamW",
|
||||||
"params": {
|
"params": {
|
||||||
"lr": "auto",
|
"lr": "auto",
|
||||||
"betas": "auto",
|
"betas": "auto",
|
||||||
"eps": "auto",
|
"eps": "auto",
|
||||||
"weight_decay": "auto"
|
"weight_decay": "auto"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"scheduler": {
|
"scheduler": {
|
||||||
"type": "WarmupDecayLR",
|
"type": "WarmupDecayLR",
|
||||||
"params": {
|
"params": {
|
||||||
"warmup_min_lr": "auto",
|
"warmup_min_lr": "auto",
|
||||||
"warmup_max_lr": "auto",
|
"warmup_max_lr": "auto",
|
||||||
"warmup_num_steps": "auto",
|
"warmup_num_steps": "auto",
|
||||||
"total_num_steps": "auto"
|
"warmup_type": "linear",
|
||||||
}
|
"total_num_steps": "auto"
|
||||||
},
|
}
|
||||||
"train_batch_size": "auto",
|
},
|
||||||
"train_micro_batch_size_per_gpu": "auto",
|
"gradient_accumulation_steps": "auto",
|
||||||
"wall_clock_breakdown": false
|
"train_batch_size": "auto",
|
||||||
|
"train_micro_batch_size_per_gpu": "auto",
|
||||||
|
"wall_clock_breakdown": false
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,43 +1,45 @@
|
|||||||
{
|
{
|
||||||
"zero_optimization": {
|
"zero_optimization": {
|
||||||
"stage": 2,
|
"stage": 2,
|
||||||
"offload_optimizer": {
|
"offload_optimizer": {
|
||||||
"device": "cpu"
|
"device": "cpu"
|
||||||
},
|
|
||||||
"contiguous_gradients": true,
|
|
||||||
"overlap_comm": true
|
|
||||||
},
|
},
|
||||||
"bf16": {
|
"contiguous_gradients": true,
|
||||||
"enabled": "auto"
|
"overlap_comm": true
|
||||||
},
|
},
|
||||||
"fp16": {
|
"bf16": {
|
||||||
"enabled": "auto",
|
"enabled": "auto"
|
||||||
"auto_cast": false,
|
},
|
||||||
"loss_scale": 0,
|
"fp16": {
|
||||||
"initial_scale_power": 32,
|
"enabled": "auto",
|
||||||
"loss_scale_window": 1000,
|
"auto_cast": false,
|
||||||
"hysteresis": 2,
|
"loss_scale": 0,
|
||||||
"min_loss_scale": 1
|
"initial_scale_power": 32,
|
||||||
},
|
"loss_scale_window": 1000,
|
||||||
"optimizer": {
|
"hysteresis": 2,
|
||||||
"type": "AdamW",
|
"min_loss_scale": 1
|
||||||
"params": {
|
},
|
||||||
"lr": "auto",
|
"optimizer": {
|
||||||
"betas": "auto",
|
"type": "AdamW",
|
||||||
"eps": "auto",
|
"params": {
|
||||||
"weight_decay": "auto"
|
"lr": "auto",
|
||||||
}
|
"betas": "auto",
|
||||||
},
|
"eps": "auto",
|
||||||
"scheduler": {
|
"weight_decay": "auto"
|
||||||
"type": "WarmupDecayLR",
|
}
|
||||||
"params": {
|
},
|
||||||
"warmup_min_lr": "auto",
|
"scheduler": {
|
||||||
"warmup_max_lr": "auto",
|
"type": "WarmupDecayLR",
|
||||||
"warmup_num_steps": "auto",
|
"params": {
|
||||||
"total_num_steps": "auto"
|
"warmup_min_lr": "auto",
|
||||||
}
|
"warmup_max_lr": "auto",
|
||||||
},
|
"warmup_num_steps": "auto",
|
||||||
"train_batch_size": "auto",
|
"warmup_type": "linear",
|
||||||
"train_micro_batch_size_per_gpu": "auto",
|
"total_num_steps": "auto"
|
||||||
"wall_clock_breakdown": false
|
}
|
||||||
|
},
|
||||||
|
"gradient_accumulation_steps": "auto",
|
||||||
|
"train_batch_size": "auto",
|
||||||
|
"train_micro_batch_size_per_gpu": "auto",
|
||||||
|
"wall_clock_breakdown": false
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -45,9 +45,11 @@
|
|||||||
"params": {
|
"params": {
|
||||||
"warmup_min_lr": "auto",
|
"warmup_min_lr": "auto",
|
||||||
"warmup_max_lr": "auto",
|
"warmup_max_lr": "auto",
|
||||||
"warmup_num_steps": "auto"
|
"warmup_num_steps": "auto",
|
||||||
|
"warmup_type": "linear"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"gradient_accumulation_steps": "auto",
|
||||||
"train_batch_size": "auto",
|
"train_batch_size": "auto",
|
||||||
"train_micro_batch_size_per_gpu": "auto",
|
"train_micro_batch_size_per_gpu": "auto",
|
||||||
"wall_clock_breakdown": false
|
"wall_clock_breakdown": false
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ peft @ git+https://github.com/huggingface/peft.git
|
|||||||
transformers @ git+https://github.com/huggingface/transformers.git
|
transformers @ git+https://github.com/huggingface/transformers.git
|
||||||
bitsandbytes>=0.41.1
|
bitsandbytes>=0.41.1
|
||||||
accelerate @ git+https://github.com/huggingface/accelerate
|
accelerate @ git+https://github.com/huggingface/accelerate
|
||||||
|
deepspeed
|
||||||
addict
|
addict
|
||||||
evaluate
|
evaluate
|
||||||
fire
|
fire
|
||||||
|
|||||||
9
setup.py
9
setup.py
@@ -13,7 +13,12 @@ def parse_requirements():
|
|||||||
# Handle custom index URLs
|
# Handle custom index URLs
|
||||||
_, url = line.split()
|
_, url = line.split()
|
||||||
_dependency_links.append(url)
|
_dependency_links.append(url)
|
||||||
elif "flash-attn" not in line and line and line[0] != "#":
|
elif (
|
||||||
|
"flash-attn" not in line
|
||||||
|
and "deepspeed" not in line
|
||||||
|
and line
|
||||||
|
and line[0] != "#"
|
||||||
|
):
|
||||||
# Handle standard packages
|
# Handle standard packages
|
||||||
_install_requires.append(line)
|
_install_requires.append(line)
|
||||||
return _install_requires, _dependency_links
|
return _install_requires, _dependency_links
|
||||||
@@ -35,7 +40,7 @@ setup(
|
|||||||
"flash-attn": [
|
"flash-attn": [
|
||||||
"flash-attn>=2.2.1",
|
"flash-attn>=2.2.1",
|
||||||
],
|
],
|
||||||
"extras": [
|
"deepspeed": [
|
||||||
"deepspeed",
|
"deepspeed",
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
|||||||
Reference in New Issue
Block a user