update README w deepspeed info (#605)

This commit is contained in:
Wing Lian
2023-09-22 00:15:52 -04:00
committed by GitHub
parent d5f8589021
commit c25ba7939b
6 changed files with 114 additions and 88 deletions

View File

@@ -31,6 +31,7 @@ Features:
- [How to Use Custom Pretokenized Dataset](#how-to-use-your-custom-pretokenized-dataset) - [How to Use Custom Pretokenized Dataset](#how-to-use-your-custom-pretokenized-dataset)
- [Config](#config) - [Config](#config)
- [Train](#train) - [Train](#train)
- [Training w/ Deepspeed](#training-with-deepspeed)
- [Inference](#inference) - [Inference](#inference)
- [Merge LORA to Base](#merge-lora-to-base) - [Merge LORA to Base](#merge-lora-to-base)
- [Common Errors](#common-errors-) - [Common Errors](#common-errors-)
@@ -86,7 +87,7 @@ git clone https://github.com/OpenAccess-AI-Collective/axolotl
cd axolotl cd axolotl
pip3 install packaging pip3 install packaging
pip3 install -e .[flash-attn] pip3 install -e .[flash-attn,deepspeed]
pip3 install -U git+https://github.com/huggingface/peft.git pip3 install -U git+https://github.com/huggingface/peft.git
# finetune lora # finetune lora
@@ -121,7 +122,7 @@ accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
3. Install axolotl along with python dependencies 3. Install axolotl along with python dependencies
```bash ```bash
pip3 install packaging pip3 install packaging
pip3 install -e .[flash-attn] pip3 install -e .[flash-attn,deepspeed]
``` ```
- LambdaLabs - LambdaLabs
@@ -157,7 +158,7 @@ accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
cd axolotl cd axolotl
pip3 install packaging pip3 install packaging
pip3 install -e .[flash-attn] pip3 install -e .[flash-attn,deepspeed]
pip3 install protobuf==3.20.3 pip3 install protobuf==3.20.3
pip3 install -U --ignore-installed requests Pillow psutil scipy pip3 install -U --ignore-installed requests Pillow psutil scipy
``` ```
@@ -715,11 +716,6 @@ fsdp_config:
fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer
``` ```
- llama Deepspeed
```yaml
deepspeed: deepspeed/zero3.json
```
##### Weights & Biases Logging ##### Weights & Biases Logging
- wandb options - wandb options
@@ -732,6 +728,24 @@ wandb_run_id:
wandb_log_model: wandb_log_model:
``` ```
### Training with Deepspeed
Deepspeed is an optimization suite for multi-GPU systems, allowing you to train much larger models than you
might typically be able to fit into your GPU's VRAM. More information about the various optimization types
for deepspeed is available at https://huggingface.co/docs/accelerate/main/en/usage_guides/deepspeed#what-is-integrated.
We provide several default deepspeed JSON configurations for ZeRO stages 1, 2, and 3.
```shell
accelerate launch -m axolotl.cli.train examples/llama-2/config.yml --deepspeed deepspeed/zero1.json
```
or
```yaml
deepspeed: deepspeed/zero1.json
```
### Inference ### Inference
Pass the appropriate flag to the train command: Pass the appropriate flag to the train command:

View File

@@ -1,39 +1,41 @@
{ {
"zero_optimization": { "zero_optimization": {
"stage": 1, "stage": 1,
"overlap_comm": true "overlap_comm": true
}, },
"bf16": { "bf16": {
"enabled": "auto" "enabled": "auto"
}, },
"fp16": { "fp16": {
"enabled": "auto", "enabled": "auto",
"auto_cast": false, "auto_cast": false,
"loss_scale": 0, "loss_scale": 0,
"initial_scale_power": 32, "initial_scale_power": 32,
"loss_scale_window": 1000, "loss_scale_window": 1000,
"hysteresis": 2, "hysteresis": 2,
"min_loss_scale": 1 "min_loss_scale": 1
}, },
"optimizer": { "optimizer": {
"type": "AdamW", "type": "AdamW",
"params": { "params": {
"lr": "auto", "lr": "auto",
"betas": "auto", "betas": "auto",
"eps": "auto", "eps": "auto",
"weight_decay": "auto" "weight_decay": "auto"
} }
}, },
"scheduler": { "scheduler": {
"type": "WarmupDecayLR", "type": "WarmupDecayLR",
"params": { "params": {
"warmup_min_lr": "auto", "warmup_min_lr": "auto",
"warmup_max_lr": "auto", "warmup_max_lr": "auto",
"warmup_num_steps": "auto", "warmup_num_steps": "auto",
"total_num_steps": "auto" "warmup_type": "linear",
} "total_num_steps": "auto"
}, }
"train_batch_size": "auto", },
"train_micro_batch_size_per_gpu": "auto", "gradient_accumulation_steps": "auto",
"wall_clock_breakdown": false "train_batch_size": "auto",
"train_micro_batch_size_per_gpu": "auto",
"wall_clock_breakdown": false
} }

View File

@@ -1,43 +1,45 @@
{ {
"zero_optimization": { "zero_optimization": {
"stage": 2, "stage": 2,
"offload_optimizer": { "offload_optimizer": {
"device": "cpu" "device": "cpu"
},
"contiguous_gradients": true,
"overlap_comm": true
}, },
"bf16": { "contiguous_gradients": true,
"enabled": "auto" "overlap_comm": true
}, },
"fp16": { "bf16": {
"enabled": "auto", "enabled": "auto"
"auto_cast": false, },
"loss_scale": 0, "fp16": {
"initial_scale_power": 32, "enabled": "auto",
"loss_scale_window": 1000, "auto_cast": false,
"hysteresis": 2, "loss_scale": 0,
"min_loss_scale": 1 "initial_scale_power": 32,
}, "loss_scale_window": 1000,
"optimizer": { "hysteresis": 2,
"type": "AdamW", "min_loss_scale": 1
"params": { },
"lr": "auto", "optimizer": {
"betas": "auto", "type": "AdamW",
"eps": "auto", "params": {
"weight_decay": "auto" "lr": "auto",
} "betas": "auto",
}, "eps": "auto",
"scheduler": { "weight_decay": "auto"
"type": "WarmupDecayLR", }
"params": { },
"warmup_min_lr": "auto", "scheduler": {
"warmup_max_lr": "auto", "type": "WarmupDecayLR",
"warmup_num_steps": "auto", "params": {
"total_num_steps": "auto" "warmup_min_lr": "auto",
} "warmup_max_lr": "auto",
}, "warmup_num_steps": "auto",
"train_batch_size": "auto", "warmup_type": "linear",
"train_micro_batch_size_per_gpu": "auto", "total_num_steps": "auto"
"wall_clock_breakdown": false }
},
"gradient_accumulation_steps": "auto",
"train_batch_size": "auto",
"train_micro_batch_size_per_gpu": "auto",
"wall_clock_breakdown": false
} }

View File

@@ -45,9 +45,11 @@
"params": { "params": {
"warmup_min_lr": "auto", "warmup_min_lr": "auto",
"warmup_max_lr": "auto", "warmup_max_lr": "auto",
"warmup_num_steps": "auto" "warmup_num_steps": "auto",
"warmup_type": "linear"
} }
}, },
"gradient_accumulation_steps": "auto",
"train_batch_size": "auto", "train_batch_size": "auto",
"train_micro_batch_size_per_gpu": "auto", "train_micro_batch_size_per_gpu": "auto",
"wall_clock_breakdown": false "wall_clock_breakdown": false

View File

@@ -7,6 +7,7 @@ peft @ git+https://github.com/huggingface/peft.git
transformers @ git+https://github.com/huggingface/transformers.git transformers @ git+https://github.com/huggingface/transformers.git
bitsandbytes>=0.41.1 bitsandbytes>=0.41.1
accelerate @ git+https://github.com/huggingface/accelerate accelerate @ git+https://github.com/huggingface/accelerate
deepspeed
addict addict
evaluate evaluate
fire fire

View File

@@ -13,7 +13,12 @@ def parse_requirements():
# Handle custom index URLs # Handle custom index URLs
_, url = line.split() _, url = line.split()
_dependency_links.append(url) _dependency_links.append(url)
elif "flash-attn" not in line and line and line[0] != "#": elif (
"flash-attn" not in line
and "deepspeed" not in line
and line
and line[0] != "#"
):
# Handle standard packages # Handle standard packages
_install_requires.append(line) _install_requires.append(line)
return _install_requires, _dependency_links return _install_requires, _dependency_links
@@ -35,7 +40,7 @@ setup(
"flash-attn": [ "flash-attn": [
"flash-attn>=2.2.1", "flash-attn>=2.2.1",
], ],
"extras": [ "deepspeed": [
"deepspeed", "deepspeed",
], ],
}, },