Fix DeepSpeed ZeRO-3 Config (#791)

* Update zero3.json

Remove CPU offload by default (it slows things down horribly; you are better off reducing the batch size), and change the LR scheduler to a properly decaying one (WarmupDecayLR instead of WarmupLR).

* Update zero3.json

fix something
This commit is contained in:
Teknium
2023-10-27 18:57:02 -07:00
committed by GitHub
parent 2e71ff03a6
commit d3193beac3

View File

@@ -1,14 +1,6 @@
{ {
"zero_optimization": { "zero_optimization": {
"stage": 3, "stage": 3,
"offload_optimizer": {
"device": "cpu",
"pin_memory": true
},
"offload_param": {
"device": "cpu",
"pin_memory": true
},
"overlap_comm": true, "overlap_comm": true,
"contiguous_gradients": true, "contiguous_gradients": true,
"sub_group_size": 0, "sub_group_size": 0,
@@ -41,12 +33,13 @@
} }
}, },
"scheduler": { "scheduler": {
"type": "WarmupLR", "type": "WarmupDecayLR",
"params": { "params": {
"warmup_min_lr": "auto", "warmup_min_lr": "auto",
"warmup_max_lr": "auto", "warmup_max_lr": "auto",
"warmup_num_steps": "auto", "warmup_num_steps": "auto",
"warmup_type": "linear" "warmup_type": "linear",
"total_num_steps": "auto"
} }
}, },
"gradient_accumulation_steps": "auto", "gradient_accumulation_steps": "auto",