diff --git a/deepspeed_configs/zero1.json b/deepspeed_configs/zero1.json index 787fc0d6b..8a57b8605 100644 --- a/deepspeed_configs/zero1.json +++ b/deepspeed_configs/zero1.json @@ -16,6 +16,7 @@ "min_loss_scale": 1 }, "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", "train_batch_size": "auto", "train_micro_batch_size_per_gpu": "auto", "wall_clock_breakdown": false diff --git a/deepspeed_configs/zero2.json b/deepspeed_configs/zero2.json index 5b22d996c..153ac0280 100644 --- a/deepspeed_configs/zero2.json +++ b/deepspeed_configs/zero2.json @@ -20,6 +20,7 @@ "min_loss_scale": 1 }, "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", "train_batch_size": "auto", "train_micro_batch_size_per_gpu": "auto", "wall_clock_breakdown": false diff --git a/deepspeed_configs/zero3.json b/deepspeed_configs/zero3.json index a185afab4..90ec3677e 100644 --- a/deepspeed_configs/zero3.json +++ b/deepspeed_configs/zero3.json @@ -24,6 +24,7 @@ "min_loss_scale": 1 }, "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", "train_batch_size": "auto", "train_micro_batch_size_per_gpu": "auto", "wall_clock_breakdown": false diff --git a/deepspeed_configs/zero3_bf16.json b/deepspeed_configs/zero3_bf16.json index 263caa393..16e64d76b 100644 --- a/deepspeed_configs/zero3_bf16.json +++ b/deepspeed_configs/zero3_bf16.json @@ -24,6 +24,7 @@ "min_loss_scale": 1 }, "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", "train_batch_size": "auto", "train_micro_batch_size_per_gpu": "auto", "wall_clock_breakdown": false