From b0ee9ec7346f8f643fafac5a1dbbf529ef12cb61 Mon Sep 17 00:00:00 2001
From: Seungduk Kim
Date: Mon, 11 Mar 2024 09:50:12 +0900
Subject: [PATCH] Set `gradient_clipping` to `auto` in DeepSpeed configs (#1382) [skip ci]

---
 deepspeed_configs/zero1.json      | 1 +
 deepspeed_configs/zero2.json      | 1 +
 deepspeed_configs/zero3.json      | 1 +
 deepspeed_configs/zero3_bf16.json | 1 +
 4 files changed, 4 insertions(+)

diff --git a/deepspeed_configs/zero1.json b/deepspeed_configs/zero1.json
index 787fc0d6b..8a57b8605 100644
--- a/deepspeed_configs/zero1.json
+++ b/deepspeed_configs/zero1.json
@@ -16,6 +16,7 @@
     "min_loss_scale": 1
   },
   "gradient_accumulation_steps": "auto",
+  "gradient_clipping": "auto",
   "train_batch_size": "auto",
   "train_micro_batch_size_per_gpu": "auto",
   "wall_clock_breakdown": false
diff --git a/deepspeed_configs/zero2.json b/deepspeed_configs/zero2.json
index 5b22d996c..153ac0280 100644
--- a/deepspeed_configs/zero2.json
+++ b/deepspeed_configs/zero2.json
@@ -20,6 +20,7 @@
     "min_loss_scale": 1
   },
   "gradient_accumulation_steps": "auto",
+  "gradient_clipping": "auto",
   "train_batch_size": "auto",
   "train_micro_batch_size_per_gpu": "auto",
   "wall_clock_breakdown": false
diff --git a/deepspeed_configs/zero3.json b/deepspeed_configs/zero3.json
index a185afab4..90ec3677e 100644
--- a/deepspeed_configs/zero3.json
+++ b/deepspeed_configs/zero3.json
@@ -24,6 +24,7 @@
     "min_loss_scale": 1
   },
   "gradient_accumulation_steps": "auto",
+  "gradient_clipping": "auto",
   "train_batch_size": "auto",
   "train_micro_batch_size_per_gpu": "auto",
   "wall_clock_breakdown": false
diff --git a/deepspeed_configs/zero3_bf16.json b/deepspeed_configs/zero3_bf16.json
index 263caa393..16e64d76b 100644
--- a/deepspeed_configs/zero3_bf16.json
+++ b/deepspeed_configs/zero3_bf16.json
@@ -24,6 +24,7 @@
     "min_loss_scale": 1
   },
   "gradient_accumulation_steps": "auto",
+  "gradient_clipping": "auto",
   "train_batch_size": "auto",
   "train_micro_batch_size_per_gpu": "auto",
   "wall_clock_breakdown": false