diff --git a/deepspeed_configs/zero3.json b/deepspeed_configs/zero3.json index 90ec3677e..a648cbe81 100644 --- a/deepspeed_configs/zero3.json +++ b/deepspeed_configs/zero3.json @@ -5,11 +5,11 @@ "contiguous_gradients": true, "sub_group_size": 0, "reduce_bucket_size": "auto", - "stage3_prefetch_bucket_size": "auto", - "stage3_param_persistence_threshold": "auto", - "stage3_max_live_parameters": 0, - "stage3_max_reuse_distance": 0, - "stage3_gather_16bit_weights_on_model_save": true + "prefetch_bucket_size": "auto", + "param_persistence_threshold": "auto", + "max_live_parameters": 0, + "max_reuse_distance": 0, + "gather_16bit_weights_on_model_save": true }, "bf16": { "enabled": "auto" diff --git a/deepspeed_configs/zero3_bf16.json b/deepspeed_configs/zero3_bf16.json index 49fb75755..4d31a1531 100644 --- a/deepspeed_configs/zero3_bf16.json +++ b/deepspeed_configs/zero3_bf16.json @@ -5,11 +5,11 @@ "contiguous_gradients": true, "sub_group_size": 0, "reduce_bucket_size": "auto", - "stage3_prefetch_bucket_size": "auto", - "stage3_param_persistence_threshold": "auto", - "stage3_max_live_parameters": 0, - "stage3_max_reuse_distance": 0, - "stage3_gather_16bit_weights_on_model_save": true + "prefetch_bucket_size": "auto", + "param_persistence_threshold": "auto", + "max_live_parameters": 0, + "max_reuse_distance": 0, + "gather_16bit_weights_on_model_save": true }, "bf16": { "enabled": true diff --git a/deepspeed_configs/zero3_bf16_cpuoffload_all.json b/deepspeed_configs/zero3_bf16_cpuoffload_all.json index 3ccc66db4..52fe9cdd4 100644 --- a/deepspeed_configs/zero3_bf16_cpuoffload_all.json +++ b/deepspeed_configs/zero3_bf16_cpuoffload_all.json @@ -15,11 +15,11 @@ "contiguous_gradients": true, "sub_group_size": 0, "reduce_bucket_size": "auto", - "stage3_prefetch_bucket_size": "auto", - "stage3_param_persistence_threshold": "auto", - "stage3_max_live_parameters": 0, - "stage3_max_reuse_distance": 0, - "stage3_gather_16bit_weights_on_model_save": true + "prefetch_bucket_size": "auto", + "param_persistence_threshold": "auto", + "max_live_parameters": 0, + "max_reuse_distance": 0, + "gather_16bit_weights_on_model_save": true }, "bf16": { "enabled": true diff --git a/deepspeed_configs/zero3_bf16_cpuoffload_params.json b/deepspeed_configs/zero3_bf16_cpuoffload_params.json index fe21d35f8..81ac1d1d8 100644 --- a/deepspeed_configs/zero3_bf16_cpuoffload_params.json +++ b/deepspeed_configs/zero3_bf16_cpuoffload_params.json @@ -11,11 +11,11 @@ "contiguous_gradients": true, "sub_group_size": 0, "reduce_bucket_size": "auto", - "stage3_prefetch_bucket_size": "auto", - "stage3_param_persistence_threshold": "auto", - "stage3_max_live_parameters": 0, - "stage3_max_reuse_distance": 0, - "stage3_gather_16bit_weights_on_model_save": true + "prefetch_bucket_size": "auto", + "param_persistence_threshold": "auto", + "max_live_parameters": 0, + "max_reuse_distance": 0, + "gather_16bit_weights_on_model_save": true }, "bf16": { "enabled": true