Revert "checkpoint model on first step callback (#2906)"

This reverts commit 10ba1622f7.
This commit is contained in:
Dan Saunders
2025-07-15 15:01:12 -04:00
committed by GitHub
parent 10ba1622f7
commit 6f6d917a99
146 changed files with 9 additions and 419 deletions

View File

@@ -69,7 +69,6 @@ class TestSequenceParallelism:
"use_tensorboard": True,
"sequence_parallel_degree": 2,
"ring_attn_func": ring_attn_func,
"save_first_step": False,
}
)

View File

@@ -61,7 +61,6 @@ class TestPackedFlex:
"max_steps": 2,
"use_tensorboard": True,
"save_strategy": "no",
"save_first_step": False,
}
)
if is_torch_bf16_gpu_available():

View File

@@ -223,7 +223,6 @@ def oai_gsm8k_transform(cfg, *args, **kwargs):
"save_safetensors": True,
"bf16": "auto",
"use_tensorboard": True,
"save_first_step": False,
}
)
@@ -318,7 +317,6 @@ def oai_gsm8k_transform(cfg, *args, **kwargs):
"save_safetensors": True,
"bf16": "auto",
"use_tensorboard": True,
"save_first_step": False,
}
)
@@ -411,7 +409,6 @@ def oai_gsm8k_transform(cfg, *args, **kwargs):
"save_safetensors": True,
"bf16": "auto",
"use_tensorboard": True,
"save_first_step": False,
}
)

View File

@@ -67,7 +67,6 @@ class TestMultiGPUEval:
"logging_steps": 1,
"weight_decay": 0.0,
"use_tensorboard": True,
"save_first_step": False,
}
)
@@ -139,7 +138,6 @@ class TestMultiGPUEval:
"logging_steps": 1,
"weight_decay": 0.0,
"use_tensorboard": True,
"save_first_step": False,
}
)

View File

@@ -71,7 +71,6 @@ class TestMultiGPUGemma3:
"flash_attention": True,
"use_tensorboard": True,
"bf16": True,
"save_first_step": False,
}
)

View File

@@ -69,7 +69,6 @@ class TestMultiGPULlama:
"flash_attention": True,
"use_tensorboard": True,
"bf16": True,
"save_first_step": False,
}
)
@@ -136,7 +135,6 @@ class TestMultiGPULlama:
"flash_attention": True,
"use_tensorboard": True,
"bf16": True,
"save_first_step": False,
}
)
@@ -212,7 +210,6 @@ class TestMultiGPULlama:
"flash_attention": True,
"use_tensorboard": True,
"bf16": True,
"save_first_step": False,
}
)
@@ -292,7 +289,6 @@ class TestMultiGPULlama:
"flash_attention": True,
"use_tensorboard": True,
"bf16": True,
"save_first_step": False,
}
)
@@ -369,7 +365,6 @@ class TestMultiGPULlama:
},
"use_tensorboard": True,
"seed": 42,
"save_first_step": False,
}
)
@@ -447,7 +442,6 @@ class TestMultiGPULlama:
"fsdp_auto_wrap_policy": "TRANSFORMER_BASED_WRAP",
},
"use_tensorboard": True,
"save_first_step": False,
}
)
@@ -526,7 +520,6 @@ class TestMultiGPULlama:
"fsdp_reshard_after_forward": fsdp_reshard_after_forward,
},
"use_tensorboard": True,
"save_first_step": False,
}
)
if attention_backend == "flash":
@@ -612,7 +605,6 @@ class TestMultiGPULlama:
"fsdp_auto_wrap_policy": "TRANSFORMER_BASED_WRAP",
},
"use_tensorboard": True,
"save_first_step": False,
}
)
@@ -697,7 +689,6 @@ class TestMultiGPULlama:
"flash_attention": True,
"deepspeed": str(AXOLOTL_ROOT / deepspeed),
"use_tensorboard": True,
"save_first_step": False,
**adapter,
}
)
@@ -774,7 +765,6 @@ class TestMultiGPULlama:
"deepspeed": str(AXOLOTL_ROOT / "deepspeed_configs/zero2.json"),
"use_tensorboard": True,
"seed": 42,
"save_first_step": False,
**adapter,
}
)
@@ -850,7 +840,6 @@ class TestMultiGPULlama:
"flash_attention": True,
"deepspeed": str(AXOLOTL_ROOT / "deepspeed_configs/zero1.json"),
"use_tensorboard": True,
"save_first_step": False,
**adapter,
}
)
@@ -919,7 +908,6 @@ class TestMultiGPULlama:
"save_safetensors": True,
# "deepspeed": str(AXOLOTL_ROOT / "deepspeed_configs/zero1.json"),
"use_tensorboard": True,
"save_first_step": False,
}
)

View File

@@ -56,7 +56,6 @@ class TestMultiGPURay:
"use_tensorboard": True,
"use_ray": True,
"ray_num_workers": 2,
"save_first_step": False,
}
)
@@ -116,7 +115,6 @@ class TestMultiGPURay:
"flash_attention": True,
"deepspeed": str(AXOLOTL_ROOT / "deepspeed_configs/zero2.json"),
"use_tensorboard": True,
"save_first_step": False,
}
)