checkpoint model on first step callback (#2906)

* checkpoint model on first step callback

* remove debug

* add test cases; update existing tests not to save on first step

* move test out of solo

* delete

* default to False

* typo
This commit is contained in:
Dan Saunders
2025-07-15 15:00:48 -04:00
committed by GitHub
parent d320ef6199
commit 10ba1622f7
146 changed files with 419 additions and 9 deletions

View File

@@ -35,7 +35,6 @@ wandb_watch:
wandb_name:
wandb_log_model:
gradient_accumulation_steps: 4
micro_batch_size: 1
num_epochs: 4
@@ -56,3 +55,5 @@ evals_per_epoch:
saves_per_epoch: 1
weight_decay: 0.0
special_tokens:
# save_first_step: true # uncomment this to validate checkpoint saving works with your config