checkpoint model on first step callback (#2906)
* checkpoint model on first step callback * remove debug * add test cases; update existing tests not to save on first step * move test out of solo * delete * default to False * typo
This commit is contained in:
@@ -56,6 +56,7 @@ class TestMultiGPURay:
|
||||
"use_tensorboard": True,
|
||||
"use_ray": True,
|
||||
"ray_num_workers": 2,
|
||||
"save_first_step": False,
|
||||
}
|
||||
)
|
||||
|
||||
@@ -115,6 +116,7 @@ class TestMultiGPURay:
|
||||
"flash_attention": True,
|
||||
"deepspeed": str(AXOLOTL_ROOT / "deepspeed_configs/zero2.json"),
|
||||
"use_tensorboard": True,
|
||||
"save_first_step": False,
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user