checkpoint model on first step callback (#2906)

* checkpoint model on first step callback

* remove debug

* add test cases; update existing tests not to save on first step

* move test out of solo

* delete

* default to False

* typo
This commit is contained in:
Dan Saunders
2025-07-15 15:00:48 -04:00
committed by GitHub
parent d320ef6199
commit 10ba1622f7
146 changed files with 419 additions and 9 deletions

View File

@@ -56,6 +56,7 @@ class TestDPOLlamaLora(unittest.TestCase):
"warmup_steps": 5,
"gradient_checkpointing": True,
"gradient_checkpointing_kwargs": {"use_reentrant": True},
"save_first_step": False,
}
)
@@ -105,6 +106,7 @@ class TestDPOLlamaLora(unittest.TestCase):
"warmup_steps": 5,
"gradient_checkpointing": True,
"gradient_checkpointing_kwargs": {"use_reentrant": True},
"save_first_step": False,
}
)
@@ -154,6 +156,7 @@ class TestDPOLlamaLora(unittest.TestCase):
"warmup_steps": 5,
"gradient_checkpointing": True,
"gradient_checkpointing_kwargs": {"use_reentrant": True},
"save_first_step": False,
}
)
@@ -203,6 +206,7 @@ class TestDPOLlamaLora(unittest.TestCase):
"warmup_steps": 5,
"gradient_checkpointing": True,
"gradient_checkpointing_kwargs": {"use_reentrant": True},
"save_first_step": False,
}
)
@@ -251,6 +255,7 @@ class TestDPOLlamaLora(unittest.TestCase):
"warmup_steps": 5,
"gradient_checkpointing": True,
"gradient_checkpointing_kwargs": {"use_reentrant": True},
"save_first_step": False,
}
)
@@ -302,6 +307,7 @@ class TestDPOLlamaLora(unittest.TestCase):
"warmup_steps": 5,
"gradient_checkpointing": True,
"gradient_checkpointing_kwargs": {"use_reentrant": True},
"save_first_step": False,
}
)
@@ -370,6 +376,7 @@ class TestDPOLlamaLora(unittest.TestCase):
"warmup_steps": 5,
"gradient_checkpointing": True,
"gradient_checkpointing_kwargs": {"use_reentrant": True},
"save_first_step": False,
}
)