Add KTO support (#1640)

* add kto support

* test cleanup

* fix outdated comment

* fix llama3 ultra

* chore: lint

* update to use rl_beta instead of dpo_beta

---------

Co-authored-by: Wing Lian <wing.lian@gmail.com>
This commit is contained in:
Ben Redmond
2024-05-20 16:05:16 -04:00
committed by GitHub
parent ba45531802
commit 22ae21a6c2
11 changed files with 434 additions and 17 deletions

View File

@@ -1117,6 +1117,15 @@ class TestValidation(BaseValidation):
validate_config(cfg)
assert len(self._caplog.records) == 0
def test_dpo_beta_deprecation(self, minimal_cfg):
    """
    Check how ``validate_config`` treats the deprecated ``dpo_beta`` key.

    A config that sets ``dpo_beta`` is expected to come back with the same
    value under ``rl_beta``, with ``dpo_beta`` set to None, and with exactly
    one log record captured while logging at WARNING level.
    """
    legacy_cfg = DictDefault({"dpo_beta": 0.2}) | minimal_cfg

    with self._caplog.at_level(logging.WARNING):
        validated = validate_config(legacy_cfg)

        # The value supplied under the old key must show up under the new one.
        assert validated["rl_beta"] == 0.2
        # The old key must no longer carry a value after validation.
        assert validated["dpo_beta"] is None

        # Exactly one record should have been captured by this point.
        captured = self._caplog.records
        assert len(captured) == 1
class TestValidationCheckModelConfig(BaseValidation):
"""