bump trl and accelerate for latest releases (#1730)

* bump trl and accelerate for latest releases

* ensure that the CI runs on new gh org

* drop kto_pair support since removed upstream
This commit is contained in:
Wing Lian
2024-07-10 11:15:44 -04:00
committed by GitHub
parent b3f680d305
commit a159724e44
11 changed files with 15 additions and 21 deletions

View File

@@ -805,11 +805,7 @@ def load_model(
if not reference_model or cfg.lora_model_dir:
# if we're not loading the reference model, then we're loading the model for training
# then the dpo trainer doesn't want the peft model loaded over it, it just wants the lora/peft config
if (
cfg.adapter
and cfg.rl in ["dpo", "ipo", "kto_pair", "kto"]
and not cfg.merge_lora
):
if cfg.adapter and cfg.rl in ["dpo", "ipo", "kto"] and not cfg.merge_lora:
_, lora_config = load_lora(model, cfg, inference=False, config_only=True)
else:
model, lora_config = load_adapter(model, cfg, cfg.adapter)