Add paired KTO support (#1069)

This commit is contained in:
Wing Lian
2024-01-09 13:30:45 -05:00
committed by GitHub
parent 768d348f42
commit d7057ccd36
3 changed files with 6 additions and 1 deletion

View File

@@ -927,6 +927,8 @@ class HFDPOTrainerBuilder(TrainerBuilderBase):
dpo_trainer_kwargs["loss_type"] = "ipo"
if self.cfg.dpo_label_smoothing:
dpo_trainer_kwargs["label_smoothing"] = self.cfg.dpo_label_smoothing
elif self.cfg.rl == "kto_pair":
dpo_trainer_kwargs["loss_type"] = "kto_pair"
dpo_trainer = DPOTrainer(
self.model,