From df645906eb55a60be9c6b1cbe9cf42caa3f80299 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Thu, 2 May 2024 09:31:43 -0400 Subject: [PATCH] invert check --- src/axolotl/core/trainer_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/axolotl/core/trainer_builder.py b/src/axolotl/core/trainer_builder.py index cc53fb79b..b1554f0c0 100644 --- a/src/axolotl/core/trainer_builder.py +++ b/src/axolotl/core/trainer_builder.py @@ -837,7 +837,7 @@ class AxolotlDPOTrainer(DPOTrainer): The losses tensor contains the DPO loss for each example in the batch. The chosen_rewards and rejected_rewards tensors contain the rewards for the chosen and rejected responses, respectively. """ - if self.loss_type not in ["sigmoid", "hinge", "ipo", "kto_pair"]: + if self.loss_type in ["sigmoid", "hinge", "ipo", "kto_pair"]: return super().dpo_loss( policy_chosen_logps, policy_rejected_logps,