Resolve the RL trainer `beta` kwarg with explicit `is not None` checks.

The old code used truthiness, so a configured value of 0 / 0.0 was skipped.
After this hunk the sources are consulted in a fixed order and the first one
that is not None wins: cfg.trl.beta, then cfg.rl_beta, then cfg.orpo_alpha.
Note the precedence change: orpo_alpha used to be a separate `if` that
overwrote any beta already set; it is now the last `elif` fallback.

diff --git a/src/axolotl/core/trainer_builder.py b/src/axolotl/core/trainer_builder.py
index 7c2be4956..a51211263 100755
--- a/src/axolotl/core/trainer_builder.py
+++ b/src/axolotl/core/trainer_builder.py
@@ -1040,9 +1040,11 @@ class HFRLTrainerBuilder(TrainerBuilderBase):
         if self.cfg.dataset_processes:
             training_args_kwargs["dataset_num_proc"] = self.cfg.dataset_processes
 
-        if (self.cfg.trl and self.cfg.trl.beta) or self.cfg.rl_beta:
-            training_args_kwargs["beta"] = self.cfg.trl.beta or self.cfg.rl_beta
-        if self.cfg.orpo_alpha:
+        if self.cfg.trl and self.cfg.trl.beta is not None:
+            training_args_kwargs["beta"] = self.cfg.trl.beta
+        elif self.cfg.rl_beta is not None:
+            training_args_kwargs["beta"] = self.cfg.rl_beta
+        elif self.cfg.orpo_alpha is not None:
             # trl does some odd mapping of alpha to beta to reuse the beta parameter ???
             training_args_kwargs["beta"] = self.cfg.orpo_alpha
 