From 60a8f0958ddc2334c6ffc7b56a70a7726241cb2c Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Thu, 17 Apr 2025 17:27:19 -0700
Subject: [PATCH] zero val fix for beta (#2538)

---
 src/axolotl/core/trainer_builder.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/axolotl/core/trainer_builder.py b/src/axolotl/core/trainer_builder.py
index 7c2be4956..a51211263 100755
--- a/src/axolotl/core/trainer_builder.py
+++ b/src/axolotl/core/trainer_builder.py
@@ -1040,9 +1040,11 @@ class HFRLTrainerBuilder(TrainerBuilderBase):
         if self.cfg.dataset_processes:
             training_args_kwargs["dataset_num_proc"] = self.cfg.dataset_processes
 
-        if (self.cfg.trl and self.cfg.trl.beta) or self.cfg.rl_beta:
-            training_args_kwargs["beta"] = self.cfg.trl.beta or self.cfg.rl_beta
-        if self.cfg.orpo_alpha:
+        if self.cfg.trl and self.cfg.trl.beta is not None:
+            training_args_kwargs["beta"] = self.cfg.trl.beta
+        elif self.cfg.rl_beta is not None:
+            training_args_kwargs["beta"] = self.cfg.rl_beta
+        elif self.cfg.orpo_alpha is not None:
             # trl does some odd mapping of alpha to beta to reuse the beta parameter ???
             training_args_kwargs["beta"] = self.cfg.orpo_alpha
 