Fix handling of zero values for beta (#2538)
This commit is contained in:
@@ -1040,9 +1040,11 @@ class HFRLTrainerBuilder(TrainerBuilderBase):
|
|||||||
if self.cfg.dataset_processes:
|
if self.cfg.dataset_processes:
|
||||||
training_args_kwargs["dataset_num_proc"] = self.cfg.dataset_processes
|
training_args_kwargs["dataset_num_proc"] = self.cfg.dataset_processes
|
||||||
|
|
||||||
if (self.cfg.trl and self.cfg.trl.beta) or self.cfg.rl_beta:
|
if self.cfg.trl and self.cfg.trl.beta is not None:
|
||||||
training_args_kwargs["beta"] = self.cfg.trl.beta or self.cfg.rl_beta
|
training_args_kwargs["beta"] = self.cfg.trl.beta
|
||||||
if self.cfg.orpo_alpha:
|
elif self.cfg.rl_beta is not None:
|
||||||
|
training_args_kwargs["beta"] = self.cfg.rl_beta
|
||||||
|
elif self.cfg.orpo_alpha is not None:
|
||||||
# trl does some odd mapping of alpha to beta to reuse the beta parameter ???
|
# trl does some odd mapping of alpha to beta to reuse the beta parameter ???
|
||||||
training_args_kwargs["beta"] = self.cfg.orpo_alpha
|
training_args_kwargs["beta"] = self.cfg.orpo_alpha
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user