diff --git a/src/axolotl/core/trainer_builder.py b/src/axolotl/core/trainer_builder.py index b0eea55b1..3952cd593 100755 --- a/src/axolotl/core/trainer_builder.py +++ b/src/axolotl/core/trainer_builder.py @@ -1687,6 +1687,7 @@ class HFRLTrainerBuilder(TrainerBuilderBase): # trl does some odd mapping of alpha to beta to reuse the beta parameter ??? training_args_kwargs["beta"] = self.cfg.orpo_alpha + training_args_kwargs["dataset_num_proc"] = self.cfg.dataset_processes training_args_cls = AxolotlDPOConfig if self.cfg.rpo_alpha is not None: training_args_kwargs["rpo_alpha"] = self.cfg.rpo_alpha @@ -1754,8 +1755,6 @@ class HFRLTrainerBuilder(TrainerBuilderBase): dpo_trainer_kwargs["max_target_length"] = None dpo_trainer_kwargs["max_prompt_length"] = self.cfg.sequence_len dpo_trainer_kwargs["generate_during_eval"] = True - if self.cfg.rl == "dpo": - dpo_trainer_kwargs["dataset_num_proc"] = self.cfg.dataset_processes elif self.cfg.rl == "orpo": trainer_cls = AxolotlORPOTrainer trainer_cls_args = [self.model]