set the number of dataset processes on the DPO Config rather than the trainer (#1762)

2024-07-17 15:38:37 -04:00
parent 8731b95d04
commit c86c32a627
1 changed file with 1 addition and 2 deletions
--- a/src/axolotl/core/trainer_builder.py
+++ b/src/axolotl/core/trainer_builder.py
@@ -1687,6 +1687,7 @@ class HFRLTrainerBuilder(TrainerBuilderBase):
            # trl does some odd mapping of alpha to beta to reuse the beta parameter ???
            training_args_kwargs["beta"] = self.cfg.orpo_alpha

+        training_args_kwargs["dataset_num_proc"] = self.cfg.dataset_processes
        training_args_cls = AxolotlDPOConfig
        if self.cfg.rpo_alpha is not None:
            training_args_kwargs["rpo_alpha"] = self.cfg.rpo_alpha
@@ -1754,8 +1755,6 @@ class HFRLTrainerBuilder(TrainerBuilderBase):
            dpo_trainer_kwargs["max_target_length"] = None
            dpo_trainer_kwargs["max_prompt_length"] = self.cfg.sequence_len
            dpo_trainer_kwargs["generate_during_eval"] = True
-            if self.cfg.rl == "dpo":
-                dpo_trainer_kwargs["dataset_num_proc"] = self.cfg.dataset_processes
        elif self.cfg.rl == "orpo":
            trainer_cls = AxolotlORPOTrainer
            trainer_cls_args = [self.model]