fix: pop optimizer_cls_and_kwargs from training_args in the RL trainer builder too

This commit is contained in:
NanoCode012
2025-05-16 13:39:41 +07:00
parent 336c5f9db9
commit 5c40896d19

View File

@@ -134,6 +134,13 @@ class HFRLTrainerBuilder(TrainerBuilderBase):
def build(self, total_num_steps):
training_args = self.build_training_arguments(total_num_steps)
trainer_kwargs = {}
# Pop optimizer_cls_and_kwargs to trainer_kwargs
if "optimizer_cls_and_kwargs" in training_args:
trainer_kwargs["optimizer_cls_and_kwargs"] = training_args.pop(
"optimizer_cls_and_kwargs"
)
if self.cfg.rl is RLType.IPO:
if self.cfg.dpo_label_smoothing:
trainer_kwargs["label_smoothing"] = self.cfg.dpo_label_smoothing