From c810599c669ed514e88e6a6f9280a208ae223cf8 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Mon, 3 Feb 2025 00:47:48 -0500 Subject: [PATCH] order matters --- src/axolotl/core/trainer_builder.py | 2 +- src/axolotl/core/trainers/grpo/args.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/axolotl/core/trainer_builder.py b/src/axolotl/core/trainer_builder.py index c5136ce0a..18c67919c 100755 --- a/src/axolotl/core/trainer_builder.py +++ b/src/axolotl/core/trainer_builder.py @@ -1019,7 +1019,7 @@ class HFRLTrainerBuilder(TrainerBuilderBase): training_args_kwargs.update(DPOConfig.set_training_args_kwargs(self.cfg)) training_args = training_args_cls( # pylint: disable=unexpected-keyword-arg - output_dir=self.cfg.output_dir, + self.cfg.output_dir, per_device_train_batch_size=self.cfg.micro_batch_size, max_steps=self.cfg.max_steps or total_num_steps, gradient_accumulation_steps=self.cfg.gradient_accumulation_steps, diff --git a/src/axolotl/core/trainers/grpo/args.py b/src/axolotl/core/trainers/grpo/args.py index e5f2cc254..e14e6b0dc 100644 --- a/src/axolotl/core/trainers/grpo/args.py +++ b/src/axolotl/core/trainers/grpo/args.py @@ -9,7 +9,7 @@ from axolotl.core.training_args import AxolotlTrainingMixins @dataclass -class AxolotlGRPOConfig(GRPOConfig, AxolotlTrainingMixins): +class AxolotlGRPOConfig(AxolotlTrainingMixins, GRPOConfig): """ Axolotl GRPO Config for GRPO training """