order matters
This commit is contained in:
@@ -1019,7 +1019,7 @@ class HFRLTrainerBuilder(TrainerBuilderBase):
|
|||||||
training_args_kwargs.update(DPOConfig.set_training_args_kwargs(self.cfg))
|
training_args_kwargs.update(DPOConfig.set_training_args_kwargs(self.cfg))
|
||||||
|
|
||||||
training_args = training_args_cls( # pylint: disable=unexpected-keyword-arg
|
training_args = training_args_cls( # pylint: disable=unexpected-keyword-arg
|
||||||
output_dir=self.cfg.output_dir,
|
self.cfg.output_dir,
|
||||||
per_device_train_batch_size=self.cfg.micro_batch_size,
|
per_device_train_batch_size=self.cfg.micro_batch_size,
|
||||||
max_steps=self.cfg.max_steps or total_num_steps,
|
max_steps=self.cfg.max_steps or total_num_steps,
|
||||||
gradient_accumulation_steps=self.cfg.gradient_accumulation_steps,
|
gradient_accumulation_steps=self.cfg.gradient_accumulation_steps,
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ from axolotl.core.training_args import AxolotlTrainingMixins
|
|||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class AxolotlGRPOConfig(GRPOConfig, AxolotlTrainingMixins):
|
class AxolotlGRPOConfig(AxolotlTrainingMixins, GRPOConfig):
|
||||||
"""
|
"""
|
||||||
Axolotl GRPO Config for GRPO training
|
Axolotl GRPO Config for GRPO training
|
||||||
"""
|
"""
|
||||||
|
|||||||
Reference in New Issue
Block a user