From a1958b09dedc50cfce4efd4aa9c79b52edf80dab Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Wed, 5 Feb 2025 13:01:52 -0500 Subject: [PATCH] separately include max_completion_length --- src/axolotl/core/trainers/grpo/__init__.py | 3 +++ src/axolotl/utils/config/models/input/v0_4_1/__init__.py | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/src/axolotl/core/trainers/grpo/__init__.py b/src/axolotl/core/trainers/grpo/__init__.py index b04373a95..f102d1c03 100644 --- a/src/axolotl/core/trainers/grpo/__init__.py +++ b/src/axolotl/core/trainers/grpo/__init__.py @@ -43,6 +43,9 @@ class GRPOStrategy: ] = cfg.grpo_vllm_gpu_memory_utilization if cfg.grpo_num_generations: grpo_args_kwargs["num_generations"] = cfg.grpo_num_generations + grpo_args_kwargs["max_completion_length"] = ( + cfg.max_completion_length or cfg.sequence_len + ) return grpo_args_kwargs @classmethod diff --git a/src/axolotl/utils/config/models/input/v0_4_1/__init__.py b/src/axolotl/utils/config/models/input/v0_4_1/__init__.py index b09d89bd5..bb881bfd5 100644 --- a/src/axolotl/utils/config/models/input/v0_4_1/__init__.py +++ b/src/axolotl/utils/config/models/input/v0_4_1/__init__.py @@ -757,6 +757,12 @@ class AxolotlInputConfig( default=512, json_schema_extra={"description": "maximum prompt length for RL training"}, ) + max_completion_length: Optional[int] = Field( + default=None, + json_schema_extra={ + "description": "Maximum length of the completion for RL training" + }, + ) sample_packing: Optional[bool] = None sample_packing_group_size: Optional[int] = 100_000 sample_packing_bin_size: Optional[int] = 200