From 8aa722a140392f4657d378fdea1075a0a84d0dca Mon Sep 17 00:00:00 2001
From: NanoCode012 <nano@axolotl.ai>
Date: Fri, 21 Feb 2025 13:55:34 +0700
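Subject: [PATCH] fix: ignore max_length for grpo

Add "max_length" to the list returned by
GRPOStrategy.get_blocklist_args_kwargs, alongside "dataset_num_proc",
and drop that method's "-> list[str]" return annotation.

Also return reward_func_fqn instead of reward_func after logging that
the reward function is being treated as a pre-trained model path.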
---
 src/axolotl/core/trainers/grpo/__init__.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/axolotl/core/trainers/grpo/__init__.py b/src/axolotl/core/trainers/grpo/__init__.py
index f4685893b..1b240c6b4 100644
--- a/src/axolotl/core/trainers/grpo/__init__.py
+++ b/src/axolotl/core/trainers/grpo/__init__.py
@@ -131,8 +131,8 @@ class GRPOStrategy:
         return None
 
     @classmethod
-    def get_blocklist_args_kwargs(cls) -> list[str]:
-        return ["dataset_num_proc"]
+    def get_blocklist_args_kwargs(cls):
+        return ["dataset_num_proc", "max_length"]
 
     @classmethod
     def get_reward_func(cls, reward_func_fqn: str) -> RewardFunc:
@@ -167,4 +167,4 @@ class GRPOStrategy:
             LOG.info(
                 f"Reward function {reward_func_fqn} is a pre-trained model path - if this is unexpected, please check the reward function path."
             )
-            return reward_func
+            return reward_func_fqn