fix: ignore max_length for grpo

2025-02-21 13:55:34 +07:00
parent edaec9fe98
commit 8aa722a140
1 changed files with 3 additions and 3 deletions
--- a/src/axolotl/core/trainers/grpo/__init__.py
+++ b/src/axolotl/core/trainers/grpo/__init__.py
@@ -131,8 +131,8 @@ class GRPOStrategy:
        return None

    @classmethod
-    def get_blocklist_args_kwargs(cls) -> list[str]:
-        return ["dataset_num_proc"]
+    def get_blocklist_args_kwargs(cls):
+        return ["dataset_num_proc", "max_length"]

    @classmethod
    def get_reward_func(cls, reward_func_fqn: str) -> RewardFunc:
@@ -167,4 +167,4 @@ class GRPOStrategy:
            LOG.info(
                f"Reward function {reward_func_fqn} is a pre-trained model path - if this is unexpected, please check the reward function path."
            )
-            return reward_func
+            return reward_func_fqn