fix: ignore max_length for grpo
This commit is contained in:
@@ -131,8 +131,8 @@ class GRPOStrategy:
 return None
 
 @classmethod
-def get_blocklist_args_kwargs(cls) -> list[str]:
+def get_blocklist_args_kwargs(cls):
-return ["dataset_num_proc"]
+return ["dataset_num_proc", "max_length"]
 
 @classmethod
 def get_reward_func(cls, reward_func_fqn: str) -> RewardFunc:
@@ -167,4 +167,4 @@ class GRPOStrategy:
 LOG.info(
 f"Reward function {reward_func_fqn} is a pre-trained model path - if this is unexpected, please check the reward function path."
 )
-return reward_func
+return reward_func_fqn
Reference in New Issue
Block a user