additional args for grpo config/trainer (#2598)

2025-04-30 13:11:12 -04:00
parent 1d6e931115
commit d49a4268b8
3 changed files with 46 additions and 0 deletions
--- a/src/axolotl/core/trainers/grpo/init.py
+++ b/src/axolotl/core/trainers/grpo/init.py
@@ -70,6 +70,13 @@ class GRPOStrategy:
        if trl.scale_rewards is not None:
            grpo_args_kwargs["scale_rewards"] = trl.scale_rewards

+        if trl.loss_type is not None:
+            grpo_args_kwargs["loss_type"] = trl.loss_type
+        if trl.mask_truncated_completions is not None:
+            grpo_args_kwargs["mask_truncated_completions"] = (
+                trl.mask_truncated_completions
+            )
+
        if trl.temperature is not None:
            grpo_args_kwargs["temperature"] = trl.temperature
        if trl.top_p is not None:
@@ -85,6 +92,11 @@ class GRPOStrategy:
            grpo_args_kwargs["num_iterations"] = trl.num_iterations
        if trl.epsilon is not None:
            grpo_args_kwargs["epsilon"] = trl.epsilon
+        if trl.epsilon_high is not None:
+            grpo_args_kwargs["epsilon_high"] = trl.epsilon_high
+
+        if trl.use_liger_loss is not None:
+            grpo_args_kwargs["use_liger_loss"] = trl.use_liger_loss

        return grpo_args_kwargs

--- a/src/axolotl/utils/schemas/config.py
+++ b/src/axolotl/utils/schemas/config.py
@@ -1150,6 +1150,18 @@ class AxolotlInputConfig(

        return data

+    @model_validator(mode="before")
+    @classmethod
+    def check_grpo_peft_liger(cls, data):
+        if (
+            data.get("rl") == "grpo"
+            and data.get("trl", {})
+            and data.get("trl").get("use_liger_loss")
+            and data.get("adapter")
+        ):
+            raise ValueError("PEFT + GRPO + Liger is not yet supported")
+        return data
+
    @model_validator(mode="after")
    def check_sequence_parallel_degree(self):
        if not self.sequence_parallel_degree:
--- a/src/axolotl/utils/schemas/trl.py
+++ b/src/axolotl/utils/schemas/trl.py
@@ -133,3 +133,25 @@ class TRLConfig(BaseModel):
            "description": "Epsilon value for clipping in the GRPO algorithm."
        },
    )
+    epsilon_high: float | None = Field(
+        default=None,
+        json_schema_extra={
+            "description": "Upper-bound epsilon value for clipping in the GRPO algorithm."
+        },
+    )
+    use_liger_loss: bool | None = Field(
+        default=None,
+        json_schema_extra={"description": "Whether to use Liger loss for GRPO."},
+    )
+    loss_type: str | None = Field(
+        default=None,
+        json_schema_extra={
+            "description": "Specifies the loss formulation to use. Supported values are `grpo`, `bnpo`, and `dr_grpo`."
+        },
+    )
+    mask_truncated_completions: bool = Field(
+        default=False,
+        json_schema_extra={
+            "description": "When enabled, truncated completions are excluded from the loss calculation."
+        },
+    )