From 28804b82e401a7e0b5c511a2714e241c025f91ef Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Sun, 27 Jul 2025 17:04:42 -0400 Subject: [PATCH] don't create a reference model if grpo beta is 0.0 (#2983) [skip ci] --- src/axolotl/train.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/axolotl/train.py b/src/axolotl/train.py index 967179903..d57cb463e 100644 --- a/src/axolotl/train.py +++ b/src/axolotl/train.py @@ -115,8 +115,11 @@ def setup_reference_model( LOG.debug("Passing model_ref: None to RL trainer") model_ref = None # explicit setting to None else: + reference_model: bool = True + if cfg.rl == RLType.GRPO and cfg.trl.beta == 0: + reference_model = False # load the model again for model_ref/baseline - model_loader = ModelLoader(cfg, tokenizer, reference_model=True) + model_loader = ModelLoader(cfg, tokenizer, reference_model=reference_model) model_ref, _ = model_loader.load() return model_ref