don't create a reference model if grpo beta is 0.0 (#2983) [skip ci]

This commit is contained in:
Wing Lian
2025-07-27 17:04:42 -04:00
committed by GitHub
parent add3e5076b
commit 28804b82e4

View File

@@ -115,8 +115,11 @@ def setup_reference_model(
             LOG.debug("Passing model_ref: None to RL trainer")
             model_ref = None # explicit setting to None
         else:
+            reference_model: bool = True
+            if cfg.rl == RLType.GRPO and cfg.trl.beta == 0:
+                reference_model = False
             # load the model again for model_ref/baseline
-            model_loader = ModelLoader(cfg, tokenizer, reference_model=True)
+            model_loader = ModelLoader(cfg, tokenizer, reference_model=reference_model)
             model_ref, _ = model_loader.load()
     return model_ref