Don't create a reference model if GRPO beta is 0.0 (#2983) [skip ci]
This commit is contained in:
@@ -115,8 +115,11 @@ def setup_reference_model(
|
|||||||
LOG.debug("Passing model_ref: None to RL trainer")
|
LOG.debug("Passing model_ref: None to RL trainer")
|
||||||
model_ref = None # explicit setting to None
|
model_ref = None # explicit setting to None
|
||||||
else:
|
else:
|
||||||
|
reference_model: bool = True
|
||||||
|
if cfg.rl == RLType.GRPO and cfg.trl.beta == 0:
|
||||||
|
reference_model = False
|
||||||
# load the model again for model_ref/baseline
|
# load the model again for model_ref/baseline
|
||||||
model_loader = ModelLoader(cfg, tokenizer, reference_model=True)
|
model_loader = ModelLoader(cfg, tokenizer, reference_model=reference_model)
|
||||||
model_ref, _ = model_loader.load()
|
model_ref, _ = model_loader.load()
|
||||||
return model_ref
|
return model_ref
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user