Distributed Muon Optimizer (#3264)

* init

* working

* updating configs

* removing unneeded files

* lint

* comments

* lint

* fix regex match

* bump contribs version

* comments

* fixing tests and imports

* muon imports in test v2

* test cleanup

* bump contribs version

---------

Co-authored-by: Salman Mohammadi <salman.mohammadi@outlook.com>
This commit is contained in:
salman
2025-12-19 16:43:47 +01:00
committed by GitHub
parent 3750d7dd64
commit bbd3486f57
9 changed files with 387 additions and 55 deletions

View File

@@ -123,6 +123,17 @@ class TestFSDPValidation:
assert cfg.fsdp_config.transformer_layer_cls_to_wrap == "LlamaDecoderLayer"
assert cfg.fsdp_config.reshard_after_forward is True
def test_muon_fsdp1_rejected(self, min_base_cfg):
    """Check that selecting Muon together with FSDP version 1 fails validation.

    The base config is merged with overrides that pick the ``muon`` optimizer
    and ``fsdp_version=1``; ``validate_config`` is then expected to raise a
    ``ValueError`` whose message mentions the FSDP2-only restriction.
    """
    muon_fsdp1_overrides = DictDefault(
        optimizer="muon",
        fsdp_version=1,
        fsdp_config={"reshard_after_forward": True},
    )
    cfg = min_base_cfg | muon_fsdp1_overrides

    # ``match`` is applied as a regex search against the error text.
    expected_message = "Muon optimizer is only compatible with FSDP2"
    with pytest.raises(ValueError, match=expected_message):
        validate_config(cfg)
@pytest.mark.parametrize(
"rl",
[