fix token state json and mistral tokenizer issue (#3522) [skip ci]

* fix token state json and mistral tokenizer issue

* centralize constants

* forgot to commit constants file

* Fix weakref in pickling relora state dict

* make curl a bit quieter so it doesn't log 2K lines

* fix path traversal for olmoe test

* more test fixes that weren't flagged previously

* chore: lint

* skip tests that fail b/c of OutOfResources

* scattermoe as slow tests

* update fbgemm-genai for torch 2.10
This commit is contained in:
Wing Lian
2026-03-21 22:46:10 -04:00
committed by GitHub
parent 2c05847a5f
commit 0ee98a0309
22 changed files with 249 additions and 57 deletions

View File

@@ -70,7 +70,7 @@ class TestMixtral(unittest.TestCase):
model, _, _ = train(cfg=cfg, dataset_meta=dataset_meta)
assert (
model.base_model.model.model.layers[0].block_sparse_moe.gate.weight.dtype
model.base_model.model.model.layers[0].mlp.gate.weight.dtype
== torch.float32
)
check_model_output_exists(temp_dir, cfg)
@@ -125,7 +125,7 @@ class TestMixtral(unittest.TestCase):
model, _, _ = train(cfg=cfg, dataset_meta=dataset_meta)
assert (
model.base_model.model.model.layers[0].block_sparse_moe.gate.weight.dtype
model.base_model.model.model.layers[0].mlp.gate.weight.dtype
== torch.float32
)
check_model_output_exists(temp_dir, cfg)
@@ -183,7 +183,7 @@ class TestMixtral(unittest.TestCase):
model, _, _ = train(cfg=cfg, dataset_meta=dataset_meta)
assert (
model.base_model.model.model.layers[0].block_sparse_moe.gate.weight.dtype
model.base_model.model.model.layers[0].mlp.gate.weight.dtype
== torch.float32
)
check_model_output_exists(temp_dir, cfg)
@@ -241,7 +241,7 @@ class TestMixtral(unittest.TestCase):
model, _, _ = train(cfg=cfg, dataset_meta=dataset_meta)
assert (
model.base_model.model.model.layers[0].block_sparse_moe.gate.weight.dtype
model.base_model.model.model.layers[0].mlp.gate.weight.dtype
== torch.float32
)
check_model_output_exists(temp_dir, cfg)