This commit is contained in:
Dan Saunders
2025-09-25 15:55:08 -04:00
parent 824a641cee
commit d90ade3b1b
2 changed files with 7 additions and 12 deletions

View File

@@ -100,18 +100,6 @@ ARCHETYPES = (
},
[(4, 2048), (8, 4096)],
),
(
"dbrx",
{
"hidden_size": 6144,
"moe_intermediate_size": 24576,
"n_experts": 16,
"top_k": 2,
"groups": 4,
"group_size": 192,
},
[(4, 4096), (8, 8192)],
),
(
"qwen",
{

View File

@@ -375,6 +375,13 @@ def patch_deepseek_v3_moe(
def patched_moe(self, hidden_states, topk_indices, topk_weights):
backend_sel = getattr(self, "_axolotl_triton_backend", backend)
group_size_sel = getattr(self, "_axolotl_group_size_m", group_size_m)
if backend_sel == "cg" and group_size_sel != _GROUP_SIZE_M:
LOG.debug(
"Adjusting group_size_m=%s to %s for CG backend",
group_size_sel,
_GROUP_SIZE_M,
)
group_size_sel = _GROUP_SIZE_M
try:
return _moe_triton_forward(
self,