fix
This commit is contained in:
@@ -100,18 +100,6 @@ ARCHETYPES = (
|
||||
},
|
||||
[(4, 2048), (8, 4096)],
|
||||
),
|
||||
(
|
||||
"dbrx",
|
||||
{
|
||||
"hidden_size": 6144,
|
||||
"moe_intermediate_size": 24576,
|
||||
"n_experts": 16,
|
||||
"top_k": 2,
|
||||
"groups": 4,
|
||||
"group_size": 192,
|
||||
},
|
||||
[(4, 4096), (8, 8192)],
|
||||
),
|
||||
(
|
||||
"qwen",
|
||||
{
|
||||
|
||||
@@ -375,6 +375,13 @@ def patch_deepseek_v3_moe(
|
||||
def patched_moe(self, hidden_states, topk_indices, topk_weights):
|
||||
backend_sel = getattr(self, "_axolotl_triton_backend", backend)
|
||||
group_size_sel = getattr(self, "_axolotl_group_size_m", group_size_m)
|
||||
if backend_sel == "cg" and group_size_sel != _GROUP_SIZE_M:
|
||||
LOG.debug(
|
||||
"Adjusting group_size_m=%s to %s for CG backend",
|
||||
group_size_sel,
|
||||
_GROUP_SIZE_M,
|
||||
)
|
||||
group_size_sel = _GROUP_SIZE_M
|
||||
try:
|
||||
return _moe_triton_forward(
|
||||
self,
|
||||
|
||||
Reference in New Issue
Block a user