fix
This commit is contained in:
@@ -100,18 +100,6 @@ ARCHETYPES = (
|
|||||||
},
|
},
|
||||||
[(4, 2048), (8, 4096)],
|
[(4, 2048), (8, 4096)],
|
||||||
),
|
),
|
||||||
(
|
|
||||||
"dbrx",
|
|
||||||
{
|
|
||||||
"hidden_size": 6144,
|
|
||||||
"moe_intermediate_size": 24576,
|
|
||||||
"n_experts": 16,
|
|
||||||
"top_k": 2,
|
|
||||||
"groups": 4,
|
|
||||||
"group_size": 192,
|
|
||||||
},
|
|
||||||
[(4, 4096), (8, 8192)],
|
|
||||||
),
|
|
||||||
(
|
(
|
||||||
"qwen",
|
"qwen",
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -375,6 +375,13 @@ def patch_deepseek_v3_moe(
|
|||||||
def patched_moe(self, hidden_states, topk_indices, topk_weights):
|
def patched_moe(self, hidden_states, topk_indices, topk_weights):
|
||||||
backend_sel = getattr(self, "_axolotl_triton_backend", backend)
|
backend_sel = getattr(self, "_axolotl_triton_backend", backend)
|
||||||
group_size_sel = getattr(self, "_axolotl_group_size_m", group_size_m)
|
group_size_sel = getattr(self, "_axolotl_group_size_m", group_size_m)
|
||||||
|
if backend_sel == "cg" and group_size_sel != _GROUP_SIZE_M:
|
||||||
|
LOG.debug(
|
||||||
|
"Adjusting group_size_m=%s to %s for CG backend",
|
||||||
|
group_size_sel,
|
||||||
|
_GROUP_SIZE_M,
|
||||||
|
)
|
||||||
|
group_size_sel = _GROUP_SIZE_M
|
||||||
try:
|
try:
|
||||||
return _moe_triton_forward(
|
return _moe_triton_forward(
|
||||||
self,
|
self,
|
||||||
|
|||||||
Reference in New Issue
Block a user