Only fuse if flash_attn_fuse_mlp is True
This commit is contained in:
@@ -385,8 +385,9 @@ def load_model(
|
|||||||
**model_kwargs,
|
**model_kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
LOG.info("Mixtral MoE: Replacing experts with SwiGLU")
|
if cfg.flash_attn_fuse_mlp:
|
||||||
replace_mixtral_mlp_with_swiglu(model)
|
LOG.info("Mixtral MoE: Replacing experts with SwiGLU")
|
||||||
|
replace_mixtral_mlp_with_swiglu(model)
|
||||||
|
|
||||||
elif model_type == "MambaLMHeadModel":
|
elif model_type == "MambaLMHeadModel":
|
||||||
# FIXME this is janky at best and hacked together to make it work
|
# FIXME this is janky at best and hacked together to make it work
|
||||||
|
|||||||
Reference in New Issue
Block a user