most model types now support flash attention 2 regardless of multipack support (#1854)
This commit is contained in:
@@ -17,6 +17,7 @@ SUPPORTED_MULTIPACK_MODEL_TYPES = [
|
||||
"qwen2_moe",
|
||||
"falcon",
|
||||
"phi",
|
||||
"phi3",
|
||||
"gemma",
|
||||
"gemma2",
|
||||
"gemmoe",
|
||||
|
||||
Reference in New Issue
Block a user