most model types now support flash attention 2 regardless of multipack support (#1854)

This commit is contained in:
Wing Lian
2024-08-22 16:39:23 -04:00
committed by GitHub
parent b33dc07a77
commit fefa95e350
2 changed files with 5 additions and 10 deletions

View File

@@ -17,6 +17,7 @@ SUPPORTED_MULTIPACK_MODEL_TYPES = [
"qwen2_moe",
"falcon",
"phi",
"phi3",
"gemma",
"gemma2",
"gemmoe",