skip some flash attn patches unless explicitly enabled (#643)

* skip some flash attn patches if explicitly disabled
* make the other patches optional
2023-09-27 12:11:07 -04:00
parent e7d3e2dbb6
commit 895f0a0723
3 changed files with 38 additions and 24 deletions
--- a/README.md
+++ b/README.md
@@ -636,6 +636,8 @@ flash_optimum:
 xformers_attention:
 # whether to use flash attention patch https://github.com/Dao-AILab/flash-attention:
 flash_attention:
+flash_attn_cross_entropy:  # Whether to use the flash-attention cross-entropy implementation (advanced use only)
+flash_attn_rms_norm:  # Whether to use the flash-attention RMS norm implementation (advanced use only)
 # whether to use scaled-dot-product attention
 # https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html
 sdp_attention: