skip some flash attn patches unless explicitly enabled (#643)
* skip some flash attn patches if explicitly disabled * make the other patches optional
This commit is contained in:
@@ -636,6 +636,8 @@ flash_optimum:
|
||||
xformers_attention:
|
||||
# whether to use flash attention patch https://github.com/Dao-AILab/flash-attention:
|
||||
flash_attention:
|
||||
flash_attn_cross_entropy: # Whether to use flash-attention cross entropy implementation - advanced use only
|
||||
flash_attn_rms_norm: # Whether to use flash-attention rms norm implementation - advanced use only
|
||||
# whether to use scaled-dot-product attention
|
||||
# https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html
|
||||
sdp_attention:
|
||||
|
||||
Reference in New Issue
Block a user