Merge pull request #124 from OpenAccess-AI-Collective/xformers-fix

Copy the xformers attention patch from ooba, since we removed the dependency on alpaca_lora_4bit
2023-05-31 00:11:40 -04:00
parent c7021e191f c56818b119
commit 2d0ba3b818
5 changed files with 251 additions and 1 deletions
--- a/README.md
+++ b/README.md
@@ -303,6 +303,9 @@ weight_decay:
 xformers_attention:
 # whether to use the flash attention patch: https://github.com/HazyResearch/flash-attention
 flash_attention:  # requires an A100 for llama
+# whether to use scaled-dot-product attention
+# https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html
+sdp_attention:

 # resume from a specific checkpoint dir
 resume_from_checkpoint: