update readme for SDP

This commit is contained in:
Wing Lian
2023-05-31 00:02:29 -04:00
parent 1076bcbbca
commit 2675fb756e

View File

@@ -300,6 +300,9 @@ weight_decay:
xformers_attention:
# whether to use flash attention patch https://github.com/HazyResearch/flash-attention:
flash_attention: # requires an A100 for llama
# whether to use scaled dot-product attention
# https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html
sdp_attention:
# resume from a specific checkpoint dir
resume_from_checkpoint: