From 2675fb756e13f4e6b7184628c4152929b0ff42c2 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Wed, 31 May 2023 00:02:29 -0400 Subject: [PATCH] update readme for SDP --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index e1391e39b..853681769 100644 --- a/README.md +++ b/README.md @@ -300,6 +300,9 @@ weight_decay: xformers_attention: # whether to use flash attention patch https://github.com/HazyResearch/flash-attention: flash_attention: # require a100 for llama +# whether to use scaled-dot-product attention +# https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html +sdp_attention: # resume from a specific checkpoint dir resume_from_checkpoint: