Feat: Add landmark attention

This commit is contained in:
NanoCode012
2023-06-09 12:54:08 +09:00
parent febe902517
commit 55b8542de8
4 changed files with 1635 additions and 7 deletions

View File

@@ -416,6 +416,8 @@ flash_attention: # require a100 for llama
# whether to use scaled-dot-product attention
# https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html
sdp_attention:
# whether to use landmark attention (only supported for llama)
landmark_attention:
# resume from a specific checkpoint dir
resume_from_checkpoint: