migrate example configs to canonical attn_implementation

This commit is contained in:
Wing Lian
2026-04-23 22:15:07 +00:00
parent 2d64d009d8
commit 39226623d2
222 changed files with 209 additions and 243 deletions

View File

@@ -47,8 +47,7 @@ lora_dropout: 0.05
lora_target_linear: true
bf16: auto
flash_attention: false # strided EBFT overrides to flex_attention (or eager fallback) at runtime
flex_attention: true # fused flex_attention kernel compiles itself; don't set torch_compile: true
attn_implementation: flex_attention
# (full-model compile conflicts with gradient checkpointing + flex_attention)
gradient_checkpointing: true
gradient_checkpointing_kwargs: