fix: set replit mpt model to use eager attention

This commit is contained in:
NanoCode012
2025-05-07 16:55:01 +07:00
parent 6ee7cb30fa
commit d0c4930dd5

View File

@@ -38,7 +38,7 @@ tf32: true
gradient_checkpointing:
resume_from_checkpoint:
logging_steps: 1
-attention: flash
+attention: eager
gptq_groupsize:
gptq_model_v1:
warmup_steps: 20