feat: Add Mistral Medium 3.5 (#3633)
* fix: clarify incompat
* fix: transformers api change upstream
* fix: add pre prop
* feat: add examples
* chore: cleanup
* chore: update readme
This commit is contained in:
@@ -59,7 +59,7 @@ gradient_checkpointing: true
|
||||
resume_from_checkpoint:
|
||||
logging_steps: 1
|
||||
flash_attention: true
|
||||
scaling_softmax: true
|
||||
# scaling_softmax: true # needs flex_attention
|
||||
|
||||
warmup_ratio: 0.1
|
||||
evals_per_epoch: 1
|
||||
|
||||
Reference in New Issue
Block a user