feat: Add Mistral Medium 3.5 (#3633)

* fix: clarify incompat

* fix: transformers api change upstream

* fix: add pre prop

* feat: add examples

* chore: cleanup

* chore: update readme
This commit is contained in:
NanoCode012
2026-04-29 22:46:51 +07:00
committed by GitHub
parent ac77da96da
commit ebbd7fa847
9 changed files with 210 additions and 7 deletions

View File

@@ -26,7 +26,6 @@ lora_model_dir:
sequence_len: 2048
sample_packing: true
lora_r: 32
lora_alpha: 16
lora_dropout: 0
@@ -52,7 +51,7 @@ gradient_checkpointing: true
resume_from_checkpoint:
logging_steps: 1
flash_attention: true
-scaling_softmax: true
+# scaling_softmax: true # needs flex_attention
loss_watchdog_threshold: 5.0
loss_watchdog_patience: 3