fix deprecated type:sharegpt and flash_attention config keys

This commit is contained in:
2026-05-13 12:52:25 +00:00
parent e9a3fd483f
commit b9ceebfe7e

View File

@@ -33,13 +33,11 @@ default_system_message: >-
# OpenHermes-2.5: broad instruction coverage — sampled to 5% to keep balance
datasets:
- path: LDJnr/Capybara
-    type: sharegpt
-    conversation: llama3
+    type: chat_template
split: train
- path: teknium/OpenHermes-2.5
-    type: sharegpt
-    conversation: llama3
+    type: chat_template
split: "train[:5%]"
dataset_prepared_path: last_run_prepared
@@ -75,7 +73,7 @@ tf32: false
# --- Memory & speed ---
gradient_checkpointing: true
-flash_attention: true
+attn_implementation: flash_attention_2
# --- Logging & checkpointing ---
logging_steps: 10