Fix deprecated `type: sharegpt` and `flash_attention` config keys

2026-05-13 12:52:25 +00:00
parent e9a3fd483f
commit b9ceebfe7e
1 changed files with 3 additions and 5 deletions
--- a/human_chat_qlora.yml
+++ b/human_chat_qlora.yml
@@ -33,13 +33,11 @@ default_system_message: >-
 # OpenHermes-2.5: broad instruction coverage — sampled to 5% to keep balance
 datasets:
  - path: LDJnr/Capybara
-    type: sharegpt
-    conversation: llama3
+    type: chat_template
    split: train

  - path: teknium/OpenHermes-2.5
-    type: sharegpt
-    conversation: llama3
+    type: chat_template
    split: "train[:5%]"

 dataset_prepared_path: last_run_prepared
@@ -75,7 +73,7 @@ tf32: false

 # --- Memory & speed ---
 gradient_checkpointing: true
-flash_attention: true
+attn_implementation: flash_attention_2

 # --- Logging & checkpointing ---
 logging_steps: 10