From b9ceebfe7e815e2972756a5e20e7203d7495d48b Mon Sep 17 00:00:00 2001
From: tocmo0nlord
Date: Wed, 13 May 2026 12:52:25 +0000
Subject: [PATCH] fix deprecated type:sharegpt and flash_attention config keys

---
 human_chat_qlora.yml | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/human_chat_qlora.yml b/human_chat_qlora.yml
index dbe6e4db3..79ee0b40b 100644
--- a/human_chat_qlora.yml
+++ b/human_chat_qlora.yml
@@ -33,13 +33,11 @@ default_system_message: >-
 # OpenHermes-2.5: broad instruction coverage — sampled to 5% to keep balance
 datasets:
   - path: LDJnr/Capybara
-    type: sharegpt
-    conversation: llama3
+    type: chat_template
     split: train
 
   - path: teknium/OpenHermes-2.5
-    type: sharegpt
-    conversation: llama3
+    type: chat_template
     split: "train[:5%]"
 
 dataset_prepared_path: last_run_prepared
@@ -75,7 +73,7 @@ tf32: false
 
 # --- Memory & speed ---
 gradient_checkpointing: true
-flash_attention: true
+attn_implementation: flash_attention_2
 
 # --- Logging & checkpointing ---
 logging_steps: 10