diff --git a/human_chat_qlora.yml b/human_chat_qlora.yml
index dbe6e4db3..79ee0b40b 100644
--- a/human_chat_qlora.yml
+++ b/human_chat_qlora.yml
@@ -33,13 +33,11 @@ default_system_message: >-
 # OpenHermes-2.5: broad instruction coverage — sampled to 5% to keep balance
 datasets:
   - path: LDJnr/Capybara
-    type: sharegpt
-    conversation: llama3
+    type: chat_template
     split: train
 
   - path: teknium/OpenHermes-2.5
-    type: sharegpt
-    conversation: llama3
+    type: chat_template
     split: "train[:5%]"
 
 dataset_prepared_path: last_run_prepared
@@ -75,7 +73,7 @@ tf32: false
 
 # --- Memory & speed ---
 gradient_checkpointing: true
-flash_attention: true
+attn_implementation: flash_attention_2
 
 # --- Logging & checkpointing ---
 logging_steps: 10