diff --git a/human_chat_qlora.yml b/human_chat_qlora.yml
index cb17d5b6e..0ff381e14 100644
--- a/human_chat_qlora.yml
+++ b/human_chat_qlora.yml
@@ -29,15 +29,16 @@ default_system_message: >-
   a clear, useful, conversational answer as if talking to a knowledgeable friend.
 
 # --- Datasets ---
-# Both Capybara and OpenHermes use ShareGPT format:
-# field: conversations, role key: from, content key: value
+# Both use ShareGPT format: conversations field, from/value keys
+# SlimOrca: ~15k sample of high-quality multi-turn conversations
+# OpenHermes-2.5: broad instruction coverage, 5% sample (~50k)
 datasets:
-  - path: LDJnr/Capybara
+  - path: Open-Orca/SlimOrca
     type: chat_template
     field_messages: conversations
     message_field_role: from
     message_field_content: value
-    split: train
+    split: "train[:3%]"
 
   - path: teknium/OpenHermes-2.5
     type: chat_template