fix field_messages mapping for Capybara/OpenHermes ShareGPT format

This commit is contained in:
2026-05-13 12:56:03 +00:00
parent b9ceebfe7e
commit c02a76f132

View File

@@ -29,15 +29,21 @@ default_system_message: >-
a clear, useful, conversational answer as if talking to a knowledgeable friend.
# --- Datasets ---
# Capybara: ~16k carefully curated multi-turn conversations — best for natural tone
# OpenHermes-2.5: broad instruction coverage — limited to the first 5% of the
#   train split to keep balance (`train[:5%]` is a leading slice, not a random
#   sample; shuffle upstream if a representative subset is required)
# Both entries below are read as ShareGPT-style records:
#   field: conversations, role key: from, content key: value
# NOTE(review): the LDJnr/Capybara dataset card appears to expose a
#   `conversation` column of {input, output} pairs rather than
#   `conversations` / from / value — confirm the column names before training,
#   or point this entry at a ShareGPT-converted copy.
datasets:
  # Each list item is one dataset; the keys indented under `- path:` belong to
  # that item only and must not sit at the top level of the file.
  - path: LDJnr/Capybara
    type: chat_template
    field_messages: conversations
    message_field_role: from
    message_field_content: value
    split: train
  - path: teknium/OpenHermes-2.5
    type: chat_template
    field_messages: conversations
    message_field_role: from
    message_field_content: value
    # Quoted because the value starts a slice expression containing `[` and `%`.
    split: "train[:5%]"
# NOTE(review): presumably the relative directory where the trainer caches the
# preprocessed/tokenized datasets between runs — confirm against the trainer docs.
dataset_prepared_path: last_run_prepared