Fix deprecated `type: sharegpt` and `flash_attention` config keys
This commit is contained in:
@@ -33,13 +33,11 @@ default_system_message: >-
|
|||||||
# OpenHermes-2.5: broad instruction coverage — sampled to 5% to keep balance
|
# OpenHermes-2.5: broad instruction coverage — sampled to 5% to keep balance
|
||||||
datasets:
|
datasets:
|
||||||
- path: LDJnr/Capybara
|
- path: LDJnr/Capybara
|
||||||
type: sharegpt
|
type: chat_template
|
||||||
conversation: llama3
|
|
||||||
split: train
|
split: train
|
||||||
|
|
||||||
- path: teknium/OpenHermes-2.5
|
- path: teknium/OpenHermes-2.5
|
||||||
type: sharegpt
|
type: chat_template
|
||||||
conversation: llama3
|
|
||||||
split: "train[:5%]"
|
split: "train[:5%]"
|
||||||
|
|
||||||
dataset_prepared_path: last_run_prepared
|
dataset_prepared_path: last_run_prepared
|
||||||
@@ -75,7 +73,7 @@ tf32: false
|
|||||||
|
|
||||||
# --- Memory & speed ---
|
# --- Memory & speed ---
|
||||||
gradient_checkpointing: true
|
gradient_checkpointing: true
|
||||||
flash_attention: true
|
attn_implementation: flash_attention_2
|
||||||
|
|
||||||
# --- Logging & checkpointing ---
|
# --- Logging & checkpointing ---
|
||||||
logging_steps: 10
|
logging_steps: 10
|
||||||
|
|||||||
Reference in New Issue
Block a user