diff --git a/examples/mistral/mistral-dpo-qlora.yml b/examples/mistral/mistral-dpo-qlora.yml
new file mode 100644
index 000000000..a558e0453
--- /dev/null
+++ b/examples/mistral/mistral-dpo-qlora.yml
@@ -0,0 +1,105 @@
+# Note that we are switching from Mistral's default chat template to chatml.
+# If you experience problems with the special tokens, training for more epochs can help.
+# After training, merge the adapter into the base model before inference;
+# otherwise you may run into problems with the special tokens.
+
+base_model: mistralai/Mistral-7B-Instruct-v0.2
+model_type: MistralForCausalLM
+tokenizer_type: LlamaTokenizer
+
+load_in_8bit: false
+load_in_4bit: true
+strict: false
+
+chat_template: chatml
+rl: dpo
+datasets:
+  - path: olivermolenschot/alpaca_messages_dpo_test
+    type: chat_template.default
+    field_messages: conversation
+    field_chosen: chosen
+    field_rejected: rejected
+    message_field_role: role
+    message_field_content: content
+
+dataset_prepared_path:
+val_set_size: 0.05
+output_dir: ./outputs/dpo-qlora
+
+sequence_len: 2048
+sample_packing: false
+pad_to_sequence_len: true
+
+adapter: qlora
+lora_model_dir:
+lora_r: 8
+lora_alpha: 16
+lora_dropout: 0.2
+lora_target_linear: true
+lora_fan_in_fan_out:
+
+lora_target_modules:
+  - gate_proj
+  - down_proj
+  - up_proj
+  - q_proj
+  - v_proj
+  - k_proj
+  - o_proj
+lora_modules_to_save:
+  - embed_tokens
+  - lm_head
+
+wandb_project:
+wandb_entity:
+wandb_watch:
+wandb_name:
+wandb_log_model:
+
+gradient_accumulation_steps: 4
+micro_batch_size: 16
+num_epochs: 6
+optimizer: adamw_bnb_8bit
+lr_scheduler: cosine
+learning_rate: 0.0001
+
+train_on_inputs: false
+group_by_length: false
+bf16: auto
+fp16:
+tf32: false
+
+gradient_checkpointing: true
+early_stopping_patience:
+resume_from_checkpoint:
+local_rank:
+logging_steps: 1
+xformers_attention:
+flash_attention: false
+s2_attention:
+
+warmup_steps: 10
+evals_per_epoch: 4
+eval_table_size:
+eval_max_new_tokens: 128
+saves_per_epoch: 1
+debug:
+deepspeed:
+weight_decay: 0.0
+fsdp:
+fsdp_config:
+special_tokens:
+  bos_token: "<|im_start|>"
+  eos_token: "<|im_end|>"
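+
+# A minimal sketch of one record matching the dataset field mapping above
+# (assumed shape, one JSON object per line; check the actual dataset on the Hub):
+#   {"conversation": [{"role": "user", "content": "What is 2+2?"}],
+#    "chosen": {"role": "assistant", "content": "2+2 equals 4."},
+#    "rejected": {"role": "assistant", "content": "2+2 equals 5."}}
+
+# Example usage:
+#   accelerate launch -m axolotl.cli.train examples/mistral/mistral-dpo-qlora.yml
+# Merge the QLoRA adapter into the base model before inference:
+#   python -m axolotl.cli.merge_lora examples/mistral/mistral-dpo-qlora.yml \
+#     --lora_model_dir="./outputs/dpo-qlora"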