# Llama 3.1 8B — Human-like QLoRA fine-tune
#
# Goal: natural, warm conversation; never corrects user errors; direct responses
# Hardware: single RTX 5080 (16 GB VRAM)
# Method: QLoRA (4-bit) via Axolotl
#
# Prerequisites:
#   pip install -e '.[flash-attn]'   (inside your axolotl repo)
#   huggingface-cli login            (meta-llama is a gated model)
#
# Run:
#   axolotl train human_chat_qlora.yml
#   axolotl merge-lora human_chat_qlora.yml  # (optional) merge adapter into base
---
base_model: meta-llama/Meta-Llama-3.1-8B-Instruct
model_type: LlamaForCausalLM
tokenizer_type: AutoTokenizer

load_in_4bit: true
strict: false

# --- System prompt baked into every conversation ---
# This is the primary lever for "no error correcting, more human-like".
chat_template: llama3
default_system_message: >-
  You are a direct, warm, and genuinely helpful assistant.
  Respond to the user's intent naturally — never comment on typos, grammar,
  or phrasing issues in their message. Just understand what they mean and give
  a clear, useful, conversational answer as if talking to a knowledgeable friend.

# --- Datasets ---
# Capybara: ~16k carefully curated multi-turn conversations — best for natural tone
# OpenHermes-2.5: broad instruction coverage — sliced to 5% to keep balance
datasets:
  - path: LDJnr/Capybara
    type: sharegpt
    conversation: llama3
    split: train

  - path: teknium/OpenHermes-2.5
    type: sharegpt
    conversation: llama3
    # HF datasets slicing syntax; quoted so YAML doesn't parse the brackets as flow syntax
    split: "train[:5%]"

dataset_prepared_path: last_run_prepared
val_set_size: 0.01
output_dir: ./outputs/llama31-8b-humanchat

sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true

# --- QLoRA adapter ---
adapter: qlora
lora_r: 64
lora_alpha: 32
lora_dropout: 0.05
lora_target_linear: true

# --- Training hyperparameters ---
# Effective batch = micro_batch_size x gradient_accumulation_steps = 2 x 4 = 8
micro_batch_size: 2
gradient_accumulation_steps: 4
num_epochs: 2
optimizer: paged_adamw_32bit
lr_scheduler: cosine
# Written as 2.0e-4, not 2e-4: YAML 1.1 loaders (PyYAML) only resolve exponent
# floats that contain a decimal point — bare 2e-4 loads as the STRING "2e-4".
learning_rate: 2.0e-4
warmup_ratio: 0.05
weight_decay: 0.1

train_on_inputs: false
group_by_length: false
bf16: auto
tf32: false

# --- Memory & speed ---
gradient_checkpointing: true
flash_attention: true

# --- Logging & checkpointing ---
logging_steps: 10
evals_per_epoch: 2
saves_per_epoch: 1

special_tokens:
  # NOTE(review): padding reuses <|eot_id|>, so pad shares a token id with the
  # turn terminator; many Llama-3 Axolotl configs use <|end_of_text|> instead —
  # confirm against your tokenizer and loss-masking setup before long runs.
  pad_token: "<|eot_id|>"