more fixes and prep for llama training

2023-04-14 18:30:09 -04:00
parent f2a2029d0d
commit 949a27be21
3 changed files with 118 additions and 37 deletions
--- a/configs/llama_65B_alpaca.yml
+++ b/configs/llama_65B_alpaca.yml
@@ -0,0 +1,40 @@
+base_model: decapoda-research/llama-65b-hf  # LoRA fine-tune config for LLaMA-65B
+model_type: LlamaForCausalLM
+tokenizer_type: LlamaTokenizer
+load_in_8bit: true
+datasets:
+  - path: data/alpaca_data_gpt4.jsonl
+    type: alpaca
+  - path: data/vicuna_cleaned.jsonl
+    type: sharegpt
+  - path: data/gpt4-instruct-similarity-0.6-dataset.jsonl
+    type: gpteacher
+  - path: data/roleplay-similarity_0.6-instruct-dataset.jsonl
+    type: gpteacher
+val_set_size: 0.04
+adapter: lora
+lora_model_dir:  # empty value parses as null
+sequence_len: 2048
+lora_r: 8
+lora_alpha: 16
+lora_dropout: 0.05
+lora_target_modules:  # LLaMA attention projections are q_proj/k_proj/v_proj/o_proj
+  - q_proj
+  - v_proj
+lora_fan_in_fan_out: false  # LLaMA projections are nn.Linear; true is only for Conv1D-style weights
+wandb_project: llama-65b-lora
+wandb_watch:
+wandb_run_name:
+wandb_log_model: checkpoint
+output_dir: ./lora-llama-alpaca
+batch_size: 128  # 8 x micro_batch_size
+micro_batch_size: 16
+num_epochs: 5
+learning_rate: 0.00003  # 3e-5
+train_on_inputs: false
+group_by_length: false
+bf16: true
+tf32: true
+resume_from_checkpoint:
+local_rank:
+deepspeed:
--- a/configs/pythia_1_2B_alpaca.yml
+++ b/configs/pythia_1_2B_alpaca.yml
@@ -13,22 +13,24 @@ datasets:
    type: gpteacher
 val_set_size: 0.05
 adapter: lora
+lora_model_dir:  # empty value parses as null; presumably "no existing adapter to load" - confirm against the loader
 sequence_len: 2048
 lora_r: 8
 lora_alpha: 32
 lora_dropout: 0.05
 lora_target_modules:
  - query_key_value
+#  - xxx
 lora_fan_in_fan_out: true  # pythia/GPTNeoX lora specific
 wandb_project: pythia-1.4b-lora
 wandb_watch:
 wandb_run_name:
 wandb_log_model: checkpoint
 output_dir: ./lora-alpaca
-batch_size: 32
+batch_size: 48  # 12 x micro_batch_size
 micro_batch_size: 4
 num_epochs: 5
-learning_rate: 0.0003
+learning_rate: 0.00001  # 1e-5
 train_on_inputs: false
 group_by_length: false
 bf16: True