diff --git a/configs/accelerate/default_config.yaml b/configs/accelerate/default_config.yaml
deleted file mode 100644
index 9759703af..000000000
--- a/configs/accelerate/default_config.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-compute_environment: LOCAL_MACHINE
-distributed_type: 'NO'
-downcast_bf16: 'no'
-gpu_ids: all
-machine_rank: 0
-main_training_function: main
-mixed_precision: bf16
-num_machines: 1
-num_processes: 1
-rdzv_backend: static
-same_network: true
-tpu_env: []
-tpu_use_cluster: false
-tpu_use_sudo: false
-use_cpu: false
diff --git a/configs/llama_13B_alpaca.yml b/configs/llama_13B_alpaca.yml
deleted file mode 100644
index 99c9883fe..000000000
--- a/configs/llama_13B_alpaca.yml
+++ /dev/null
@@ -1,39 +0,0 @@
-base_model: huggyllama/llama-13b
-model_type: LlamaForCausalLM
-tokenizer_type: LlamaTokenizer
-load_in_8bit: true
-datasets:
-  - path: anon8231489123/ShareGPT_Vicuna_unfiltered
-    data_files: ShareGPT_V3_unfiltered_cleaned_split_no_imsorry.json
-    type: sharegpt
-dataset_prepared_path: last_run_prepared
-val_set_size: 0.002
-adapter:
-lora_model_dir:
-sequence_len: 2048
-lora_r: 8
-lora_alpha: 16
-lora_dropout: 0.05
-lora_target_modules:
-  - q_proj
-  - v_proj
-lora_fan_in_fan_out: false
-wandb_project:
-wandb_watch:
-wandb_run_id:
-wandb_log_model:
-output_dir: ./llama-13b-sharegpt
-gradient_accumulation_steps: 1
-micro_batch_size: 2
-warmup_steps: 1000
-save_steps:
-eval_steps:
-num_epochs: 5
-learning_rate: 0.00003
-train_on_inputs: false
-group_by_length: false
-bf16: true
-tf32: true
-early_stopping_patience: 5
-resume_from_checkpoint:
-local_rank:
diff --git a/configs/llama_65B_alpaca.yml b/configs/llama_65B_alpaca.yml
deleted file mode 100644
index e7d2c211c..000000000
--- a/configs/llama_65B_alpaca.yml
+++ /dev/null
@@ -1,44 +0,0 @@
-base_model: huggyllama/llama-65b
-model_type: LlamaForCausalLM
-tokenizer_type: LlamaTokenizer
-load_in_8bit: true
-datasets:
-  - path: data/alpaca_data_gpt4.jsonl
-    type: alpaca
-  - path: anon8231489123/ShareGPT_Vicuna_unfiltered
-    data_files: ShareGPT_V3_unfiltered_cleaned_split_no_imsorry.json
-    type: sharegpt
-  - path: data/gpt4-instruct-similarity-0.6-dataset.jsonl
-    type: gpteacher
-  - path: data/roleplay-similarity_0.6-instruct-dataset.jsonl
-    type: gpteacher
-dataset_prepared_path: last_run_prepared
-val_set_size: 0.04
-adapter: lora
-lora_model_dir:
-sequence_len: 2048
-lora_r: 8
-lora_alpha: 16
-lora_dropout: 0.05
-lora_target_modules:
-  - q_proj
-  - v_proj
-lora_fan_in_fan_out: false
-wandb_project: llama-65b-lora
-wandb_watch:
-wandb_run_id:
-wandb_log_model:
-output_dir: ./lora-llama-alpaca
-gradient_accumulation_steps: 1
-micro_batch_size: 16
-warmup_steps: 1000
-save_steps:
-num_epochs: 5
-learning_rate: 0.00003
-train_on_inputs: false
-group_by_length: false
-bf16: true
-tf32: true
-early_stopping_patience:
-resume_from_checkpoint:
-local_rank:
diff --git a/configs/llama_7B_4bit.yml b/configs/llama_7B_4bit.yml
deleted file mode 100644
index a7451516c..000000000
--- a/configs/llama_7B_4bit.yml
+++ /dev/null
@@ -1,45 +0,0 @@
-base_model: decapoda-research/llama-7b-hf-int4
-base_model_config: decapoda-research/llama-7b-hf
-model_type: LlamaForCausalLM
-tokenizer_type: LlamaTokenizer
-load_in_8bit: true
-datasets:
-  - path: tatsu-lab/alpaca # original alpaca dataset
-    type: alpaca
-dataset_prepared_path: data/last_run_prepared
-val_set_size: 0.04
-adapter: lora
-lora_model_dir:
-sequence_len: 2048
-max_packed_sequence_len: 1024
-lora_r: 8
-lora_alpha: 16
-lora_dropout: 0.05
-lora_target_modules:
-  - q_proj
-  - v_proj
-# - k_proj
-# - o_proj
-lora_fan_in_fan_out: false
-wandb_project:
-wandb_watch:
-wandb_run_id:
-wandb_log_model:
-output_dir: ./lora-test
-gradient_accumulation_steps: 1
-micro_batch_size: 2
-num_epochs: 3
-warmup_steps: 100
-learning_rate: 0.00003
-train_on_inputs: false
-group_by_length: false
-bf16: true
-tf32: true
-gradient_checkpointing: false
-early_stopping_patience: 3
-resume_from_checkpoint:
-auto_resume_from_checkpoints: true
-local_rank:
-load_4bit: true
-xformers_attention: true
-flash_attention:
diff --git a/configs/quickstart.yml b/configs/quickstart.yml
deleted file mode 100644
index 2362916fc..000000000
--- a/configs/quickstart.yml
+++ /dev/null
@@ -1,45 +0,0 @@
-base_model: decapoda-research/llama-7b-hf-int4
-base_model_config: decapoda-research/llama-7b-hf
-model_type: LlamaForCausalLM
-tokenizer_type: LlamaTokenizer
-load_in_8bit: true
-datasets:
-  - path: tatsu-lab/alpaca # original alpaca dataset
-    type: alpaca
-dataset_prepared_path: data/last_run_prepared
-val_set_size: 0.04
-adapter: lora
-lora_model_dir:
-sequence_len: 1024
-max_packed_sequence_len: 1024
-lora_r: 8
-lora_alpha: 16
-lora_dropout: 0.05
-lora_target_modules:
-  - q_proj
-  - v_proj
-# - k_proj
-# - o_proj
-lora_fan_in_fan_out: false
-wandb_project:
-wandb_watch:
-wandb_run_id:
-wandb_log_model:
-output_dir: ./lora-test
-gradient_accumulation_steps: 1
-micro_batch_size: 1
-num_epochs: 3
-warmup_steps: 100
-learning_rate: 0.00003
-train_on_inputs: false
-group_by_length: false
-bf16: true
-tf32: true
-gradient_checkpointing: false
-early_stopping_patience: 3
-resume_from_checkpoint:
-auto_resume_from_checkpoints: true
-local_rank:
-gptq: true
-xformers_attention: true
-flash_attention:
diff --git a/configs/vicuna_13B_4bit_reflect.yml b/configs/vicuna_13B_4bit_reflect.yml
deleted file mode 100644
index 3e37f5334..000000000
--- a/configs/vicuna_13B_4bit_reflect.yml
+++ /dev/null
@@ -1,45 +0,0 @@
-base_model: anon8231489123/vicuna-13b-GPTQ-4bit-128g
-base_model_config: anon8231489123/vicuna-13b-GPTQ-4bit-128g
-model_type: LlamaForCausalLM
-tokenizer_type: LlamaTokenizer
-load_in_8bit: false
-load_4bit: true
-gptq_groupsize: 128
-gptq_model_v1: false
-datasets:
-# https://github.com/vaguenebula/AlpacaDataReflect/blob/main/alpaca_reflect_pruned.json
-  - path: data/alpaca_reflect_pruned.jsonl
-    type: reflection
-dataset_prepared_path: data/last_run_prepared
-val_set_size: 0.04
-adapter: lora
-lora_model_dir:
-sequence_len: 2048
-max_packed_sequence_len: 2048
-lora_r: 8
-lora_alpha: 16
-lora_dropout: 0.05
-lora_target_modules:
-  - q_proj
-  - v_proj
-# - k_proj
-# - o_proj
-lora_fan_in_fan_out: false
-wandb_project:
-wandb_watch:
-wandb_run_id:
-wandb_log_model:
-output_dir: ./lora-reflect
-gradient_accumulation_steps: 1
-micro_batch_size: 2
-num_epochs: 3
-learning_rate: 0.00003
-train_on_inputs: false
-group_by_length: false
-bf16: true
-tf32: true
-gradient_checkpointing: false
-early_stopping_patience: 3
-resume_from_checkpoint:
-local_rank:
-flash_attention: true
diff --git a/configs/pythia_1_2B_alpaca.yml b/examples/pythia/lora.yml
similarity index 56%
rename from configs/pythia_1_2B_alpaca.yml
rename to examples/pythia/lora.yml
index 52ed58cb5..e2b28f218 100644
--- a/configs/pythia_1_2B_alpaca.yml
+++ b/examples/pythia/lora.yml
@@ -1,36 +1,29 @@
 base_model: EleutherAI/pythia-1.4b-deduped
-model_type: GPTNeoXForCausalLM
-tokenizer_type: AutoTokenizer
+base_model_config: EleutherAI/pythia-1.4b-deduped
 load_in_8bit: true
 datasets:
-  - path: data/alpaca_data_gpt4.jsonl
+  - path: teknium/GPT4-LLM-Cleaned
     type: alpaca
-  - path: data/vicuna_cleaned.jsonl
-    type: sharegpt
-  - path: data/gpt4-instruct-similarity-0.6-dataset.jsonl
-    type: gpteacher
-  - path: data/roleplay-similarity_0.6-instruct-dataset.jsonl
-    type: gpteacher
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
 adapter: lora
 lora_model_dir:
-sequence_len: 2048
-lora_r: 8
+sequence_len: 512
+lora_r: 16
 lora_alpha: 32
 lora_dropout: 0.05
 lora_target_modules:
   - query_key_value
-# - xxx
+lora_target_linear:
 lora_fan_in_fan_out: true # pythia/GPTNeoX lora specific
-wandb_project: pythia-1.4b-lora
+wandb_project:
 wandb_watch:
 wandb_run_id:
 wandb_log_model:
-output_dir: ./lora-alpaca
+output_dir: ./lora-alpaca-pythia
 gradient_accumulation_steps: 1
 micro_batch_size: 4
-num_epochs: 5
+num_epochs: 3
 learning_rate: 0.00001
 train_on_inputs: false
 group_by_length: false
@@ -39,3 +32,6 @@ tf32: True
 early_stopping_patience:
 resume_from_checkpoint:
 local_rank:
+weight_decay: 0.1
+eval_steps: 20
+logging_steps: 1
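With the configs/ directory removed, the surviving Pythia example lives at examples/pythia/lora.yml. A minimal launch sketch, assuming the repo's scripts/finetune.py entry point and that accelerate is now configured interactively since configs/accelerate/default_config.yaml is deleted (both assumptions; check the README for the exact invocation):

    accelerate config   # one-time interactive setup, replacing the deleted default_config.yaml
    accelerate launch scripts/finetune.py examples/pythia/lora.yml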