From effbbf6dd13b564dcbafbbf155557bb43f76359a Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Sun, 11 Jun 2023 10:38:24 -0400
Subject: [PATCH] more pruning

---
 configs/cerebras_1_3B_alpaca.yml | 40 ---------------------
 configs/galactica_1_3B.yml       | 41 ----------------------
 configs/gpt_neox_20b.yml         | 39 ---------------------
 configs/stability_3b.yml         | 56 ------------------------------
 examples/cerebras/qlora.yml      | 60 ++++++++++++++++++++++++++++++++
 5 files changed, 60 insertions(+), 176 deletions(-)
 delete mode 100644 configs/cerebras_1_3B_alpaca.yml
 delete mode 100644 configs/galactica_1_3B.yml
 delete mode 100644 configs/gpt_neox_20b.yml
 delete mode 100644 configs/stability_3b.yml
 create mode 100644 examples/cerebras/qlora.yml

diff --git a/configs/cerebras_1_3B_alpaca.yml b/configs/cerebras_1_3B_alpaca.yml
deleted file mode 100644
index 958bf4c5a..000000000
--- a/configs/cerebras_1_3B_alpaca.yml
+++ /dev/null
@@ -1,40 +0,0 @@
-base_model: cerebras/Cerebras-GPT-1.3B
-model_type: AutoModelForCausalLM
-tokenizer_type: AutoTokenizer
-load_in_8bit: true
-datasets:
-  - path: data/alpaca_data_gpt4.jsonl
-    type: alpaca
-  - path: data/vicuna_cleaned.jsonl
-    type: sharegpt
-  - path: data/gpt4-instruct-similarity-0.6-dataset.jsonl
-    type: gpteacher
-  - path: data/roleplay-similarity_0.6-instruct-dataset.jsonl
-    type: gpteacher
-dataset_prepared_path: last_run_prepared
-val_set_size: 0.05
-adapter: lora
-sequence_len: 2048
-lora_r: 8
-lora_alpha: 16
-lora_dropout: 0.05
-lora_target_modules:
-  - c_attn
-lora_fan_in_fan_out: false
-wandb_project: pythia-1.4b-lora
-wandb_watch:
-wandb_run_id:
-wandb_log_model:
-output_dir: ./lora-alpaca
-gradient_accumulation_steps: 1
-micro_batch_size: 4
-num_epochs: 5
-learning_rate: 0.0003
-train_on_inputs: false
-group_by_length: false
-bf16: True
-tf32: True
-gradient_checkpointing:
-early_stopping_patience:
-resume_from_checkpoint:
-local_rank:
diff --git a/configs/galactica_1_3B.yml b/configs/galactica_1_3B.yml
deleted file mode 100644
index 2abb4c6b4..000000000
--- a/configs/galactica_1_3B.yml
+++ /dev/null
@@ -1,41 +0,0 @@
-base_model: facebook/galactica-1.3b
-model_type: AutoModelForCausalLM
-tokenizer_type: AutoTokenizer
-load_in_8bit: false
-datasets:
-  - path: tatsu-lab/alpaca
-    type: alpaca
-dataset_prepared_path: last_run_prepared
-val_set_size: 0.1
-adapter:
-lora_model_dir:
-sequence_len: 1024
-max_packed_sequence_len: 1024
-lora_r: 8
-lora_alpha: 16
-lora_dropout: 0.05
-lora_target_modules:
-  - q_proj
-  - v_proj
-lora_fan_in_fan_out: false
-wandb_project:
-wandb_watch:
-wandb_run_id:
-wandb_log_model:
-output_dir: ./lora-llama-alpaca
-gradient_accumulation_steps: 1
-micro_batch_size: 16
-num_epochs: 3
-learning_rate: 0.00003
-train_on_inputs: false
-group_by_length: false
-bf16: false
-tf32: false
-early_stopping_patience:
-resume_from_checkpoint:
-local_rank:
-tokens:
-  pad_token: "[PAD]"
-  bos_token: "<s>"
-  eos_token: "</s>"
-  unk_token: "<unk>"
diff --git a/configs/gpt_neox_20b.yml b/configs/gpt_neox_20b.yml
deleted file mode 100644
index 730afb72c..000000000
--- a/configs/gpt_neox_20b.yml
+++ /dev/null
@@ -1,39 +0,0 @@
-base_model: EleutherAI/gpt-neox-20b
-base_model_ignore_patterns: pytorch* # prefer safetensors
-model_type: GPTNeoXForCausalLM
-tokenizer_type: AutoTokenizer
-load_in_8bit: true
-datasets:
-  - path: nomic-ai/gpt4all-j-prompt-generations
-    type: alpaca
-    shards: 4
-    shards_index: 0
-dataset_prepared_path: last_run_prepared
-val_set_size: 0.05
-adapter: lora
-lora_model_dir:
-sequence_len: 2048
-max_packed_sequence_len: 2048
-lora_r: 8
-lora_alpha: 32
-lora_dropout: 0.05
-lora_target_modules:
-  - query_key_value
-lora_fan_in_fan_out: true # pythia/GPTNeoX lora specific
-wandb_project: gpt4all-neox-20b
-wandb_watch:
-wandb_run_id:
-wandb_log_model:
-output_dir: ./gpt4all-neox-20b
-gradient_accumulation_steps: 1
-micro_batch_size: 4
-num_epochs: 5
-learning_rate: 0.00003
-lr_scheduler: one_cycle
-train_on_inputs: false
-group_by_length: false
-bf16: True
-tf32: True
-early_stopping_patience:
-resume_from_checkpoint:
-local_rank:
diff --git a/configs/stability_3b.yml b/configs/stability_3b.yml
deleted file mode 100644
index 83516a20a..000000000
--- a/configs/stability_3b.yml
+++ /dev/null
@@ -1,56 +0,0 @@
-base_model: stabilityai/stablelm-base-alpha-3b
-base_model_config: stabilityai/stablelm-base-alpha-3b
-load_in_8bit: false
-datasets:
-  - path: vicgalle/alpaca-gpt4
-    type: alpaca
-dataset_prepared_path: last_run_prepared
-val_set_size: 0.04
-adapter:
-lora_model_dir:
-sequence_len: 4096
-max_packed_sequence_len: 4096
-lora_r: 8
-lora_alpha: 16
-lora_dropout: 0.05
-lora_target_modules:
-  - q_proj
-  - v_proj
-lora_fan_in_fan_out: false
-wandb_project: stable-alpaca-3b
-wandb_watch:
-wandb_run_id:
-wandb_log_model:
-output_dir: ./stable-alpaca-3b
-gradient_accumulation_steps: 1
-micro_batch_size: 1
-num_epochs: 1
-optimizer: adamw_bnb_8bit
-torchdistx_path:
-lr_scheduler: cosine
-learning_rate: 0.0000002
-train_on_inputs: false
-group_by_length: false
-bf16: true
-tf32: true
-early_stopping_patience:
-resume_from_checkpoint:
-local_rank:
-logging_steps: 1
-xformers_attention: true
-flash_attention:
-gptq_groupsize:
-gptq_model_v1:
-warmup_steps: 100
-eval_steps: 50
-save_steps: 200
-debug:
-deepspeed:
-weight_decay: 0.01
-fsdp:
-fsdp_config:
-#tokens:
-# pad_token: "[PAD]"
-# bos_token: "<s>"
-# eos_token: "</s>"
-# unk_token: "<unk>"
diff --git a/examples/cerebras/qlora.yml b/examples/cerebras/qlora.yml
new file mode 100644
index 000000000..9340299b9
--- /dev/null
+++ b/examples/cerebras/qlora.yml
@@ -0,0 +1,60 @@
+base_model: cerebras/Cerebras-GPT-1.3B
+base_model_config: cerebras/Cerebras-GPT-1.3B
+load_in_8bit: false
+load_in_4bit: true
+strict: false
+push_dataset_to_hub:
+datasets:
+  - path: teknium/GPT4-LLM-Cleaned
+    type: alpaca
+dataset_prepared_path: last_run_prepared
+val_set_size: 0.01
+adapter: qlora
+lora_model_dir:
+sequence_len: 2048
+max_packed_sequence_len: 2048
+lora_r: 16
+lora_alpha: 32
+lora_dropout: 0.05
+lora_target_modules:
+  - c_fc
+  - c_attn
+  - c_proj
+lora_target_linear:
+lora_fan_in_fan_out:
+wandb_project:
+wandb_watch:
+wandb_run_id:
+wandb_log_model:
+output_dir: ./qlora-out
+batch_size: 4
+micro_batch_size: 4
+num_epochs: 2
+optimizer: paged_adamw_8bit
+torchdistx_path:
+lr_scheduler: cosine
+learning_rate: 0.0002
+train_on_inputs: false
+group_by_length: true
+bf16: true
+fp16: false
+tf32: true
+gradient_checkpointing: true
+early_stopping_patience:
+resume_from_checkpoint:
+local_rank:
+logging_steps: 1
+xformers_attention: true
+flash_attention:
+gptq_groupsize:
+gptq_model_v1:
+warmup_steps: 10
+eval_steps: 20
+save_steps:
+debug:
+deepspeed:
+weight_decay: 0.1
+fsdp:
+fsdp_config:
+special_tokens:
+  pad_token: "<|endoftext|>"