Merge branch 'main' into flash-optimum

Wing Lian
2023-06-12 13:12:15 -04:00
committed by GitHub
36 changed files with 461 additions and 1009 deletions


@@ -0,0 +1,60 @@
base_model: cerebras/Cerebras-GPT-1.3B
base_model_config: cerebras/Cerebras-GPT-1.3B
load_in_8bit: false
load_in_4bit: true
strict: false
push_dataset_to_hub:
datasets:
- path: teknium/GPT4-LLM-Cleaned
type: alpaca
dataset_prepared_path: last_run_prepared
val_set_size: 0.01
adapter: qlora
lora_model_dir:
sequence_len: 2048
max_packed_sequence_len: 2048
lora_r: 16
lora_alpha: 32
lora_dropout: 0.05
lora_target_modules:
- c_fc
- c_attn
- c_proj
lora_target_linear:
lora_fan_in_fan_out:
wandb_project:
wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./qlora-out
batch_size: 4
micro_batch_size: 4
num_epochs: 2
optimizer: paged_adamw_8bit
torchdistx_path:
lr_scheduler: cosine
learning_rate: 0.0002
train_on_inputs: false
group_by_length: true
bf16: true
fp16: false
tf32: true
gradient_checkpointing: true
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 1
xformers_attention: true
flash_attention:
gptq_groupsize:
gptq_model_v1:
warmup_steps: 10
eval_steps: 20
save_steps:
debug:
deepspeed:
weight_decay: 0.1
fsdp:
fsdp_config:
special_tokens:
pad_token: "<|endoftext|>"
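This new Cerebras-GPT 1.3B QLoRA example follows the same layout as the other configs in this change. A plausible launch command, mirroring the README examples elsewhere in this PR, is sketched below; note that the examples/cerebras/qlora.yml path is an assumption, since this hunk does not show the file name.
```shell
# Assumed path: this hunk does not name the new file, so adjust as needed.
accelerate launch scripts/finetune.py examples/cerebras/qlora.yml
```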


@@ -23,7 +23,7 @@ lora_dropout: 0.0
lora_target_modules:
lora_target_linear: true
lora_fan_in_fan_out:
-wandb_project: falcon-7b
+wandb_project:
wandb_watch:
wandb_run_id:
wandb_log_model:


@@ -23,7 +23,7 @@ lora_dropout: 0.0
lora_target_modules:
lora_target_linear: true
lora_fan_in_fan_out:
-wandb_project: falcon-7b
+wandb_project:
wandb_watch:
wandb_run_id:
wandb_log_model:

examples/gptj/qlora.yml (new file, 57 lines)

@@ -0,0 +1,57 @@
base_model: EleutherAI/gpt-j-6b
base_model_config: EleutherAI/gpt-j-6b
load_in_8bit: false
load_in_4bit: true
strict: false
push_dataset_to_hub:
datasets:
- path: teknium/GPT4-LLM-Cleaned
type: alpaca
dataset_prepared_path: last_run_prepared
val_set_size: 0.01
adapter: qlora
lora_model_dir:
sequence_len: 2048
max_packed_sequence_len:
lora_r: 8
lora_alpha: 32
lora_dropout: 0.05
lora_target_modules:
lora_target_linear: true
lora_fan_in_fan_out:
wandb_project:
wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./qlora-out
gradient_accumulation_steps: 2
micro_batch_size: 2
num_epochs: 2
optimizer: paged_adamw_8bit
torchdistx_path:
lr_scheduler: cosine
learning_rate: 0.0001
train_on_inputs: false
group_by_length: true
bf16: true
fp16: false
tf32: true
gradient_checkpointing: true
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 1
xformers_attention: true
flash_attention:
gptq_groupsize:
gptq_model_v1:
warmup_steps: 10
eval_steps: 20
save_steps:
debug:
deepspeed:
weight_decay: 0.1
fsdp:
fsdp_config:
special_tokens:
pad_token: "<|endoftext|>"
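Like the other examples, this GPT-J QLoRA config is launched through accelerate; the path comes from the file header above. With gradient_accumulation_steps: 2 and micro_batch_size: 2, each optimizer step sees an effective per-device batch of 4.
```shell
# Path taken from the diff header for this new file.
accelerate launch scripts/finetune.py examples/gptj/qlora.yml
```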


@@ -3,6 +3,6 @@
This is a good place to start for beginners. This will run on an NVIDIA RTX4090 with no other changes needed.
```shell
-accelerate launch scripts/finetune.py examples/4bit-lora-7b/config.yml
+accelerate launch scripts/finetune.py examples/gptq-lora-7b/config.yml
```


@@ -0,0 +1,55 @@
base_model: huggyllama/llama-7b
base_model_config: huggyllama/llama-7b
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
load_in_8bit: false
datasets:
- path: openaccess-ai-collective/jeopardy
type: jeopardy
dataset_prepared_path: last_run_prepared
val_set_size: 0.02
adapter:
lora_model_dir:
sequence_len: 512
max_packed_sequence_len:
lora_r:
lora_alpha:
lora_dropout:
lora_target_modules:
lora_fan_in_fan_out: false
wandb_project:
wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./jeopardy-bot-7b
gradient_accumulation_steps: 1
micro_batch_size: 1
num_epochs: 3
optimizer: adamw_bnb_8bit
torchdistx_path:
lr_scheduler: cosine
learning_rate: 0.00003
train_on_inputs: false
group_by_length: false
bf16: true
tf32: true
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 5
xformers_attention: true
flash_attention:
gptq_groupsize:
gptq_model_v1:
warmup_steps: 20
eval_steps: 110
save_steps: 660
debug:
deepspeed:
weight_decay: 0.1
fsdp:
fsdp_config:
special_tokens:
bos_token: "<s>"
eos_token: "</s>"
unk_token: "<unk>"
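This jeopardy-bot config is a full fine-tune (no adapter is set). A launch command in the same style as the other examples would look like the sketch below; the examples/jeopardy-bot/config.yml path is an assumption, as the hunk does not show where the file lives.
```shell
# Assumed location for this config; the diff does not name the file.
accelerate launch scripts/finetune.py examples/jeopardy-bot/config.yml
```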


@@ -0,0 +1,16 @@
# openllama-3b
Basic full tune
```shell
accelerate launch scripts/finetune.py examples/openllama-3b/config.yml
```
LoRA
```shell
accelerate launch scripts/finetune.py examples/openllama-3b/lora.yml
```
QLoRA
```shell
accelerate launch scripts/finetune.py examples/openllama-3b/qlora.yml
```


@@ -0,0 +1,61 @@
base_model: openlm-research/open_llama_3b
base_model_config: openlm-research/open_llama_3b
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
load_in_8bit: false
load_in_4bit: false
strict: false
push_dataset_to_hub:
datasets:
- path: teknium/GPT4-LLM-Cleaned
type: alpaca
dataset_prepared_path: last_run_prepared
val_set_size: 0.02
adapter:
lora_model_dir:
sequence_len: 256
max_packed_sequence_len:
lora_r:
lora_alpha:
lora_dropout:
lora_target_modules:
lora_target_linear:
lora_fan_in_fan_out:
wandb_project:
wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./openllama-out
batch_size: 16
micro_batch_size: 4
num_epochs: 3
optimizer: adamw_bnb_8bit
torchdistx_path:
lr_scheduler: cosine
learning_rate: 0.0002
train_on_inputs: false
group_by_length: false
bf16: false
fp16: true
tf32: false
gradient_checkpointing: true
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 1
xformers_attention: true
flash_attention:
gptq_groupsize:
gptq_model_v1:
warmup_steps: 10
eval_steps: 50
save_steps:
debug:
deepspeed:
weight_decay: 0.0
fsdp:
fsdp_config:
special_tokens:
bos_token: "<s>"
eos_token: "</s>"
unk_token: "<unk>"


@@ -1,5 +1,5 @@
-base_model: openlm-research/open_llama_3b_600bt_preview
-base_model_config: openlm-research/open_llama_3b_600bt_preview
+base_model: openlm-research/open_llama_3b
+base_model_config: openlm-research/open_llama_3b
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
load_in_8bit: true
@@ -49,7 +49,7 @@ early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 1
-xformers_attention:
+xformers_attention: true
flash_attention:
gptq_groupsize:
gptq_model_v1:


@@ -1,5 +1,5 @@
-base_model: openlm-research/open_llama_3b_600bt_preview
-base_model_config: openlm-research/open_llama_3b_600bt_preview
+base_model: openlm-research/open_llama_3b
+base_model_config: openlm-research/open_llama_3b
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
load_in_8bit: false

examples/pythia/lora.yml (new file, 37 lines)

@@ -0,0 +1,37 @@
base_model: EleutherAI/pythia-1.4b-deduped
base_model_config: EleutherAI/pythia-1.4b-deduped
load_in_8bit: true
datasets:
- path: teknium/GPT4-LLM-Cleaned
type: alpaca
dataset_prepared_path: last_run_prepared
val_set_size: 0.05
adapter: lora
lora_model_dir:
sequence_len: 512
lora_r: 16
lora_alpha: 32
lora_dropout: 0.05
lora_target_modules:
- query_key_value
lora_target_linear:
lora_fan_in_fan_out: true # pythia/GPTNeoX lora specific
wandb_project:
wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./lora-alpaca-pythia
gradient_accumulation_steps: 1
micro_batch_size: 4
num_epochs: 3
learning_rate: 0.00001
train_on_inputs: false
group_by_length: false
bf16: true
tf32: true
early_stopping_patience:
resume_from_checkpoint:
local_rank:
weight_decay: 0.1
eval_steps: 20
logging_steps: 1
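This Pythia LoRA config targets the query_key_value projection and sets lora_fan_in_fan_out: true, which the inline comment marks as specific to Pythia/GPT-NeoX LoRA. It can be launched the same way as the other examples, using the path from the file header above:
```shell
accelerate launch scripts/finetune.py examples/pythia/lora.yml
```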


@@ -1,6 +0,0 @@
# qlora-openllama-3b
```shell
accelerate launch scripts/finetune.py examples/qlora-openllama-3b/config.yml
```