From c530e4b9c877815b8f23a730014a10b81e206cdf Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Sun, 11 Jun 2023 10:09:05 -0400 Subject: [PATCH 1/8] more config pruning and migrating --- configs/llama_7B_alpaca.yml | 41 --------- configs/sample.yml | 87 ------------------- examples/gptj-qlora/config.yml | 57 ++++++++++++ .../jeopardy-bot/config.yml | 25 +++--- 4 files changed, 68 insertions(+), 142 deletions(-) delete mode 100644 configs/llama_7B_alpaca.yml delete mode 100644 configs/sample.yml create mode 100644 examples/gptj-qlora/config.yml rename configs/llama_7B_jeopardy.yml => examples/jeopardy-bot/config.yml (75%) diff --git a/configs/llama_7B_alpaca.yml b/configs/llama_7B_alpaca.yml deleted file mode 100644 index 7db2f65aa..000000000 --- a/configs/llama_7B_alpaca.yml +++ /dev/null @@ -1,41 +0,0 @@ -base_model: huggyllama/llama-7b -model_type: LlamaForCausalLM -tokenizer_type: LlamaTokenizer -load_in_8bit: true -datasets: - - path: data/alpaca_data_gpt4.jsonl - type: alpaca - - path: data/vicuna_cleaned.jsonl - type: sharegpt - - path: data/gpt4-instruct-similarity-0.6-dataset.jsonl - type: gpteacher - - path: data/roleplay-similarity_0.6-instruct-dataset.jsonl - type: gpteacher -dataset_prepared_path: last_run_prepared -val_set_size: 0.04 -adapter: lora -lora_model_dir: -sequence_len: 2048 -lora_r: 8 -lora_alpha: 16 -lora_dropout: 0.05 -lora_target_modules: - - q_proj - - v_proj -lora_fan_in_fan_out: false -wandb_project: llama-7b-lora -wandb_watch: -wandb_run_id: -wandb_log_model: -output_dir: ./lora-llama-alpaca -gradient_accumulation_steps: 1 -micro_batch_size: 16 -num_epochs: 5 -learning_rate: 0.00003 -train_on_inputs: false -group_by_length: false -bf16: true -tf32: true -early_stopping_patience: -resume_from_checkpoint: -local_rank: diff --git a/configs/sample.yml b/configs/sample.yml deleted file mode 100644 index ddd95cb55..000000000 --- a/configs/sample.yml +++ /dev/null @@ -1,87 +0,0 @@ -# this is the huggingface model that contains *.pt, 
*.safetensors, or *.bin files -# this can also be a relative path to a model on disk -base_model: decapoda-research/llama-7b-hf-int4 -# you can specify an ignore pattern if the model repo contains more than 1 model type (*.pt, etc) -base_model_ignore_patterns: -# if the base_model repo on hf hub doesn't include configuration .json files, -# you can set that here, or leave this empty to default to base_model -base_model_config: decapoda-research/llama-7b-hf -# If you want to specify the type of model to load, AutoModelForCausalLM is a good choice too -model_type: AutoModelForCausalLM -# Corresponding tokenizer for the model AutoTokenizer is a good choice -tokenizer_type: AutoTokenizer -# whether you are training a 4-bit quantized model -load_4bit: true -# this will attempt to quantize the model down to 8 bits and use adam 8 bit optimizer -load_in_8bit: true -# a list of one or more datasets to finetune the model with -datasets: - # this can be either a hf dataset, or relative path - - path: vicgalle/alpaca-gpt4 - # The type of prompt to use for training. [alpaca, sharegpt, gpteacher, oasst, reflection] - type: alpaca -# axolotl attempts to save the dataset as an arrow after packing the data together so -# subsequent training attempts load faster, relative path -dataset_prepared_path: data/last_run_prepared -# How much of the dataset to set aside as evaluation. 1 = 100%, 0.50 = 50%, etc -val_set_size: 0.04 -# if you want to use lora, leave blank to train all parameters in original model -adapter: lora -# if you already have a lora model trained that you want to load, put that here -lora_model_dir: -# the maximum length of an input to train with, this should typically be less than 2048 -# as most models have a token/context limit of 2048 -sequence_len: 2048 -# max sequence length to concatenate training samples together up to -# inspired by StackLLaMA. 
see https://huggingface.co/blog/stackllama#supervised-fine-tuning -max_packed_sequence_len: 1024 -# lora hyperparameters -lora_r: 8 -lora_alpha: 16 -lora_dropout: 0.05 -lora_target_modules: - - q_proj - - v_proj -# - k_proj -# - o_proj -lora_fan_in_fan_out: false -# wandb configuration if your're using it -wandb_project: -wandb_watch: -wandb_run_id: -wandb_log_model: -# where to save the finsihed model to -output_dir: ./completed-model -# training hyperparameters -gradient_accumulation_steps: 1 -batch_size: -micro_batch_size: 2 -num_epochs: 3 -warmup_steps: 100 -learning_rate: 0.00003 -# whether to mask out or include the human's prompt from the training labels -train_on_inputs: false -# don't use this, leads to wonky training (according to someone on the internet) -group_by_length: false -# Use CUDA bf16 -bf16: true -# Use CUDA tf32 -tf32: true -# does not work with current implementation of 4-bit LoRA -gradient_checkpointing: false -# stop training after this many evaluation losses have increased in a row -# https://huggingface.co/transformers/v4.2.2/_modules/transformers/trainer_callback.html#EarlyStoppingCallback -early_stopping_patience: 3 -# specify a scheduler to use with the optimizer. 
only one_cycle is supported currently -lr_scheduler: -# whether to use xformers attention patch https://github.com/facebookresearch/xformers: -xformers_attention: -# whether to use flash attention patch https://github.com/HazyResearch/flash-attention: -flash_attention: -# resume from a specific checkpoint dir -resume_from_checkpoint: -# if resume_from_checkpoint isn't set and you simply want it to start where it left off -# be careful with this being turned on between different models -auto_resume_from_checkpoints: false -# don't mess with this, it's here for accelerate and torchrun -local_rank: diff --git a/examples/gptj-qlora/config.yml b/examples/gptj-qlora/config.yml new file mode 100644 index 000000000..858c14862 --- /dev/null +++ b/examples/gptj-qlora/config.yml @@ -0,0 +1,57 @@ +base_model: EleutherAI/gpt-j-6b +base_model_config: EleutherAI/gpt-j-6b +load_in_8bit: false +load_in_4bit: true +strict: false +push_dataset_to_hub: +datasets: + - path: teknium/GPT4-LLM-Cleaned + type: alpaca +dataset_prepared_path: last_run_prepared +val_set_size: 0.01 +adapter: qlora +lora_model_dir: +sequence_len: 2048 +max_packed_sequence_len: +lora_r: 8 +lora_alpha: 32 +lora_dropout: 0.05 +lora_target_modules: +lora_target_linear: true +lora_fan_in_fan_out: +wandb_project: +wandb_watch: +wandb_run_id: +wandb_log_model: +output_dir: ./qlora-out +gradient_accumulation_steps: 2 +micro_batch_size: 2 +num_epochs: 2 +optimizer: paged_adamw_8bit +torchdistx_path: +lr_scheduler: cosine +learning_rate: 0.0001 +train_on_inputs: false +group_by_length: true +bf16: true +fp16: false +tf32: true +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: true +flash_attention: +gptq_groupsize: +gptq_model_v1: +warmup_steps: 10 +eval_steps: 20 +save_steps: +debug: +deepspeed: +weight_decay: 0.1 +fsdp: +fsdp_config: +special_tokens: + pad_token: "<|endoftext|>" diff --git a/configs/llama_7B_jeopardy.yml 
b/examples/jeopardy-bot/config.yml similarity index 75% rename from configs/llama_7B_jeopardy.yml rename to examples/jeopardy-bot/config.yml index 287d6d6ab..b803c6074 100644 --- a/configs/llama_7B_jeopardy.yml +++ b/examples/jeopardy-bot/config.yml @@ -7,30 +7,28 @@ datasets: - path: openaccess-ai-collective/jeopardy type: jeopardy dataset_prepared_path: last_run_prepared -val_set_size: 0.01 +val_set_size: 0.02 adapter: lora_model_dir: -sequence_len: 2048 -max_packed_sequence_len: 2048 -lora_r: 8 -lora_alpha: 16 -lora_dropout: 0.05 +sequence_len: 512 +max_packed_sequence_len: +lora_r: +lora_alpha: +lora_dropout: lora_target_modules: - - q_proj - - v_proj lora_fan_in_fan_out: false -wandb_project: jeopardy-bot-7b +wandb_project: wandb_watch: wandb_run_id: wandb_log_model: output_dir: ./jeopardy-bot-7b -gradient_accumulation_steps: 2 +gradient_accumulation_steps: 1 micro_batch_size: 1 -num_epochs: 2 +num_epochs: 3 optimizer: adamw_bnb_8bit torchdistx_path: lr_scheduler: cosine -learning_rate: 0.0000002 +learning_rate: 0.00003 train_on_inputs: false group_by_length: false bf16: true @@ -48,11 +46,10 @@ eval_steps: 110 save_steps: 660 debug: deepspeed: -weight_decay: 0.0001 +weight_decay: 0.1 fsdp: fsdp_config: tokens: - pad_token: "[PAD]" bos_token: "" eos_token: "" unk_token: "" From effbbf6dd13b564dcbafbbf155557bb43f76359a Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Sun, 11 Jun 2023 10:38:24 -0400 Subject: [PATCH 2/8] more pruning --- configs/cerebras_1_3B_alpaca.yml | 40 --------------------- configs/galactica_1_3B.yml | 41 ---------------------- configs/gpt_neox_20b.yml | 39 --------------------- configs/stability_3b.yml | 56 ----------------------------- examples/cerebras/qlora.yml | 60 ++++++++++++++++++++++++++++++++ 5 files changed, 60 insertions(+), 176 deletions(-) delete mode 100644 configs/cerebras_1_3B_alpaca.yml delete mode 100644 configs/galactica_1_3B.yml delete mode 100644 configs/gpt_neox_20b.yml delete mode 100644 configs/stability_3b.yml create 
mode 100644 examples/cerebras/qlora.yml diff --git a/configs/cerebras_1_3B_alpaca.yml b/configs/cerebras_1_3B_alpaca.yml deleted file mode 100644 index 958bf4c5a..000000000 --- a/configs/cerebras_1_3B_alpaca.yml +++ /dev/null @@ -1,40 +0,0 @@ -base_model: cerebras/Cerebras-GPT-1.3B -model_type: AutoModelForCausalLM -tokenizer_type: AutoTokenizer -load_in_8bit: true -datasets: - - path: data/alpaca_data_gpt4.jsonl - type: alpaca - - path: data/vicuna_cleaned.jsonl - type: sharegpt - - path: data/gpt4-instruct-similarity-0.6-dataset.jsonl - type: gpteacher - - path: data/roleplay-similarity_0.6-instruct-dataset.jsonl - type: gpteacher -dataset_prepared_path: last_run_prepared -val_set_size: 0.05 -adapter: lora -sequence_len: 2048 -lora_r: 8 -lora_alpha: 16 -lora_dropout: 0.05 -lora_target_modules: - - c_attn -lora_fan_in_fan_out: false -wandb_project: pythia-1.4b-lora -wandb_watch: -wandb_run_id: -wandb_log_model: -output_dir: ./lora-alpaca -gradient_accumulation_steps: 1 -micro_batch_size: 4 -num_epochs: 5 -learning_rate: 0.0003 -train_on_inputs: false -group_by_length: false -bf16: True -tf32: True -gradient_checkpointing: -early_stopping_patience: -resume_from_checkpoint: -local_rank: diff --git a/configs/galactica_1_3B.yml b/configs/galactica_1_3B.yml deleted file mode 100644 index 2abb4c6b4..000000000 --- a/configs/galactica_1_3B.yml +++ /dev/null @@ -1,41 +0,0 @@ -base_model: facebook/galactica-1.3b -model_type: AutoModelForCausalLM -tokenizer_type: AutoTokenizer -load_in_8bit: false -datasets: - - path: tatsu-lab/alpaca - type: alpaca -dataset_prepared_path: last_run_prepared -val_set_size: 0.1 -adapter: -lora_model_dir: -sequence_len: 1024 -max_packed_sequence_len: 1024 -lora_r: 8 -lora_alpha: 16 -lora_dropout: 0.05 -lora_target_modules: - - q_proj - - v_proj -lora_fan_in_fan_out: false -wandb_project: -wandb_watch: -wandb_run_id: -wandb_log_model: -output_dir: ./lora-llama-alpaca -gradient_accumulation_steps: 1 -micro_batch_size: 16 -num_epochs: 3 
-learning_rate: 0.00003 -train_on_inputs: false -group_by_length: false -bf16: false -tf32: false -early_stopping_patience: -resume_from_checkpoint: -local_rank: -tokens: - pad_token: "[PAD]" - bos_token: "" - eos_token: "" - unk_token: "" diff --git a/configs/gpt_neox_20b.yml b/configs/gpt_neox_20b.yml deleted file mode 100644 index 730afb72c..000000000 --- a/configs/gpt_neox_20b.yml +++ /dev/null @@ -1,39 +0,0 @@ -base_model: EleutherAI/gpt-neox-20b -base_model_ignore_patterns: pytorch* # prefer safetensors -model_type: GPTNeoXForCausalLM -tokenizer_type: AutoTokenizer -load_in_8bit: true -datasets: - - path: nomic-ai/gpt4all-j-prompt-generations - type: alpaca - shards: 4 - shards_index: 0 -dataset_prepared_path: last_run_prepared -val_set_size: 0.05 -adapter: lora -lora_model_dir: -sequence_len: 2048 -max_packed_sequence_len: 2048 -lora_r: 8 -lora_alpha: 32 -lora_dropout: 0.05 -lora_target_modules: - - query_key_value -lora_fan_in_fan_out: true # pythia/GPTNeoX lora specific -wandb_project: gpt4all-neox-20b -wandb_watch: -wandb_run_id: -wandb_log_model: -output_dir: ./gpt4all-neox-20b -gradient_accumulation_steps: 1 -micro_batch_size: 4 -num_epochs: 5 -learning_rate: 0.00003 -lr_scheduler: one_cycle -train_on_inputs: false -group_by_length: false -bf16: True -tf32: True -early_stopping_patience: -resume_from_checkpoint: -local_rank: diff --git a/configs/stability_3b.yml b/configs/stability_3b.yml deleted file mode 100644 index 83516a20a..000000000 --- a/configs/stability_3b.yml +++ /dev/null @@ -1,56 +0,0 @@ -base_model: stabilityai/stablelm-base-alpha-3b -base_model_config: stabilityai/stablelm-base-alpha-3b -load_in_8bit: false -datasets: - - path: vicgalle/alpaca-gpt4 - type: alpaca -dataset_prepared_path: last_run_prepared -val_set_size: 0.04 -adapter: -lora_model_dir: -sequence_len: 4096 -max_packed_sequence_len: 4096 -lora_r: 8 -lora_alpha: 16 -lora_dropout: 0.05 -lora_target_modules: - - q_proj - - v_proj -lora_fan_in_fan_out: false -wandb_project: 
stable-alpaca-3b -wandb_watch: -wandb_run_id: -wandb_log_model: -output_dir: ./stable-alpaca-3b -gradient_accumulation_steps: 1 -micro_batch_size: 1 -num_epochs: 1 -optimizer: adamw_bnb_8bit -torchdistx_path: -lr_scheduler: cosine -learning_rate: 0.0000002 -train_on_inputs: false -group_by_length: false -bf16: true -tf32: true -early_stopping_patience: -resume_from_checkpoint: -local_rank: -logging_steps: 1 -xformers_attention: true -flash_attention: -gptq_groupsize: -gptq_model_v1: -warmup_steps: 100 -eval_steps: 50 -save_steps: 200 -debug: -deepspeed: -weight_decay: 0.01 -fsdp: -fsdp_config: -#tokens: -# pad_token: "[PAD]" -# bos_token: "" -# eos_token: "" -# unk_token: "" diff --git a/examples/cerebras/qlora.yml b/examples/cerebras/qlora.yml new file mode 100644 index 000000000..9340299b9 --- /dev/null +++ b/examples/cerebras/qlora.yml @@ -0,0 +1,60 @@ +base_model: cerebras/Cerebras-GPT-1.3B +base_model_config: cerebras/Cerebras-GPT-1.3B +load_in_8bit: false +load_in_4bit: true +strict: false +push_dataset_to_hub: +datasets: + - path: teknium/GPT4-LLM-Cleaned + type: alpaca +dataset_prepared_path: last_run_prepared +val_set_size: 0.01 +adapter: qlora +lora_model_dir: +sequence_len: 2048 +max_packed_sequence_len: 2048 +lora_r: 16 +lora_alpha: 32 +lora_dropout: 0.05 +lora_target_modules: + - c_fc + - c_attn + - c_proj +lora_target_linear: +lora_fan_in_fan_out: +wandb_project: +wandb_watch: +wandb_run_id: +wandb_log_model: +output_dir: ./qlora-out +batch_size: 4 +micro_batch_size: 4 +num_epochs: 2 +optimizer: paged_adamw_8bit +torchdistx_path: +lr_scheduler: cosine +learning_rate: 0.0002 +train_on_inputs: false +group_by_length: true +bf16: true +fp16: false +tf32: true +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: true +flash_attention: +gptq_groupsize: +gptq_model_v1: +warmup_steps: 10 +eval_steps: 20 +save_steps: +debug: +deepspeed: +weight_decay: 0.1 +fsdp: +fsdp_config: 
+special_tokens: + pad_token: "<|endoftext|>" From a43bae9ff06d927083f60e76caaae9338d43b613 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Sun, 11 Jun 2023 10:44:03 -0400 Subject: [PATCH 3/8] update the support matrix --- README.md | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 180d97932..1c1e1f65a 100644 --- a/README.md +++ b/README.md @@ -16,13 +16,14 @@ ## Axolotl supports -| | fp16/fp32 | fp16/fp32 w/ lora | qlora | 4bit-quant | 4bit-quant w/flash attention | flash attention | xformers attention | -|---------|:----------|:------------------|------|------------|------------------------------|-----------------|--------------------| -| llama | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| Pythia | ✅ | ✅ | ❓ | ❌ | ❌ | ❌ | ❓ | -| cerebras | ✅ | ✅ | ❓ | ❌ | ❌ | ❌ | ❓ | -| mpt | ✅ | ❌ | ❓ | ❌ | ❌ | ❌ | ❓ | -| falcon | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❓ | +| | fp16/fp32 | fp16/fp32 w/ lora | qlora | 4bit-quant | 4bit-quant w/flash attention | flash attention | xformers attention | +|----------|:----------|:------------------|------|------------|------------------------------|----------------|-----------------| +| llama | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| Pythia | ✅ | ✅ | ❓ | ❌ | ❌ | ❌ | ❓ | +| cerebras | ✅ | ✅ | ❓ | ❌ | ❌ | ❌ | ✅ | +| mpt | ✅ | ❌ | ❓ | ❌ | ❌ | ❌ | ❓ | +| falcon | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | +| gpt-j | ✅ | ✅ | ✅ | ❌ | ❌ | ❓ | ✅ | ## Quickstart ⚡ From 280832cec2e8c32aecfa525a161b6932ee4078f9 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Sun, 11 Jun 2023 10:52:36 -0400 Subject: [PATCH 4/8] more matrix updates --- README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 1c1e1f65a..0edeff447 100644 --- a/README.md +++ b/README.md @@ -17,13 +17,13 @@ ## Axolotl supports | | fp16/fp32 | fp16/fp32 w/ lora | qlora | 4bit-quant | 4bit-quant w/flash attention | flash attention | xformers attention | 
-|----------|:----------|:------------------|------|------------|------------------------------|----------------|-----------------| -| llama | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| Pythia | ✅ | ✅ | ❓ | ❌ | ❌ | ❌ | ❓ | -| cerebras | ✅ | ✅ | ❓ | ❌ | ❌ | ❌ | ✅ | -| mpt | ✅ | ❌ | ❓ | ❌ | ❌ | ❌ | ❓ | -| falcon | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | -| gpt-j | ✅ | ✅ | ✅ | ❌ | ❌ | ❓ | ✅ | +|----------|:----------|:------------------|-------|------------|------------------------------|-----------------|--------------------| +| llama | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| Pythia | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❓ | +| cerebras | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | +| mpt | ✅ | ❌ | ❓ | ❌ | ❌ | ❌ | ❓ | +| falcon | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | +| gpt-j | ✅ | ✅ | ✅ | ❌ | ❌ | ❓ | ✅ | ## Quickstart ⚡ From a6ebf57e827ff1d9c41238bf606ce7c3f7338f98 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Sun, 11 Jun 2023 10:55:32 -0400 Subject: [PATCH 5/8] fix table formatting --- README.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 0edeff447..8d201e739 100644 --- a/README.md +++ b/README.md @@ -16,14 +16,14 @@ ## Axolotl supports -| | fp16/fp32 | fp16/fp32 w/ lora | qlora | 4bit-quant | 4bit-quant w/flash attention | flash attention | xformers attention | -|----------|:----------|:------------------|-------|------------|------------------------------|-----------------|--------------------| -| llama | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| Pythia | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❓ | -| cerebras | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | -| mpt | ✅ | ❌ | ❓ | ❌ | ❌ | ❌ | ❓ | -| falcon | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | -| gpt-j | ✅ | ✅ | ✅ | ❌ | ❌ | ❓ | ✅ | +| | fp16/fp32 | fp16/fp32 w/ lora | qlora | gptq | gptq w/ lora | gptq w/flash attention | flash attention | xformers attention | +|----------|:----------|:------------------|-------|------|:-------------|------------------------|-----------------|--------------------| +| llama | ✅ | ✅ | ✅ | ✅ | ❓ | ✅ | ✅ | ✅ | +| Pythia | ✅ | ✅ | ✅ | ❌ | ❓ | 
❌ | ❌ | ❓ | +| cerebras | ✅ | ✅ | ✅ | ❌ | ❓ | ❌ | ❌ | ✅ | +| mpt | ✅ | ❌ | ❓ | ❌ | ❓ | ❌ | ❌ | ❓ | +| falcon | ✅ | ✅ | ✅ | ❌ | ❓ | ❌ | ❌ | ✅ | +| gpt-j | ✅ | ✅ | ✅ | ❌ | ❓ | ❌ | ❓ | ✅ | ## Quickstart ⚡ From d0d7eaa4f347c9fc6ba267d1966b83ea6f048a96 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Sun, 11 Jun 2023 11:03:31 -0400 Subject: [PATCH 6/8] update openllama and clean up paths --- README.md | 16 ++++++++-------- .../{gptj-qlora/config.yml => gptj/qlora.yml} | 0 examples/openllama-3b/README.md | 16 ++++++++++++++++ .../config.yml => openllama-3b/lora.yml} | 4 ++-- .../config.yml => openllama-3b/qlora.yml} | 4 ++-- examples/qlora-openllama-3b/README.md | 6 ------ 6 files changed, 28 insertions(+), 18 deletions(-) rename examples/{gptj-qlora/config.yml => gptj/qlora.yml} (100%) create mode 100644 examples/openllama-3b/README.md rename examples/{lora-openllama-3b/config.yml => openllama-3b/lora.yml} (90%) rename examples/{qlora-openllama-3b/config.yml => openllama-3b/qlora.yml} (90%) delete mode 100644 examples/qlora-openllama-3b/README.md diff --git a/README.md b/README.md index 8d201e739..a31eee5fb 100644 --- a/README.md +++ b/README.md @@ -16,14 +16,14 @@ ## Axolotl supports -| | fp16/fp32 | fp16/fp32 w/ lora | qlora | gptq | gptq w/ lora | gptq w/flash attention | flash attention | xformers attention | -|----------|:----------|:------------------|-------|------|:-------------|------------------------|-----------------|--------------------| -| llama | ✅ | ✅ | ✅ | ✅ | ❓ | ✅ | ✅ | ✅ | -| Pythia | ✅ | ✅ | ✅ | ❌ | ❓ | ❌ | ❌ | ❓ | -| cerebras | ✅ | ✅ | ✅ | ❌ | ❓ | ❌ | ❌ | ✅ | -| mpt | ✅ | ❌ | ❓ | ❌ | ❓ | ❌ | ❌ | ❓ | -| falcon | ✅ | ✅ | ✅ | ❌ | ❓ | ❌ | ❌ | ✅ | -| gpt-j | ✅ | ✅ | ✅ | ❌ | ❓ | ❌ | ❓ | ✅ | +| | fp16/fp32 | lora | qlora | gptq | gptq w/ lora | gptq w/flash attn | flash attn | xformers attn | +|----------|:----------|:-----|-------|------|:-------------|-------------------|------------|---------------| +| llama | ✅ | ✅ | ✅ | ✅ | ❓ | ✅ | ✅ | ✅ | +| Pythia | 
✅ | ✅ | ✅ | ❌ | ❓ | ❌ | ❌ | ❓ | +| cerebras | ✅ | ✅ | ✅ | ❌ | ❓ | ❌ | ❌ | ✅ | +| mpt | ✅ | ❌ | ❓ | ❌ | ❓ | ❌ | ❌ | ❓ | +| falcon | ✅ | ✅ | ✅ | ❌ | ❓ | ❌ | ❌ | ✅ | +| gpt-j | ✅ | ✅ | ✅ | ❌ | ❓ | ❌ | ❓ | ✅ | ## Quickstart ⚡ diff --git a/examples/gptj-qlora/config.yml b/examples/gptj/qlora.yml similarity index 100% rename from examples/gptj-qlora/config.yml rename to examples/gptj/qlora.yml diff --git a/examples/openllama-3b/README.md b/examples/openllama-3b/README.md new file mode 100644 index 000000000..9e8f3a9e8 --- /dev/null +++ b/examples/openllama-3b/README.md @@ -0,0 +1,16 @@ +# openllama-3b + +Basic full tune +```shell +accelerate launch scripts/finetune.py examples/openllama-3b/config.yml +``` + +LoRA +```shell +accelerate launch scripts/finetune.py examples/openllama-3b/lora.yml +``` + +QLoRA +```shell +accelerate launch scripts/finetune.py examples/openllama-3b/qlora.yml +``` diff --git a/examples/lora-openllama-3b/config.yml b/examples/openllama-3b/lora.yml similarity index 90% rename from examples/lora-openllama-3b/config.yml rename to examples/openllama-3b/lora.yml index 2e1644546..98e2c2adc 100644 --- a/examples/lora-openllama-3b/config.yml +++ b/examples/openllama-3b/lora.yml @@ -1,5 +1,5 @@ -base_model: openlm-research/open_llama_3b_600bt_preview -base_model_config: openlm-research/open_llama_3b_600bt_preview +base_model: openlm-research/open_llama_3b +base_model_config: openlm-research/open_llama_3b model_type: LlamaForCausalLM tokenizer_type: LlamaTokenizer load_in_8bit: true diff --git a/examples/qlora-openllama-3b/config.yml b/examples/openllama-3b/qlora.yml similarity index 90% rename from examples/qlora-openllama-3b/config.yml rename to examples/openllama-3b/qlora.yml index 87e1dfd94..83ae31f91 100644 --- a/examples/qlora-openllama-3b/config.yml +++ b/examples/openllama-3b/qlora.yml @@ -1,5 +1,5 @@ -base_model: openlm-research/open_llama_3b_600bt_preview -base_model_config: openlm-research/open_llama_3b_600bt_preview 
+base_model: openlm-research/open_llama_3b +base_model_config: openlm-research/open_llama_3b model_type: LlamaForCausalLM tokenizer_type: LlamaTokenizer load_in_8bit: false diff --git a/examples/qlora-openllama-3b/README.md b/examples/qlora-openllama-3b/README.md deleted file mode 100644 index d79ea7f3f..000000000 --- a/examples/qlora-openllama-3b/README.md +++ /dev/null @@ -1,6 +0,0 @@ -# qlora-openllama-3b - -```shell -accelerate launch scripts/finetune.py examples/qlora-openllama-3b/config.yml - -``` From 336aa3fd487a8c35b7637fbbddac81a67d078a41 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Sun, 11 Jun 2023 11:05:29 -0400 Subject: [PATCH 7/8] gptq lora llama is obviously good --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a31eee5fb..349dd370a 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ | | fp16/fp32 | lora | qlora | gptq | gptq w/ lora | gptq w/flash attn | flash attn | xformers attn | |----------|:----------|:-----|-------|------|:-------------|-------------------|------------|---------------| -| llama | ✅ | ✅ | ✅ | ✅ | ❓ | ✅ | ✅ | ✅ | +| llama | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Pythia | ✅ | ✅ | ✅ | ❌ | ❓ | ❌ | ❌ | ❓ | | cerebras | ✅ | ✅ | ✅ | ❌ | ❓ | ❌ | ❌ | ✅ | | mpt | ✅ | ❌ | ❓ | ❌ | ❓ | ❌ | ❌ | ❓ | From 6b3f509d9e14e58369a7c4322de78a46a90924ae Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Sun, 11 Jun 2023 11:50:12 -0400 Subject: [PATCH 8/8] forgot to add this file --- examples/openllama-3b/config.yml | 61 ++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 examples/openllama-3b/config.yml diff --git a/examples/openllama-3b/config.yml b/examples/openllama-3b/config.yml new file mode 100644 index 000000000..248b740ff --- /dev/null +++ b/examples/openllama-3b/config.yml @@ -0,0 +1,61 @@ +base_model: openlm-research/open_llama_3b +base_model_config: openlm-research/open_llama_3b +model_type: LlamaForCausalLM +tokenizer_type: LlamaTokenizer 
+load_in_8bit: false +load_in_4bit: false +strict: false +push_dataset_to_hub: +datasets: + - path: teknium/GPT4-LLM-Cleaned + type: alpaca +dataset_prepared_path: last_run_prepared +val_set_size: 0.02 +adapter: +lora_model_dir: +sequence_len: 256 +max_packed_sequence_len: +lora_r: +lora_alpha: +lora_dropout: +lora_target_modules: +lora_target_linear: +lora_fan_in_fan_out: +wandb_project: +wandb_watch: +wandb_run_id: +wandb_log_model: +output_dir: ./lora-out +batch_size: 16 +micro_batch_size: 4 +num_epochs: 3 +optimizer: adamw_bnb_8bit +torchdistx_path: +lr_scheduler: cosine +learning_rate: 0.0002 +train_on_inputs: false +group_by_length: false +bf16: false +fp16: true +tf32: false +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: +gptq_groupsize: +gptq_model_v1: +warmup_steps: 10 +eval_steps: 50 +save_steps: +debug: +deepspeed: +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: + bos_token: "<s>" + eos_token: "</s>" + unk_token: "<unk>"