From 193c73bce040fe965f5ea66d235e8823bd19e5e7 Mon Sep 17 00:00:00 2001 From: Angainor Development <54739135+AngainorDev@users.noreply.github.com> Date: Thu, 8 Jun 2023 09:18:58 +0200 Subject: [PATCH 01/24] Fix training over existing lora When training with Lora, and starting with an existing lora weights, current code produces a model with 0 trainable params and training can't work. Adding the "is_trainable" param allows the loaded peft to be trained and fixes the bug. --- src/axolotl/utils/models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py index 58e0e97ec..b5d5124cb 100644 --- a/src/axolotl/utils/models.py +++ b/src/axolotl/utils/models.py @@ -402,6 +402,7 @@ def load_lora(model, cfg): model = PeftModel.from_pretrained( model, cfg.lora_model_dir, + is_trainable=True, device_map=cfg.device_map, # torch_dtype=torch.float16, ) From 813cfa4c14f990c53ed42e9decd84b3e41a91102 Mon Sep 17 00:00:00 2001 From: Angainor Development <54739135+AngainorDev@users.noreply.github.com> Date: Fri, 9 Jun 2023 08:49:32 +0200 Subject: [PATCH 02/24] WIP: Rely on cfg.inference --- src/axolotl/utils/models.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py index b5d5124cb..c3f988e52 100644 --- a/src/axolotl/utils/models.py +++ b/src/axolotl/utils/models.py @@ -80,8 +80,7 @@ def load_model( model_type, tokenizer, cfg, - adapter="lora", - inference=False, + adapter="lora" ): # type: (str, str, str, str, DictDefault, Optional[str], bool) -> Tuple[PreTrainedModel, Optional[PeftConfig]] """ @@ -95,7 +94,7 @@ def load_model( ) if is_llama_derived_model and cfg.flash_attention: - if cfg.device not in ["mps", "cpu"] and inference is False: + if cfg.device not in ["mps", "cpu"] and cfg.inference is False: from axolotl.flash_attn import replace_llama_attn_with_flash_attn logging.info("patching with flash attention") @@ -402,7 +401,7 @@ def load_lora(model, cfg): model = PeftModel.from_pretrained( model, cfg.lora_model_dir, - is_trainable=True, + is_trainable=not cfg.inference, device_map=cfg.device_map, # torch_dtype=torch.float16, ) From bd3b53734459e0ace9795579150a3eff0ff4eaeb Mon Sep 17 00:00:00 2001 From: Angainor Development <54739135+AngainorDev@users.noreply.github.com> Date: Fri, 9 Jun 2023 08:59:05 +0200 Subject: [PATCH 03/24] Feed cfg.inference --- scripts/finetune.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/scripts/finetune.py b/scripts/finetune.py index 7c4d865fa..ab8f068aa 100644 --- a/scripts/finetune.py +++ b/scripts/finetune.py @@ -182,6 +182,9 @@ def train( if cfg.bf16: cfg.fp16 = True cfg.bf16 = False + + # Store inference mode into cfg when passed via args + cfg.inference = True if "inference" in kwargs else cfg.get("inference", False) # load the tokenizer first tokenizer_config = cfg.tokenizer_config or cfg.base_model_config @@ -189,8 +192,8 @@ def train( tokenizer = load_tokenizer(tokenizer_config, cfg.tokenizer_type, cfg) if check_not_in( - ["inference", "shard", "merge_lora"], kwargs - ): # don't need to load dataset for these + ["shard", "merge_lora"], kwargs + ) and not cfg.inference: # don't need to load dataset for these train_dataset, eval_dataset = load_prepare_datasets( tokenizer, cfg, DEFAULT_DATASET_PREPARED_PATH ) @@ -216,8 +219,7 @@ def train( cfg.model_type, tokenizer, cfg, - adapter=cfg.adapter, - inference=("inference" in kwargs), + adapter=cfg.adapter ) if "merge_lora" in kwargs and cfg.adapter is not 
None: @@ -230,7 +232,7 @@ def train( model.save_pretrained(str(Path(cfg.output_dir) / "merged")) return - if "inference" in kwargs: + if cfg.inference: logging.info("calling do_inference function") do_inference(cfg, model, tokenizer) return From c2508987a6354084bf8bbf328c8eccc81e3a9814 Mon Sep 17 00:00:00 2001 From: Angainor Development <54739135+AngainorDev@users.noreply.github.com> Date: Sat, 10 Jun 2023 19:06:10 +0200 Subject: [PATCH 04/24] Remove explicit definition of cfg.inference --- scripts/finetune.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/scripts/finetune.py b/scripts/finetune.py index ab8f068aa..faf1bb31d 100644 --- a/scripts/finetune.py +++ b/scripts/finetune.py @@ -182,9 +182,6 @@ def train( if cfg.bf16: cfg.fp16 = True cfg.bf16 = False - - # Store inference mode into cfg when passed via args - cfg.inference = True if "inference" in kwargs else cfg.get("inference", False) # load the tokenizer first tokenizer_config = cfg.tokenizer_config or cfg.base_model_config From a808bf913f79fa29596e283a0bb70954caac0645 Mon Sep 17 00:00:00 2001 From: Angainor Development <54739135+AngainorDev@users.noreply.github.com> Date: Sat, 10 Jun 2023 20:28:49 +0200 Subject: [PATCH 05/24] Fix missing cfg. --- src/axolotl/utils/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py index 7156adec0..67facd607 100644 --- a/src/axolotl/utils/models.py +++ b/src/axolotl/utils/models.py @@ -96,7 +96,7 @@ def load_model( ) if cfg.is_llama_derived_model and cfg.flash_attention: - if cfg.device not in ["mps", "cpu"] and inference is False: + if cfg.device not in ["mps", "cpu"] and not cfg.inference: from axolotl.flash_attn import replace_llama_attn_with_flash_attn logging.info("patching with flash attention") From 5ffefee37f351c827759394057a295aa99b7d64f Mon Sep 17 00:00:00 2001 From: Akshay Jain Date: Sat, 10 Jun 2023 18:34:54 -0700 Subject: [PATCH 06/24] Update FAQS.md Update FAQS.md with the following statement Error invalid argument at line 359 in file /workspace/bitsandbytes/csrc/pythonInterface.c /arrow/cpp/src/arrow/filesystem/s3fs.cc:2598: arrow::fs::FinalizeS3 was not called even though S3 was initialized. This could lead to a segmentation fault at exit try reinstalling bitsandbytes and transformers from source --- FAQS.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/FAQS.md b/FAQS.md index bdf056be7..906cf2bef 100644 --- a/FAQS.md +++ b/FAQS.md @@ -2,3 +2,6 @@ - Can you train StableLM with this? Yes, but only with a single GPU atm. Multi GPU support is coming soon! Just waiting on this [PR](https://github.com/huggingface/transformers/pull/22874) - Will this work with Deepspeed? That's still a WIP, but setting `export ACCELERATE_USE_DEEPSPEED=true` should work in some cases +- Error invalid argument at line 359 in file /workspace/bitsandbytes/csrc/pythonInterface.c +/arrow/cpp/src/arrow/filesystem/s3fs.cc:2598: arrow::fs::FinalizeS3 was not called even though S3 was initialized. This could lead to a segmentation fault at exit. +
Try reinstalling bitsandbytes and transformers from source
From dd7d16d2ebee1ef21dc8061e8f271ae1d4329858 Mon Sep 17 00:00:00 2001 From: Akshay Jain Date: Sat, 10 Jun 2023 19:15:50 -0700 Subject: [PATCH 07/24] Update FAQS.md Updated FAQS.md with backticks around error message --- FAQS.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/FAQS.md b/FAQS.md index 906cf2bef..28183abc9 100644 --- a/FAQS.md +++ b/FAQS.md @@ -2,6 +2,6 @@ - Can you train StableLM with this? Yes, but only with a single GPU atm. Multi GPU support is coming soon! Just waiting on this [PR](https://github.com/huggingface/transformers/pull/22874) - Will this work with Deepspeed? That's still a WIP, but setting `export ACCELERATE_USE_DEEPSPEED=true` should work in some cases -- Error invalid argument at line 359 in file /workspace/bitsandbytes/csrc/pythonInterface.c -/arrow/cpp/src/arrow/filesystem/s3fs.cc:2598: arrow::fs::FinalizeS3 was not called even though S3 was initialized. This could lead to a segmentation fault at exit. -
Try reinstalling bitsandbytes and transformers from source
+- ```Error invalid argument at line 359 in file /workspace/bitsandbytes/csrc/pythonInterface.c``` +```/arrow/cpp/src/arrow/filesystem/s3fs.cc:2598: arrow::fs::FinalizeS3 was not called even though S3 was initialized.``` +This could lead to a segmentation fault at exit. Try reinstalling bitsandbytes and transformers from source. From 0e664a5ebcb6c09a24cadbc567be5eae5e7bdb42 Mon Sep 17 00:00:00 2001 From: Akshay Jain Date: Sat, 10 Jun 2023 19:26:12 -0700 Subject: [PATCH 08/24] Update FAQS.md Co-authored-by: NanoCode012 --- FAQS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FAQS.md b/FAQS.md index 28183abc9..29222992a 100644 --- a/FAQS.md +++ b/FAQS.md @@ -3,5 +3,5 @@ - Can you train StableLM with this? Yes, but only with a single GPU atm. Multi GPU support is coming soon! Just waiting on this [PR](https://github.com/huggingface/transformers/pull/22874) - Will this work with Deepspeed? That's still a WIP, but setting `export ACCELERATE_USE_DEEPSPEED=true` should work in some cases - ```Error invalid argument at line 359 in file /workspace/bitsandbytes/csrc/pythonInterface.c``` -```/arrow/cpp/src/arrow/filesystem/s3fs.cc:2598: arrow::fs::FinalizeS3 was not called even though S3 was initialized.``` +`/arrow/cpp/src/arrow/filesystem/s3fs.cc:2598: arrow::fs::FinalizeS3 was not called even though S3 was initialized.` This could lead to a segmentation fault at exit. Try reinstalling bitsandbytes and transformers from source. From e3e7b52a5b0549fc6abb75ab6d3aa377d9478924 Mon Sep 17 00:00:00 2001 From: Akshay Jain Date: Sat, 10 Jun 2023 23:36:14 -0700 Subject: [PATCH 09/24] Update FAQS.md Converted (```) to single backtick (') uniformly. --- FAQS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FAQS.md b/FAQS.md index 29222992a..f3c9dd525 100644 --- a/FAQS.md +++ b/FAQS.md @@ -2,6 +2,6 @@ - Can you train StableLM with this? Yes, but only with a single GPU atm. Multi GPU support is coming soon! Just waiting on this [PR](https://github.com/huggingface/transformers/pull/22874) - Will this work with Deepspeed? That's still a WIP, but setting `export ACCELERATE_USE_DEEPSPEED=true` should work in some cases -- ```Error invalid argument at line 359 in file /workspace/bitsandbytes/csrc/pythonInterface.c``` +- `Error invalid argument at line 359 in file /workspace/bitsandbytes/csrc/pythonInterface.c` `/arrow/cpp/src/arrow/filesystem/s3fs.cc:2598: arrow::fs::FinalizeS3 was not called even though S3 was initialized.` This could lead to a segmentation fault at exit. Try reinstalling bitsandbytes and transformers from source. 
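For anyone hitting that bitsandbytes/arrow error, a rough sketch of what "reinstalling from source" can look like (the repository URLs and the bitsandbytes build target are assumptions; check each project's README for your CUDA version):

```shell
# transformers from the main branch
pip3 install -U git+https://github.com/huggingface/transformers.git

# bitsandbytes built from source; the make target depends on your CUDA toolkit
git clone https://github.com/TimDettmers/bitsandbytes.git
cd bitsandbytes
CUDA_VERSION=118 make cuda11x  # placeholder target/version, see the bitsandbytes README
pip3 install .
```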
From fe0b76854ec444643481da131228c8d214654f91 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Sun, 11 Jun 2023 09:20:40 -0400 Subject: [PATCH 10/24] match up gradient checkpointing when using lora w config --- src/axolotl/utils/models.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py index fb363952c..b79f116fa 100644 --- a/src/axolotl/utils/models.py +++ b/src/axolotl/utils/models.py @@ -305,7 +305,9 @@ def load_model( or (cfg.adapter == "qlora" and cfg.load_in_4bit) ): logging.info("converting PEFT model w/ prepare_model_for_kbit_training") - model = prepare_model_for_kbit_training(model) + model = prepare_model_for_kbit_training( + model, use_gradient_checkpointing=cfg.gradient_checkpointing + ) model, lora_config = load_adapter(model, cfg, adapter) From b565ecf0a1d6bcecbcfa7366dc2ca04983ca0523 Mon Sep 17 00:00:00 2001 From: AngainorDev Date: Sun, 11 Jun 2023 15:23:38 +0200 Subject: [PATCH 11/24] Fix strict and Lint --- scripts/finetune.py | 10 +++++----- src/axolotl/utils/models.py | 9 ++------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/scripts/finetune.py b/scripts/finetune.py index 3222afd81..49bd505ce 100644 --- a/scripts/finetune.py +++ b/scripts/finetune.py @@ -158,7 +158,7 @@ def train( cfg_keys = cfg.keys() for k, _ in kwargs.items(): # if not strict, allow writing to cfg even if it's not in the yml already - if k in cfg_keys or cfg.strict is False: + if k in cfg_keys or not cfg.strict: # handle booleans if isinstance(cfg[k], bool): cfg[k] = bool(kwargs[k]) @@ -198,9 +198,9 @@ def train( logging.info(f"loading tokenizer... {tokenizer_config}") tokenizer = load_tokenizer(tokenizer_config, cfg.tokenizer_type, cfg) - if check_not_in( - ["shard", "merge_lora"], kwargs - ) and not cfg.inference: # don't need to load dataset for these + if ( + check_not_in(["shard", "merge_lora"], kwargs) and not cfg.inference + ): # don't need to load dataset for these train_dataset, eval_dataset = load_prepare_datasets( tokenizer, cfg, DEFAULT_DATASET_PREPARED_PATH ) @@ -226,7 +226,7 @@ def train( cfg.model_type, tokenizer, cfg, - adapter=cfg.adapter + adapter=cfg.adapter, ) if "merge_lora" in kwargs and cfg.adapter is not None: diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py index 67facd607..3a87392fc 100644 --- a/src/axolotl/utils/models.py +++ b/src/axolotl/utils/models.py @@ -77,14 +77,9 @@ def load_tokenizer( def load_model( - base_model, - base_model_config, - model_type, - tokenizer, - cfg, - adapter="lora" + base_model, base_model_config, model_type, tokenizer, cfg, adapter="lora" ): - # type: (str, str, str, AutoTokenizer, DictDefault, Optional[str], bool) -> Tuple[PreTrainedModel, Optional[PeftConfig]] + # type: (str, str, str, AutoTokenizer, DictDefault, Optional[str]) -> Tuple[PreTrainedModel, Optional[PeftConfig]] """ Load a model from a base model and a model type. 
""" From 14668fa54ec8c35771d50ff7956cbb6541e81f6a Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Sun, 11 Jun 2023 09:26:10 -0400 Subject: [PATCH 12/24] new validation for mpt w grad checkpoints --- src/axolotl/utils/validation.py | 5 +++++ tests/test_validation.py | 14 ++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/src/axolotl/utils/validation.py b/src/axolotl/utils/validation.py index 04ffc4c1b..e2d0b34b1 100644 --- a/src/axolotl/utils/validation.py +++ b/src/axolotl/utils/validation.py @@ -57,6 +57,11 @@ def validate_config(cfg): if (cfg.base_model and "falcon" in cfg.base_model.lower()) and cfg.fsdp: raise ValueError("FSDP is not supported for falcon models") + if ( + cfg.base_model and "mpt" in cfg.base_model.lower() + ) and cfg.gradient_checkpointing: + raise ValueError("gradient_checkpointing is not supported for MPT models") + # TODO # MPT 7b # https://github.com/facebookresearch/bitsandbytes/issues/25 diff --git a/tests/test_validation.py b/tests/test_validation.py index 50bdf37e6..e28891060 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -198,3 +198,17 @@ class ValidationTest(unittest.TestCase): ) validate_config(cfg) + + def test_mpt_gradient_checkpointing(self): + regex_exp = r".*gradient_checkpointing is not supported for MPT models*" + + # Check for lower-case + cfg = DictDefault( + { + "base_model": "mosaicml/mpt-7b", + "gradient_checkpointing": True, + } + ) + + with pytest.raises(ValueError, match=regex_exp): + validate_config(cfg) From 77762a5d6b5ee73d5acbbb2e94508c23183f7391 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Sun, 11 Jun 2023 09:41:41 -0400 Subject: [PATCH 13/24] get rid of some configs, formalize pythioa lora config --- configs/accelerate/default_config.yaml | 15 ------- configs/llama_13B_alpaca.yml | 39 ---------------- configs/llama_65B_alpaca.yml | 44 ------------------ configs/llama_7B_4bit.yml | 45 ------------------- configs/quickstart.yml | 45 ------------------- configs/vicuna_13B_4bit_reflect.yml | 45 ------------------- .../pythia/lora.yml | 26 +++++------ 7 files changed, 11 insertions(+), 248 deletions(-) delete mode 100644 configs/accelerate/default_config.yaml delete mode 100644 configs/llama_13B_alpaca.yml delete mode 100644 configs/llama_65B_alpaca.yml delete mode 100644 configs/llama_7B_4bit.yml delete mode 100644 configs/quickstart.yml delete mode 100644 configs/vicuna_13B_4bit_reflect.yml rename configs/pythia_1_2B_alpaca.yml => examples/pythia/lora.yml (56%) diff --git a/configs/accelerate/default_config.yaml b/configs/accelerate/default_config.yaml deleted file mode 100644 index 9759703af..000000000 --- a/configs/accelerate/default_config.yaml +++ /dev/null @@ -1,15 +0,0 @@ -compute_environment: LOCAL_MACHINE -distributed_type: 'NO' -downcast_bf16: 'no' -gpu_ids: all -machine_rank: 0 -main_training_function: main -mixed_precision: bf16 -num_machines: 1 -num_processes: 1 -rdzv_backend: static -same_network: true -tpu_env: [] -tpu_use_cluster: false -tpu_use_sudo: false -use_cpu: false diff --git a/configs/llama_13B_alpaca.yml b/configs/llama_13B_alpaca.yml deleted file mode 100644 index 99c9883fe..000000000 --- a/configs/llama_13B_alpaca.yml +++ /dev/null @@ -1,39 +0,0 @@ -base_model: huggyllama/llama-13b -model_type: LlamaForCausalLM -tokenizer_type: LlamaTokenizer -load_in_8bit: true -datasets: - - path: anon8231489123/ShareGPT_Vicuna_unfiltered - data_files: ShareGPT_V3_unfiltered_cleaned_split_no_imsorry.json - type: sharegpt -dataset_prepared_path: last_run_prepared -val_set_size: 0.002 -adapter: 
-lora_model_dir: -sequence_len: 2048 -lora_r: 8 -lora_alpha: 16 -lora_dropout: 0.05 -lora_target_modules: - - q_proj - - v_proj -lora_fan_in_fan_out: false -wandb_project: -wandb_watch: -wandb_run_id: -wandb_log_model: -output_dir: ./llama-13b-sharegpt -gradient_accumulation_steps: 1 -micro_batch_size: 2 -warmup_steps: 1000 -save_steps: -eval_steps: -num_epochs: 5 -learning_rate: 0.00003 -train_on_inputs: false -group_by_length: false -bf16: true -tf32: true -early_stopping_patience: 5 -resume_from_checkpoint: -local_rank: diff --git a/configs/llama_65B_alpaca.yml b/configs/llama_65B_alpaca.yml deleted file mode 100644 index e7d2c211c..000000000 --- a/configs/llama_65B_alpaca.yml +++ /dev/null @@ -1,44 +0,0 @@ -base_model: huggyllama/llama-65b -model_type: LlamaForCausalLM -tokenizer_type: LlamaTokenizer -load_in_8bit: true -datasets: - - path: data/alpaca_data_gpt4.jsonl - type: alpaca - - path: anon8231489123/ShareGPT_Vicuna_unfiltered - data_files: ShareGPT_V3_unfiltered_cleaned_split_no_imsorry.json - type: sharegpt - - path: data/gpt4-instruct-similarity-0.6-dataset.jsonl - type: gpteacher - - path: data/roleplay-similarity_0.6-instruct-dataset.jsonl - type: gpteacher -dataset_prepared_path: last_run_prepared -val_set_size: 0.04 -adapter: lora -lora_model_dir: -sequence_len: 2048 -lora_r: 8 -lora_alpha: 16 -lora_dropout: 0.05 -lora_target_modules: - - q_proj - - v_proj -lora_fan_in_fan_out: false -wandb_project: llama-65b-lora -wandb_watch: -wandb_run_id: -wandb_log_model: -output_dir: ./lora-llama-alpaca -gradient_accumulation_steps: 1 -micro_batch_size: 16 -warmup_steps: 1000 -save_steps: -num_epochs: 5 -learning_rate: 0.00003 -train_on_inputs: false -group_by_length: false -bf16: true -tf32: true -early_stopping_patience: -resume_from_checkpoint: -local_rank: diff --git a/configs/llama_7B_4bit.yml b/configs/llama_7B_4bit.yml deleted file mode 100644 index a7451516c..000000000 --- a/configs/llama_7B_4bit.yml +++ /dev/null @@ -1,45 +0,0 @@ -base_model: decapoda-research/llama-7b-hf-int4 -base_model_config: decapoda-research/llama-7b-hf -model_type: LlamaForCausalLM -tokenizer_type: LlamaTokenizer -load_in_8bit: true -datasets: - - path: tatsu-lab/alpaca # original alpaca dataset - type: alpaca -dataset_prepared_path: data/last_run_prepared -val_set_size: 0.04 -adapter: lora -lora_model_dir: -sequence_len: 2048 -max_packed_sequence_len: 1024 -lora_r: 8 -lora_alpha: 16 -lora_dropout: 0.05 -lora_target_modules: - - q_proj - - v_proj -# - k_proj -# - o_proj -lora_fan_in_fan_out: false -wandb_project: -wandb_watch: -wandb_run_id: -wandb_log_model: -output_dir: ./lora-test -gradient_accumulation_steps: 1 -micro_batch_size: 2 -num_epochs: 3 -warmup_steps: 100 -learning_rate: 0.00003 -train_on_inputs: false -group_by_length: false -bf16: true -tf32: true -gradient_checkpointing: false -early_stopping_patience: 3 -resume_from_checkpoint: -auto_resume_from_checkpoints: true -local_rank: -load_4bit: true -xformers_attention: true -flash_attention: diff --git a/configs/quickstart.yml b/configs/quickstart.yml deleted file mode 100644 index 2362916fc..000000000 --- a/configs/quickstart.yml +++ /dev/null @@ -1,45 +0,0 @@ -base_model: decapoda-research/llama-7b-hf-int4 -base_model_config: decapoda-research/llama-7b-hf -model_type: LlamaForCausalLM -tokenizer_type: LlamaTokenizer -load_in_8bit: true -datasets: - - path: tatsu-lab/alpaca # original alpaca dataset - type: alpaca -dataset_prepared_path: data/last_run_prepared -val_set_size: 0.04 -adapter: lora -lora_model_dir: -sequence_len: 1024 
-max_packed_sequence_len: 1024 -lora_r: 8 -lora_alpha: 16 -lora_dropout: 0.05 -lora_target_modules: - - q_proj - - v_proj -# - k_proj -# - o_proj -lora_fan_in_fan_out: false -wandb_project: -wandb_watch: -wandb_run_id: -wandb_log_model: -output_dir: ./lora-test -gradient_accumulation_steps: 1 -micro_batch_size: 1 -num_epochs: 3 -warmup_steps: 100 -learning_rate: 0.00003 -train_on_inputs: false -group_by_length: false -bf16: true -tf32: true -gradient_checkpointing: false -early_stopping_patience: 3 -resume_from_checkpoint: -auto_resume_from_checkpoints: true -local_rank: -gptq: true -xformers_attention: true -flash_attention: diff --git a/configs/vicuna_13B_4bit_reflect.yml b/configs/vicuna_13B_4bit_reflect.yml deleted file mode 100644 index 3e37f5334..000000000 --- a/configs/vicuna_13B_4bit_reflect.yml +++ /dev/null @@ -1,45 +0,0 @@ -base_model: anon8231489123/vicuna-13b-GPTQ-4bit-128g -base_model_config: anon8231489123/vicuna-13b-GPTQ-4bit-128g -model_type: LlamaForCausalLM -tokenizer_type: LlamaTokenizer -load_in_8bit: false -load_4bit: true -gptq_groupsize: 128 -gptq_model_v1: false -datasets: -# https://github.com/vaguenebula/AlpacaDataReflect/blob/main/alpaca_reflect_pruned.json - - path: data/alpaca_reflect_pruned.jsonl - type: reflection -dataset_prepared_path: data/last_run_prepared -val_set_size: 0.04 -adapter: lora -lora_model_dir: -sequence_len: 2048 -max_packed_sequence_len: 2048 -lora_r: 8 -lora_alpha: 16 -lora_dropout: 0.05 -lora_target_modules: - - q_proj - - v_proj -# - k_proj -# - o_proj -lora_fan_in_fan_out: false -wandb_project: -wandb_watch: -wandb_run_id: -wandb_log_model: -output_dir: ./lora-reflect -gradient_accumulation_steps: 1 -micro_batch_size: 2 -num_epochs: 3 -learning_rate: 0.00003 -train_on_inputs: false -group_by_length: false -bf16: true -tf32: true -gradient_checkpointing: false -early_stopping_patience: 3 -resume_from_checkpoint: -local_rank: -flash_attention: true diff --git a/configs/pythia_1_2B_alpaca.yml b/examples/pythia/lora.yml similarity index 56% rename from configs/pythia_1_2B_alpaca.yml rename to examples/pythia/lora.yml index 52ed58cb5..e2b28f218 100644 --- a/configs/pythia_1_2B_alpaca.yml +++ b/examples/pythia/lora.yml @@ -1,36 +1,29 @@ base_model: EleutherAI/pythia-1.4b-deduped -model_type: GPTNeoXForCausalLM -tokenizer_type: AutoTokenizer +base_model_config: EleutherAI/pythia-1.4b-deduped load_in_8bit: true datasets: - - path: data/alpaca_data_gpt4.jsonl + - path: teknium/GPT4-LLM-Cleaned type: alpaca - - path: data/vicuna_cleaned.jsonl - type: sharegpt - - path: data/gpt4-instruct-similarity-0.6-dataset.jsonl - type: gpteacher - - path: data/roleplay-similarity_0.6-instruct-dataset.jsonl - type: gpteacher dataset_prepared_path: last_run_prepared val_set_size: 0.05 adapter: lora lora_model_dir: -sequence_len: 2048 -lora_r: 8 +sequence_len: 512 +lora_r: 16 lora_alpha: 32 lora_dropout: 0.05 lora_target_modules: - query_key_value -# - xxx +lora_target_linear: lora_fan_in_fan_out: true # pythia/GPTNeoX lora specific -wandb_project: pythia-1.4b-lora +wandb_project: wandb_watch: wandb_run_id: wandb_log_model: -output_dir: ./lora-alpaca +output_dir: ./lora-alpaca-pythia gradient_accumulation_steps: 1 micro_batch_size: 4 -num_epochs: 5 +num_epochs: 3 learning_rate: 0.00001 train_on_inputs: false group_by_length: false @@ -39,3 +32,6 @@ tf32: True early_stopping_patience: resume_from_checkpoint: local_rank: +weight_decay: 0.1 +eval_steps: 20 +logging_steps: 1 From c530e4b9c877815b8f23a730014a10b81e206cdf Mon Sep 17 00:00:00 2001 From: Wing Lian 
Date: Sun, 11 Jun 2023 10:09:05 -0400 Subject: [PATCH 14/24] more config pruning and migrating --- configs/llama_7B_alpaca.yml | 41 --------- configs/sample.yml | 87 ------------------- examples/gptj-qlora/config.yml | 57 ++++++++++++ .../jeopardy-bot/config.yml | 25 +++--- 4 files changed, 68 insertions(+), 142 deletions(-) delete mode 100644 configs/llama_7B_alpaca.yml delete mode 100644 configs/sample.yml create mode 100644 examples/gptj-qlora/config.yml rename configs/llama_7B_jeopardy.yml => examples/jeopardy-bot/config.yml (75%) diff --git a/configs/llama_7B_alpaca.yml b/configs/llama_7B_alpaca.yml deleted file mode 100644 index 7db2f65aa..000000000 --- a/configs/llama_7B_alpaca.yml +++ /dev/null @@ -1,41 +0,0 @@ -base_model: huggyllama/llama-7b -model_type: LlamaForCausalLM -tokenizer_type: LlamaTokenizer -load_in_8bit: true -datasets: - - path: data/alpaca_data_gpt4.jsonl - type: alpaca - - path: data/vicuna_cleaned.jsonl - type: sharegpt - - path: data/gpt4-instruct-similarity-0.6-dataset.jsonl - type: gpteacher - - path: data/roleplay-similarity_0.6-instruct-dataset.jsonl - type: gpteacher -dataset_prepared_path: last_run_prepared -val_set_size: 0.04 -adapter: lora -lora_model_dir: -sequence_len: 2048 -lora_r: 8 -lora_alpha: 16 -lora_dropout: 0.05 -lora_target_modules: - - q_proj - - v_proj -lora_fan_in_fan_out: false -wandb_project: llama-7b-lora -wandb_watch: -wandb_run_id: -wandb_log_model: -output_dir: ./lora-llama-alpaca -gradient_accumulation_steps: 1 -micro_batch_size: 16 -num_epochs: 5 -learning_rate: 0.00003 -train_on_inputs: false -group_by_length: false -bf16: true -tf32: true -early_stopping_patience: -resume_from_checkpoint: -local_rank: diff --git a/configs/sample.yml b/configs/sample.yml deleted file mode 100644 index ddd95cb55..000000000 --- a/configs/sample.yml +++ /dev/null @@ -1,87 +0,0 @@ -# this is the huggingface model that contains *.pt, *.safetensors, or *.bin files -# this can also be a relative path to a model on disk -base_model: decapoda-research/llama-7b-hf-int4 -# you can specify an ignore pattern if the model repo contains more than 1 model type (*.pt, etc) -base_model_ignore_patterns: -# if the base_model repo on hf hub doesn't include configuration .json files, -# you can set that here, or leave this empty to default to base_model -base_model_config: decapoda-research/llama-7b-hf -# If you want to specify the type of model to load, AutoModelForCausalLM is a good choice too -model_type: AutoModelForCausalLM -# Corresponding tokenizer for the model AutoTokenizer is a good choice -tokenizer_type: AutoTokenizer -# whether you are training a 4-bit quantized model -load_4bit: true -# this will attempt to quantize the model down to 8 bits and use adam 8 bit optimizer -load_in_8bit: true -# a list of one or more datasets to finetune the model with -datasets: - # this can be either a hf dataset, or relative path - - path: vicgalle/alpaca-gpt4 - # The type of prompt to use for training. [alpaca, sharegpt, gpteacher, oasst, reflection] - type: alpaca -# axolotl attempts to save the dataset as an arrow after packing the data together so -# subsequent training attempts load faster, relative path -dataset_prepared_path: data/last_run_prepared -# How much of the dataset to set aside as evaluation. 
1 = 100%, 0.50 = 50%, etc -val_set_size: 0.04 -# if you want to use lora, leave blank to train all parameters in original model -adapter: lora -# if you already have a lora model trained that you want to load, put that here -lora_model_dir: -# the maximum length of an input to train with, this should typically be less than 2048 -# as most models have a token/context limit of 2048 -sequence_len: 2048 -# max sequence length to concatenate training samples together up to -# inspired by StackLLaMA. see https://huggingface.co/blog/stackllama#supervised-fine-tuning -max_packed_sequence_len: 1024 -# lora hyperparameters -lora_r: 8 -lora_alpha: 16 -lora_dropout: 0.05 -lora_target_modules: - - q_proj - - v_proj -# - k_proj -# - o_proj -lora_fan_in_fan_out: false -# wandb configuration if your're using it -wandb_project: -wandb_watch: -wandb_run_id: -wandb_log_model: -# where to save the finsihed model to -output_dir: ./completed-model -# training hyperparameters -gradient_accumulation_steps: 1 -batch_size: -micro_batch_size: 2 -num_epochs: 3 -warmup_steps: 100 -learning_rate: 0.00003 -# whether to mask out or include the human's prompt from the training labels -train_on_inputs: false -# don't use this, leads to wonky training (according to someone on the internet) -group_by_length: false -# Use CUDA bf16 -bf16: true -# Use CUDA tf32 -tf32: true -# does not work with current implementation of 4-bit LoRA -gradient_checkpointing: false -# stop training after this many evaluation losses have increased in a row -# https://huggingface.co/transformers/v4.2.2/_modules/transformers/trainer_callback.html#EarlyStoppingCallback -early_stopping_patience: 3 -# specify a scheduler to use with the optimizer. only one_cycle is supported currently -lr_scheduler: -# whether to use xformers attention patch https://github.com/facebookresearch/xformers: -xformers_attention: -# whether to use flash attention patch https://github.com/HazyResearch/flash-attention: -flash_attention: -# resume from a specific checkpoint dir -resume_from_checkpoint: -# if resume_from_checkpoint isn't set and you simply want it to start where it left off -# be careful with this being turned on between different models -auto_resume_from_checkpoints: false -# don't mess with this, it's here for accelerate and torchrun -local_rank: diff --git a/examples/gptj-qlora/config.yml b/examples/gptj-qlora/config.yml new file mode 100644 index 000000000..858c14862 --- /dev/null +++ b/examples/gptj-qlora/config.yml @@ -0,0 +1,57 @@ +base_model: EleutherAI/gpt-j-6b +base_model_config: EleutherAI/gpt-j-6b +load_in_8bit: false +load_in_4bit: true +strict: false +push_dataset_to_hub: +datasets: + - path: teknium/GPT4-LLM-Cleaned + type: alpaca +dataset_prepared_path: last_run_prepared +val_set_size: 0.01 +adapter: qlora +lora_model_dir: +sequence_len: 2048 +max_packed_sequence_len: +lora_r: 8 +lora_alpha: 32 +lora_dropout: 0.05 +lora_target_modules: +lora_target_linear: true +lora_fan_in_fan_out: +wandb_project: +wandb_watch: +wandb_run_id: +wandb_log_model: +output_dir: ./qlora-out +gradient_accumulation_steps: 2 +micro_batch_size: 2 +num_epochs: 2 +optimizer: paged_adamw_8bit +torchdistx_path: +lr_scheduler: cosine +learning_rate: 0.0001 +train_on_inputs: false +group_by_length: true +bf16: true +fp16: false +tf32: true +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: true +flash_attention: +gptq_groupsize: +gptq_model_v1: +warmup_steps: 10 +eval_steps: 20 +save_steps: +debug: 
+deepspeed: +weight_decay: 0.1 +fsdp: +fsdp_config: +special_tokens: + pad_token: "<|endoftext|>" diff --git a/configs/llama_7B_jeopardy.yml b/examples/jeopardy-bot/config.yml similarity index 75% rename from configs/llama_7B_jeopardy.yml rename to examples/jeopardy-bot/config.yml index 287d6d6ab..b803c6074 100644 --- a/configs/llama_7B_jeopardy.yml +++ b/examples/jeopardy-bot/config.yml @@ -7,30 +7,28 @@ datasets: - path: openaccess-ai-collective/jeopardy type: jeopardy dataset_prepared_path: last_run_prepared -val_set_size: 0.01 +val_set_size: 0.02 adapter: lora_model_dir: -sequence_len: 2048 -max_packed_sequence_len: 2048 -lora_r: 8 -lora_alpha: 16 -lora_dropout: 0.05 +sequence_len: 512 +max_packed_sequence_len: +lora_r: +lora_alpha: +lora_dropout: lora_target_modules: - - q_proj - - v_proj lora_fan_in_fan_out: false -wandb_project: jeopardy-bot-7b +wandb_project: wandb_watch: wandb_run_id: wandb_log_model: output_dir: ./jeopardy-bot-7b -gradient_accumulation_steps: 2 +gradient_accumulation_steps: 1 micro_batch_size: 1 -num_epochs: 2 +num_epochs: 3 optimizer: adamw_bnb_8bit torchdistx_path: lr_scheduler: cosine -learning_rate: 0.0000002 +learning_rate: 0.00003 train_on_inputs: false group_by_length: false bf16: true @@ -48,11 +46,10 @@ eval_steps: 110 save_steps: 660 debug: deepspeed: -weight_decay: 0.0001 +weight_decay: 0.1 fsdp: fsdp_config: tokens: - pad_token: "[PAD]" bos_token: "" eos_token: "" unk_token: "" From effbbf6dd13b564dcbafbbf155557bb43f76359a Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Sun, 11 Jun 2023 10:38:24 -0400 Subject: [PATCH 15/24] more pruning --- configs/cerebras_1_3B_alpaca.yml | 40 --------------------- configs/galactica_1_3B.yml | 41 ---------------------- configs/gpt_neox_20b.yml | 39 --------------------- configs/stability_3b.yml | 56 ----------------------------- examples/cerebras/qlora.yml | 60 ++++++++++++++++++++++++++++++++ 5 files changed, 60 insertions(+), 176 deletions(-) delete mode 100644 configs/cerebras_1_3B_alpaca.yml delete mode 100644 configs/galactica_1_3B.yml delete mode 100644 configs/gpt_neox_20b.yml delete mode 100644 configs/stability_3b.yml create mode 100644 examples/cerebras/qlora.yml diff --git a/configs/cerebras_1_3B_alpaca.yml b/configs/cerebras_1_3B_alpaca.yml deleted file mode 100644 index 958bf4c5a..000000000 --- a/configs/cerebras_1_3B_alpaca.yml +++ /dev/null @@ -1,40 +0,0 @@ -base_model: cerebras/Cerebras-GPT-1.3B -model_type: AutoModelForCausalLM -tokenizer_type: AutoTokenizer -load_in_8bit: true -datasets: - - path: data/alpaca_data_gpt4.jsonl - type: alpaca - - path: data/vicuna_cleaned.jsonl - type: sharegpt - - path: data/gpt4-instruct-similarity-0.6-dataset.jsonl - type: gpteacher - - path: data/roleplay-similarity_0.6-instruct-dataset.jsonl - type: gpteacher -dataset_prepared_path: last_run_prepared -val_set_size: 0.05 -adapter: lora -sequence_len: 2048 -lora_r: 8 -lora_alpha: 16 -lora_dropout: 0.05 -lora_target_modules: - - c_attn -lora_fan_in_fan_out: false -wandb_project: pythia-1.4b-lora -wandb_watch: -wandb_run_id: -wandb_log_model: -output_dir: ./lora-alpaca -gradient_accumulation_steps: 1 -micro_batch_size: 4 -num_epochs: 5 -learning_rate: 0.0003 -train_on_inputs: false -group_by_length: false -bf16: True -tf32: True -gradient_checkpointing: -early_stopping_patience: -resume_from_checkpoint: -local_rank: diff --git a/configs/galactica_1_3B.yml b/configs/galactica_1_3B.yml deleted file mode 100644 index 2abb4c6b4..000000000 --- a/configs/galactica_1_3B.yml +++ /dev/null @@ -1,41 +0,0 @@ -base_model: 
facebook/galactica-1.3b -model_type: AutoModelForCausalLM -tokenizer_type: AutoTokenizer -load_in_8bit: false -datasets: - - path: tatsu-lab/alpaca - type: alpaca -dataset_prepared_path: last_run_prepared -val_set_size: 0.1 -adapter: -lora_model_dir: -sequence_len: 1024 -max_packed_sequence_len: 1024 -lora_r: 8 -lora_alpha: 16 -lora_dropout: 0.05 -lora_target_modules: - - q_proj - - v_proj -lora_fan_in_fan_out: false -wandb_project: -wandb_watch: -wandb_run_id: -wandb_log_model: -output_dir: ./lora-llama-alpaca -gradient_accumulation_steps: 1 -micro_batch_size: 16 -num_epochs: 3 -learning_rate: 0.00003 -train_on_inputs: false -group_by_length: false -bf16: false -tf32: false -early_stopping_patience: -resume_from_checkpoint: -local_rank: -tokens: - pad_token: "[PAD]" - bos_token: "" - eos_token: "" - unk_token: "" diff --git a/configs/gpt_neox_20b.yml b/configs/gpt_neox_20b.yml deleted file mode 100644 index 730afb72c..000000000 --- a/configs/gpt_neox_20b.yml +++ /dev/null @@ -1,39 +0,0 @@ -base_model: EleutherAI/gpt-neox-20b -base_model_ignore_patterns: pytorch* # prefer safetensors -model_type: GPTNeoXForCausalLM -tokenizer_type: AutoTokenizer -load_in_8bit: true -datasets: - - path: nomic-ai/gpt4all-j-prompt-generations - type: alpaca - shards: 4 - shards_index: 0 -dataset_prepared_path: last_run_prepared -val_set_size: 0.05 -adapter: lora -lora_model_dir: -sequence_len: 2048 -max_packed_sequence_len: 2048 -lora_r: 8 -lora_alpha: 32 -lora_dropout: 0.05 -lora_target_modules: - - query_key_value -lora_fan_in_fan_out: true # pythia/GPTNeoX lora specific -wandb_project: gpt4all-neox-20b -wandb_watch: -wandb_run_id: -wandb_log_model: -output_dir: ./gpt4all-neox-20b -gradient_accumulation_steps: 1 -micro_batch_size: 4 -num_epochs: 5 -learning_rate: 0.00003 -lr_scheduler: one_cycle -train_on_inputs: false -group_by_length: false -bf16: True -tf32: True -early_stopping_patience: -resume_from_checkpoint: -local_rank: diff --git a/configs/stability_3b.yml b/configs/stability_3b.yml deleted file mode 100644 index 83516a20a..000000000 --- a/configs/stability_3b.yml +++ /dev/null @@ -1,56 +0,0 @@ -base_model: stabilityai/stablelm-base-alpha-3b -base_model_config: stabilityai/stablelm-base-alpha-3b -load_in_8bit: false -datasets: - - path: vicgalle/alpaca-gpt4 - type: alpaca -dataset_prepared_path: last_run_prepared -val_set_size: 0.04 -adapter: -lora_model_dir: -sequence_len: 4096 -max_packed_sequence_len: 4096 -lora_r: 8 -lora_alpha: 16 -lora_dropout: 0.05 -lora_target_modules: - - q_proj - - v_proj -lora_fan_in_fan_out: false -wandb_project: stable-alpaca-3b -wandb_watch: -wandb_run_id: -wandb_log_model: -output_dir: ./stable-alpaca-3b -gradient_accumulation_steps: 1 -micro_batch_size: 1 -num_epochs: 1 -optimizer: adamw_bnb_8bit -torchdistx_path: -lr_scheduler: cosine -learning_rate: 0.0000002 -train_on_inputs: false -group_by_length: false -bf16: true -tf32: true -early_stopping_patience: -resume_from_checkpoint: -local_rank: -logging_steps: 1 -xformers_attention: true -flash_attention: -gptq_groupsize: -gptq_model_v1: -warmup_steps: 100 -eval_steps: 50 -save_steps: 200 -debug: -deepspeed: -weight_decay: 0.01 -fsdp: -fsdp_config: -#tokens: -# pad_token: "[PAD]" -# bos_token: "" -# eos_token: "" -# unk_token: "" diff --git a/examples/cerebras/qlora.yml b/examples/cerebras/qlora.yml new file mode 100644 index 000000000..9340299b9 --- /dev/null +++ b/examples/cerebras/qlora.yml @@ -0,0 +1,60 @@ +base_model: cerebras/Cerebras-GPT-1.3B +base_model_config: cerebras/Cerebras-GPT-1.3B +load_in_8bit: 
false +load_in_4bit: true +strict: false +push_dataset_to_hub: +datasets: + - path: teknium/GPT4-LLM-Cleaned + type: alpaca +dataset_prepared_path: last_run_prepared +val_set_size: 0.01 +adapter: qlora +lora_model_dir: +sequence_len: 2048 +max_packed_sequence_len: 2048 +lora_r: 16 +lora_alpha: 32 +lora_dropout: 0.05 +lora_target_modules: + - c_fc + - c_attn + - c_proj +lora_target_linear: +lora_fan_in_fan_out: +wandb_project: +wandb_watch: +wandb_run_id: +wandb_log_model: +output_dir: ./qlora-out +batch_size: 4 +micro_batch_size: 4 +num_epochs: 2 +optimizer: paged_adamw_8bit +torchdistx_path: +lr_scheduler: cosine +learning_rate: 0.0002 +train_on_inputs: false +group_by_length: true +bf16: true +fp16: false +tf32: true +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: true +flash_attention: +gptq_groupsize: +gptq_model_v1: +warmup_steps: 10 +eval_steps: 20 +save_steps: +debug: +deepspeed: +weight_decay: 0.1 +fsdp: +fsdp_config: +special_tokens: + pad_token: "<|endoftext|>" From a43bae9ff06d927083f60e76caaae9338d43b613 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Sun, 11 Jun 2023 10:44:03 -0400 Subject: [PATCH 16/24] update the support matrix --- README.md | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 180d97932..1c1e1f65a 100644 --- a/README.md +++ b/README.md @@ -16,13 +16,14 @@ ## Axolotl supports -| | fp16/fp32 | fp16/fp32 w/ lora | qlora | 4bit-quant | 4bit-quant w/flash attention | flash attention | xformers attention | -|---------|:----------|:------------------|------|------------|------------------------------|-----------------|--------------------| -| llama | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| Pythia | ✅ | ✅ | ❓ | ❌ | ❌ | ❌ | ❓ | -| cerebras | ✅ | ✅ | ❓ | ❌ | ❌ | ❌ | ❓ | -| mpt | ✅ | ❌ | ❓ | ❌ | ❌ | ❌ | ❓ | -| falcon | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❓ | +| | fp16/fp32 | fp16/fp32 w/ lora | qlora | 4bit-quant | 4bit-quant w/flash attention | flash attention | xformers attention | +|----------|:----------|:------------------|------|------------|------------------------------|----------------|-----------------| +| llama | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| Pythia | ✅ | ✅ | ❓ | ❌ | ❌ | ❌ | ❓ | +| cerebras | ✅ | ✅ | ❓ | ❌ | ❌ | ❌ | ✅ | +| mpt | ✅ | ❌ | ❓ | ❌ | ❌ | ❌ | ❓ | +| falcon | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | +| gpt-j | ✅ | ✅ | ✅ | ❌ | ❌ | ❓ | ✅ | ## Quickstart ⚡ From 280832cec2e8c32aecfa525a161b6932ee4078f9 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Sun, 11 Jun 2023 10:52:36 -0400 Subject: [PATCH 17/24] more matrix updates --- README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 1c1e1f65a..0edeff447 100644 --- a/README.md +++ b/README.md @@ -17,13 +17,13 @@ ## Axolotl supports | | fp16/fp32 | fp16/fp32 w/ lora | qlora | 4bit-quant | 4bit-quant w/flash attention | flash attention | xformers attention | -|----------|:----------|:------------------|------|------------|------------------------------|----------------|-----------------| -| llama | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| Pythia | ✅ | ✅ | ❓ | ❌ | ❌ | ❌ | ❓ | -| cerebras | ✅ | ✅ | ❓ | ❌ | ❌ | ❌ | ✅ | -| mpt | ✅ | ❌ | ❓ | ❌ | ❌ | ❌ | ❓ | -| falcon | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | -| gpt-j | ✅ | ✅ | ✅ | ❌ | ❌ | ❓ | ✅ | +|----------|:----------|:------------------|-------|------------|------------------------------|-----------------|--------------------| +| llama | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| Pythia | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❓ | +| 
cerebras | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | +| mpt | ✅ | ❌ | ❓ | ❌ | ❌ | ❌ | ❓ | +| falcon | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | +| gpt-j | ✅ | ✅ | ✅ | ❌ | ❌ | ❓ | ✅ | ## Quickstart ⚡ From a6ebf57e827ff1d9c41238bf606ce7c3f7338f98 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Sun, 11 Jun 2023 10:55:32 -0400 Subject: [PATCH 18/24] fix table formatting --- README.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 0edeff447..8d201e739 100644 --- a/README.md +++ b/README.md @@ -16,14 +16,14 @@ ## Axolotl supports -| | fp16/fp32 | fp16/fp32 w/ lora | qlora | 4bit-quant | 4bit-quant w/flash attention | flash attention | xformers attention | -|----------|:----------|:------------------|-------|------------|------------------------------|-----------------|--------------------| -| llama | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| Pythia | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❓ | -| cerebras | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | -| mpt | ✅ | ❌ | ❓ | ❌ | ❌ | ❌ | ❓ | -| falcon | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | -| gpt-j | ✅ | ✅ | ✅ | ❌ | ❌ | ❓ | ✅ | +| | fp16/fp32 | fp16/fp32 w/ lora | qlora | gptq | gptq w/ lora | gptq w/flash attention | flash attention | xformers attention | +|----------|:----------|:------------------|-------|------|:-------------|------------------------|-----------------|--------------------| +| llama | ✅ | ✅ | ✅ | ✅ | ❓ | ✅ | ✅ | ✅ | +| Pythia | ✅ | ✅ | ✅ | ❌ | ❓ | ❌ | ❌ | ❓ | +| cerebras | ✅ | ✅ | ✅ | ❌ | ❓ | ❌ | ❌ | ✅ | +| mpt | ✅ | ❌ | ❓ | ❌ | ❓ | ❌ | ❌ | ❓ | +| falcon | ✅ | ✅ | ✅ | ❌ | ❓ | ❌ | ❌ | ✅ | +| gpt-j | ✅ | ✅ | ✅ | ❌ | ❓ | ❌ | ❓ | ✅ | ## Quickstart ⚡ From d0d7eaa4f347c9fc6ba267d1966b83ea6f048a96 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Sun, 11 Jun 2023 11:03:31 -0400 Subject: [PATCH 19/24] update openllama and clean up paths --- README.md | 16 ++++++++-------- .../{gptj-qlora/config.yml => gptj/qlora.yml} | 0 examples/openllama-3b/README.md | 16 ++++++++++++++++ .../config.yml => openllama-3b/lora.yml} | 4 ++-- .../config.yml => openllama-3b/qlora.yml} | 4 ++-- examples/qlora-openllama-3b/README.md | 6 ------ 6 files changed, 28 insertions(+), 18 deletions(-) rename examples/{gptj-qlora/config.yml => gptj/qlora.yml} (100%) create mode 100644 examples/openllama-3b/README.md rename examples/{lora-openllama-3b/config.yml => openllama-3b/lora.yml} (90%) rename examples/{qlora-openllama-3b/config.yml => openllama-3b/qlora.yml} (90%) delete mode 100644 examples/qlora-openllama-3b/README.md diff --git a/README.md b/README.md index 8d201e739..a31eee5fb 100644 --- a/README.md +++ b/README.md @@ -16,14 +16,14 @@ ## Axolotl supports -| | fp16/fp32 | fp16/fp32 w/ lora | qlora | gptq | gptq w/ lora | gptq w/flash attention | flash attention | xformers attention | -|----------|:----------|:------------------|-------|------|:-------------|------------------------|-----------------|--------------------| -| llama | ✅ | ✅ | ✅ | ✅ | ❓ | ✅ | ✅ | ✅ | -| Pythia | ✅ | ✅ | ✅ | ❌ | ❓ | ❌ | ❌ | ❓ | -| cerebras | ✅ | ✅ | ✅ | ❌ | ❓ | ❌ | ❌ | ✅ | -| mpt | ✅ | ❌ | ❓ | ❌ | ❓ | ❌ | ❌ | ❓ | -| falcon | ✅ | ✅ | ✅ | ❌ | ❓ | ❌ | ❌ | ✅ | -| gpt-j | ✅ | ✅ | ✅ | ❌ | ❓ | ❌ | ❓ | ✅ | +| | fp16/fp32 | lora | qlora | gptq | gptq w/ lora | gptq w/flash attn | flash attn | xformers attn | +|----------|:----------|:-----|-------|------|:-------------|-------------------|------------|---------------| +| llama | ✅ | ✅ | ✅ | ✅ | ❓ | ✅ | ✅ | ✅ | +| Pythia | ✅ | ✅ | ✅ | ❌ | ❓ | ❌ | ❌ | ❓ | +| cerebras | ✅ | ✅ | ✅ | ❌ | ❓ | ❌ | ❌ | ✅ | +| mpt | ✅ | ❌ | ❓ | ❌ | ❓ | ❌ | ❌ | ❓ | +| falcon | ✅ | ✅ | ✅ 
| ❌ | ❓ | ❌ | ❌ | ✅ | +| gpt-j | ✅ | ✅ | ✅ | ❌ | ❓ | ❌ | ❓ | ✅ | ## Quickstart ⚡ diff --git a/examples/gptj-qlora/config.yml b/examples/gptj/qlora.yml similarity index 100% rename from examples/gptj-qlora/config.yml rename to examples/gptj/qlora.yml diff --git a/examples/openllama-3b/README.md b/examples/openllama-3b/README.md new file mode 100644 index 000000000..9e8f3a9e8 --- /dev/null +++ b/examples/openllama-3b/README.md @@ -0,0 +1,16 @@ +# openllama-3b + +Basic full tune +```shell +accelerate launch scripts/finetune.py examples/qlora-openllama-3b/config.yml +``` + +LoRA +```shell +accelerate launch scripts/finetune.py examples/qlora-openllama-3b/lora.yml +``` + +QLoRA +```shell +accelerate launch scripts/finetune.py examples/qlora-openllama-3b/qlora.yml +``` diff --git a/examples/lora-openllama-3b/config.yml b/examples/openllama-3b/lora.yml similarity index 90% rename from examples/lora-openllama-3b/config.yml rename to examples/openllama-3b/lora.yml index 2e1644546..98e2c2adc 100644 --- a/examples/lora-openllama-3b/config.yml +++ b/examples/openllama-3b/lora.yml @@ -1,5 +1,5 @@ -base_model: openlm-research/open_llama_3b_600bt_preview -base_model_config: openlm-research/open_llama_3b_600bt_preview +base_model: openlm-research/open_llama_3b +base_model_config: openlm-research/open_llama_3b model_type: LlamaForCausalLM tokenizer_type: LlamaTokenizer load_in_8bit: true diff --git a/examples/qlora-openllama-3b/config.yml b/examples/openllama-3b/qlora.yml similarity index 90% rename from examples/qlora-openllama-3b/config.yml rename to examples/openllama-3b/qlora.yml index 87e1dfd94..83ae31f91 100644 --- a/examples/qlora-openllama-3b/config.yml +++ b/examples/openllama-3b/qlora.yml @@ -1,5 +1,5 @@ -base_model: openlm-research/open_llama_3b_600bt_preview -base_model_config: openlm-research/open_llama_3b_600bt_preview +base_model: openlm-research/open_llama_3b +base_model_config: openlm-research/open_llama_3b model_type: LlamaForCausalLM tokenizer_type: LlamaTokenizer load_in_8bit: false diff --git a/examples/qlora-openllama-3b/README.md b/examples/qlora-openllama-3b/README.md deleted file mode 100644 index d79ea7f3f..000000000 --- a/examples/qlora-openllama-3b/README.md +++ /dev/null @@ -1,6 +0,0 @@ -# qlora-openllama-3b - -```shell -accelerate launch scripts/finetune.py examples/qlora-openllama-3b/config.yml - -``` From 336aa3fd487a8c35b7637fbbddac81a67d078a41 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Sun, 11 Jun 2023 11:05:29 -0400 Subject: [PATCH 20/24] gptq lora llama is obviously good --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a31eee5fb..349dd370a 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ | | fp16/fp32 | lora | qlora | gptq | gptq w/ lora | gptq w/flash attn | flash attn | xformers attn | |----------|:----------|:-----|-------|------|:-------------|-------------------|------------|---------------| -| llama | ✅ | ✅ | ✅ | ✅ | ❓ | ✅ | ✅ | ✅ | +| llama | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Pythia | ✅ | ✅ | ✅ | ❌ | ❓ | ❌ | ❌ | ❓ | | cerebras | ✅ | ✅ | ✅ | ❌ | ❓ | ❌ | ❌ | ✅ | | mpt | ✅ | ❌ | ❓ | ❌ | ❓ | ❌ | ❌ | ❓ | From 6b3f509d9e14e58369a7c4322de78a46a90924ae Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Sun, 11 Jun 2023 11:50:12 -0400 Subject: [PATCH 21/24] forgot to add this file --- examples/openllama-3b/config.yml | 61 ++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 examples/openllama-3b/config.yml diff --git a/examples/openllama-3b/config.yml 
b/examples/openllama-3b/config.yml new file mode 100644 index 000000000..248b740ff --- /dev/null +++ b/examples/openllama-3b/config.yml @@ -0,0 +1,61 @@ +base_model: openlm-research/open_llama_3b +base_model_config: openlm-research/open_llama_3b +model_type: LlamaForCausalLM +tokenizer_type: LlamaTokenizer +load_in_8bit: false +load_in_4bit: false +strict: false +push_dataset_to_hub: +datasets: + - path: teknium/GPT4-LLM-Cleaned + type: alpaca +dataset_prepared_path: last_run_prepared +val_set_size: 0.02 +adapter: +lora_model_dir: +sequence_len: 256 +max_packed_sequence_len: +lora_r: +lora_alpha: +lora_dropout: +lora_target_modules: +lora_target_linear: +lora_fan_in_fan_out: +wandb_project: +wandb_watch: +wandb_run_id: +wandb_log_model: +output_dir: ./lora-out +batch_size: 16 +micro_batch_size: 4 +num_epochs: 3 +optimizer: adamw_bnb_8bit +torchdistx_path: +lr_scheduler: cosine +learning_rate: 0.0002 +train_on_inputs: false +group_by_length: false +bf16: false +fp16: true +tf32: false +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: +gptq_groupsize: +gptq_model_v1: +warmup_steps: 10 +eval_steps: 50 +save_steps: +debug: +deepspeed: +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: + bos_token: "" + eos_token: "" + unk_token: "" From 4cd1deeef2203c628025439356df463d3174569c Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Mon, 12 Jun 2023 02:44:46 +0900 Subject: [PATCH 22/24] Add save_steps and eval_steps to Readme --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 349dd370a..8ba63299e 100644 --- a/README.md +++ b/README.md @@ -382,6 +382,8 @@ num_epochs: 3 warmup_steps: 100 learning_rate: 0.00003 logging_steps: +save_steps: +eval_steps: # whether to mask out or include the human's prompt from the training labels train_on_inputs: false From 9a58e99e812f1e4074da02ff56529b4986563931 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Mon, 12 Jun 2023 01:52:58 -0400 Subject: [PATCH 23/24] config fixes --- examples/falcon/config-7b-lora.yml | 2 +- examples/falcon/config-7b.yml | 2 +- examples/openllama-3b/config.yml | 4 ++-- examples/openllama-3b/lora.yml | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/falcon/config-7b-lora.yml b/examples/falcon/config-7b-lora.yml index 090cc6bcf..8aa585851 100644 --- a/examples/falcon/config-7b-lora.yml +++ b/examples/falcon/config-7b-lora.yml @@ -23,7 +23,7 @@ lora_dropout: 0.0 lora_target_modules: lora_target_linear: true lora_fan_in_fan_out: -wandb_project: falcon-7b +wandb_project: wandb_watch: wandb_run_id: wandb_log_model: diff --git a/examples/falcon/config-7b.yml b/examples/falcon/config-7b.yml index dc67d6125..b267566ce 100644 --- a/examples/falcon/config-7b.yml +++ b/examples/falcon/config-7b.yml @@ -23,7 +23,7 @@ lora_dropout: 0.0 lora_target_modules: lora_target_linear: true lora_fan_in_fan_out: -wandb_project: falcon-7b +wandb_project: wandb_watch: wandb_run_id: wandb_log_model: diff --git a/examples/openllama-3b/config.yml b/examples/openllama-3b/config.yml index 248b740ff..6fd704ffc 100644 --- a/examples/openllama-3b/config.yml +++ b/examples/openllama-3b/config.yml @@ -25,7 +25,7 @@ wandb_project: wandb_watch: wandb_run_id: wandb_log_model: -output_dir: ./lora-out +output_dir: ./openllama-out batch_size: 16 micro_batch_size: 4 num_epochs: 3 @@ -43,7 +43,7 @@ early_stopping_patience: resume_from_checkpoint: local_rank: logging_steps: 1 -xformers_attention: 
+xformers_attention: true flash_attention: gptq_groupsize: gptq_model_v1: diff --git a/examples/openllama-3b/lora.yml b/examples/openllama-3b/lora.yml index 98e2c2adc..d1f252455 100644 --- a/examples/openllama-3b/lora.yml +++ b/examples/openllama-3b/lora.yml @@ -49,7 +49,7 @@ early_stopping_patience: resume_from_checkpoint: local_rank: logging_steps: 1 -xformers_attention: +xformers_attention: true flash_attention: gptq_groupsize: gptq_model_v1: From 52cde69288d5616ba8e7fd7a262b1247523eba38 Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Mon, 12 Jun 2023 17:06:15 +0900 Subject: [PATCH 24/24] Fix config path after config moved --- README.md | 4 ++-- examples/gptq-lora-7b/README.md | 2 +- examples/openllama-3b/README.md | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 8ba63299e..7ebea8678 100644 --- a/README.md +++ b/README.md @@ -39,10 +39,10 @@ pip3 install -U git+https://github.com/huggingface/peft.git accelerate config # finetune lora -accelerate launch scripts/finetune.py examples/lora-openllama-3b/config.yml +accelerate launch scripts/finetune.py examples/openllama-3b/lora.yml # inference -accelerate launch scripts/finetune.py examples/lora-openllama-3b/config.yml \ +accelerate launch scripts/finetune.py examples/openllama-3b/lora.yml \ --inference --lora_model_dir="./lora-out" ``` diff --git a/examples/gptq-lora-7b/README.md b/examples/gptq-lora-7b/README.md index eefe98d3f..0bde51b06 100644 --- a/examples/gptq-lora-7b/README.md +++ b/examples/gptq-lora-7b/README.md @@ -3,6 +3,6 @@ This is a good place to start for beginners. This will run on an NVIDIA RTX4090 with no other changes needed. ```shell -accelerate launch scripts/finetune.py examples/4bit-lora-7b/config.yml +accelerate launch scripts/finetune.py examples/gptq-lora-7b/config.yml ``` diff --git a/examples/openllama-3b/README.md b/examples/openllama-3b/README.md index 9e8f3a9e8..3e9501a54 100644 --- a/examples/openllama-3b/README.md +++ b/examples/openllama-3b/README.md @@ -2,15 +2,15 @@ Basic full tune ```shell -accelerate launch scripts/finetune.py examples/qlora-openllama-3b/config.yml +accelerate launch scripts/finetune.py examples/openllama-3b/config.yml ``` LoRA ```shell -accelerate launch scripts/finetune.py examples/qlora-openllama-3b/lora.yml +accelerate launch scripts/finetune.py examples/openllama-3b/lora.yml ``` QLoRA ```shell -accelerate launch scripts/finetune.py examples/qlora-openllama-3b/qlora.yml +accelerate launch scripts/finetune.py examples/openllama-3b/qlora.yml ```
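Tying back to the first patch in this series: with `is_trainable` set whenever the run is not inference, continuing training on top of previously saved LoRA weights should just be a matter of pointing `lora_model_dir` at the old adapter. A hedged sketch, assuming the usual kwarg-to-cfg passthrough in `scripts/finetune.py` and placeholder paths:

```shell
# continue training an existing LoRA adapter instead of starting from scratch
accelerate launch scripts/finetune.py examples/openllama-3b/lora.yml \
    --lora_model_dir="./lora-out"
```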