From 85522184917cd701863147aebcc5619efbf9bf06 Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Thu, 25 May 2023 17:04:44 +0900 Subject: [PATCH 01/11] Improve Inference instruction --- README.md | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 969708d47..531270dcc 100644 --- a/README.md +++ b/README.md @@ -317,12 +317,16 @@ accelerate launch scripts/finetune.py configs/your_config.yml ### Inference -Add `--inference` flag to train command above +Pass the appropriate flag to the train command: -If you are inferencing a pretrained LORA, pass -```bash ---lora_model_dir ./completed-model -``` +- Pretrained LORA: + ```bash + --inference --lora_model_dir ./completed-model + ``` +- Full weights finetune: + ```bash + --inference --base_model ./completed-model + ``` ### Merge LORA to base From 9083910036c0d7daf56e8f7a1f8309694f6704ff Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Thu, 25 May 2023 17:26:39 +0900 Subject: [PATCH 02/11] Update lora config --- README.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 531270dcc..c51d7101c 100644 --- a/README.md +++ b/README.md @@ -134,7 +134,7 @@ See sample configs in [configs](configs) folder or [examples](examples) for quic - lora ```yaml - adapter: lora # blank for full finetune + adapter: lora # qlora or leave blank for full finetune lora_r: 8 lora_alpha: 16 lora_dropout: 0.05 @@ -185,6 +185,8 @@ datasets: # The type of prompt to use for training. [alpaca, sharegpt, gpteacher, oasst, reflection] type: alpaca data_files: # path to source data files + shards: # true if use subset data. make sure to set `shards` param also +shards: # number of shards to split dataset into # axolotl attempts to save the dataset as an arrow after packing the data together so # subsequent training attempts load faster, relative path @@ -201,7 +203,7 @@ sequence_len: 2048 # inspired by StackLLaMA. 
see https://huggingface.co/blog/stackllama#supervised-fine-tuning max_packed_sequence_len: 1024 -# if you want to use lora, leave blank to train all parameters in original model +# adapter to use: 'lora' or 'qlora'; leave blank to train all parameters in the original model adapter: lora # if you already have a lora model trained that you want to load, put that here # lora hyperparameters @@ -293,6 +295,9 @@ torchdistx_path: # Debug mode debug: + +# Seed +seed: ``` From 5b712afbe4e30ddabe4b9d1b219a40c1c331c44f Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Thu, 25 May 2023 17:28:03 +0900 Subject: [PATCH 03/11] Update bf16 options --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c51d7101c..f02ae20a0 100644 --- a/README.md +++ b/README.md @@ -172,7 +172,7 @@ gptq_model_v1: false # v1 or v2 load_in_8bit: true # Use CUDA bf16 -bf16: true +bf16: true # bool or 'full' for `bf16_full_eval` # Use CUDA fp16 fp16: true # Use CUDA tf32 From 05c18340d69fd546c881f8e2968e5a8775e22a95 Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Thu, 25 May 2023 17:32:03 +0900 Subject: [PATCH 04/11] Update scheduler configs --- README.md | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f02ae20a0..1beceda7a 100644 --- a/README.md +++ b/README.md @@ -254,8 +254,18 @@ gradient_checkpointing: false # stop training after this many evaluation losses have increased in a row # https://huggingface.co/transformers/v4.2.2/_modules/transformers/trainer_callback.html#EarlyStoppingCallback early_stopping_patience: 3 -# specify a scheduler to use with the optimizer. 
only one_cycle is supported currently -lr_scheduler: + +# specify a scheduler and kwargs to use with the optimizer +lr_scheduler: # 'one_cycle' | 'log_sweep' | empty for cosine +lr_scheduler_kwargs: + +# for one_cycle optim +lr_div_factor: # learning rate div factor + +# for log_sweep optim +log_sweep_min_lr: +log_sweep_max_lr: + # specify optimizer optimizer: # specify weight decay From 29273b5a5b42f5efa9290d8c3987697e8b7375f7 Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Thu, 25 May 2023 17:34:43 +0900 Subject: [PATCH 05/11] Add other minor configs --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1beceda7a..407e03ed3 100644 --- a/README.md +++ b/README.md @@ -226,6 +226,7 @@ lora_out_dir: lora_fan_in_fan_out: false # wandb configuration if you're using it +wandb_mode: wandb_project: wandb_watch: wandb_run_id: @@ -300,9 +301,12 @@ fsdp_config: # Deepspeed deepspeed: -# TODO +# Path to torch distx for optim 'adamw_anyprecision' torchdistx_path: +# Set padding for data collator to 'longest' +collator_pad_to_longest: + # Debug mode debug: From 7bc28eb8a8bf8786001c80afc991a8d4d4145b9c Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Thu, 25 May 2023 17:43:37 +0900 Subject: [PATCH 06/11] Add more data formats --- README.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/README.md b/README.md index 407e03ed3..1bab0722f 100644 --- a/README.md +++ b/README.md @@ -97,6 +97,17 @@ Have dataset(s) in one of the following format (JSONL recommended): ```json {"instruction": "...", "input": "...", "output": "...", "reflection": "...", "corrected": "..."} ``` +- `explainchoice`: question, choices, (solution OR explanation) + ```json + {"question": "...", "choices": ["..."], "solution": "...", "explanation": "..."} + ``` +- `concisechoice`: question, choices, (solution OR explanation) + ```json + {"question": "...", "choices": ["..."], "solution": "...", "explanation": "..."} +- `summarizetldr`: 
article and summary + ```json + {"article": "...", "summary": "..."} + ``` > Have some new format to propose? Check if it's already defined in [data.py](src/axolotl/utils/data.py) in `dev` branch! From 2c34f8d0c769341ce6c0b4eb3fb4291f77fcf581 Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Thu, 25 May 2023 17:44:58 +0900 Subject: [PATCH 07/11] Update dataset type --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1bab0722f..1f90be3d6 100644 --- a/README.md +++ b/README.md @@ -194,7 +194,7 @@ datasets: # this can be either a hf dataset, or relative path - path: vicgalle/alpaca-gpt4 # The type of prompt to use for training. [alpaca, sharegpt, gpteacher, oasst, reflection] - type: alpaca + type: alpaca # format OR format:prompt_style (chat/instruct) data_files: # path to source data files shards: # true if use subset data. make sure to set `shards` param also shards: # number of shards to split dataset into From 1377400c333d92769b0165eb1b3da850c9a71ad8 Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Thu, 25 May 2023 22:33:45 +0900 Subject: [PATCH 08/11] Add info on Runtime Error --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 1f90be3d6..e039fa639 100644 --- a/README.md +++ b/README.md @@ -375,6 +375,10 @@ Please reduce any below - `eval_batch_size` - `sequence_len` +> RuntimeError: expected scalar type Float but found Half + +Try setting `fp16: true` + ## Contributing 🤝 Bugs? Please check for open issue else create a new [Issue](https://github.com/OpenAccess-AI-Collective/axolotl/issues/new). 
From e65c203e9e7b799c526c43d5ba87d94d26a09e14 Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Thu, 25 May 2023 22:45:58 +0900 Subject: [PATCH 09/11] Add more detail on minimum GPU --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index e039fa639..5a95eb474 100644 --- a/README.md +++ b/README.md @@ -135,11 +135,11 @@ See sample configs in [configs](configs) folder or [examples](examples) for quic - loading ```yaml - load_4bit: true load_in_8bit: true - bf16: true + load_in_8bit: true + bf16: true # require >=ampere fp16: true - tf32: true + tf32: true # require >=ampere ``` Note: Repo does not do 4-bit quantization. @@ -183,11 +183,11 @@ gptq_model_v1: false # v1 or v2 load_in_8bit: true # Use CUDA bf16 -bf16: true # bool or 'full' for `bf16_full_eval` +bf16: true # bool or 'full' for `bf16_full_eval`. require >=ampere # Use CUDA fp16 fp16: true # Use CUDA tf32 -tf32: true +tf32: true # require >=ampere # a list of one or more datasets to finetune the model with datasets: @@ -286,7 +286,7 @@ weight_decay: # whether to use xformers attention patch https://github.com/facebookresearch/xformers: xformers_attention: # whether to use flash attention patch https://github.com/HazyResearch/flash-attention: -flash_attention: +flash_attention: # require a100 for llama # resume from a specific checkpoint dir resume_from_checkpoint: From f92245dbd65141ba1d0a5f9c2cf12504107da789 Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Thu, 25 May 2023 23:04:33 +0900 Subject: [PATCH 10/11] Fix missing closing code block --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 5a95eb474..110fec95d 100644 --- a/README.md +++ b/README.md @@ -104,6 +104,7 @@ Have dataset(s) in one of the following format (JSONL recommended): - `concisechoice`: question, choices, (solution OR explanation) ```json {"question": "...", "choices": ["..."], "solution": "...", "explanation": "..."} + ``` - 
`summarizetldr`: article and summary ```json {"article": "...", "summary": "..."} From a9e502ef45f29773986eda92eabb601477b12eb2 Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Thu, 25 May 2023 23:48:18 +0900 Subject: [PATCH 11/11] Update 4bit notes --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 110fec95d..79e583c71 100644 --- a/README.md +++ b/README.md @@ -136,7 +136,7 @@ See sample configs in [configs](configs) folder or [examples](examples) for quic - loading ```yaml - load_in_8bit: true + load_in_4bit: true load_in_8bit: true bf16: true # require >=ampere fp16: true @@ -175,13 +175,15 @@ tokenizer_type: AutoTokenizer # Trust remote code for untrusted source trust_remote_code: -# whether you are training a 4-bit quantized model +# whether you are training a 4-bit GPTQ quantized model load_4bit: true gptq_groupsize: 128 # group size gptq_model_v1: false # v1 or v2 # this will attempt to quantize the model down to 8 bits and use adam 8 bit optimizer load_in_8bit: true +# use bitsandbytes 4 bit +load_in_4bit: # Use CUDA bf16 bf16: true # bool or 'full' for `bf16_full_eval`. require >=ampere