From 85522184917cd701863147aebcc5619efbf9bf06 Mon Sep 17 00:00:00 2001
From: NanoCode012 <kevinvong@rocketmail.com>
Date: Thu, 25 May 2023 17:04:44 +0900
Subject: [PATCH 01/11] Improve Inference instruction

---
 README.md | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/README.md b/README.md
index 969708d47..531270dcc 100644
--- a/README.md
+++ b/README.md
@@ -317,12 +317,16 @@ accelerate launch scripts/finetune.py configs/your_config.yml
 
 ### Inference
 
-Add `--inference` flag to train command above
+Pass the appropriate flag to the train command:
 
-If you are inferencing a pretrained LORA, pass 
-```bash
---lora_model_dir ./completed-model
-```
+- Pretrained LORA:
+  ```bash
+  --inference --lora_model_dir ./completed-model
+  ```
+- Full weights finetune:
+  ```bash
+  --inference --base_model ./completed-model
+  ```
 
 ### Merge LORA to base
 

From 9083910036c0d7daf56e8f7a1f8309694f6704ff Mon Sep 17 00:00:00 2001
From: NanoCode012 <kevinvong@rocketmail.com>
Date: Thu, 25 May 2023 17:26:39 +0900
Subject: [PATCH 02/11] Update lora config

---
 README.md | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 531270dcc..c51d7101c 100644
--- a/README.md
+++ b/README.md
@@ -134,7 +134,7 @@ See sample configs in [configs](configs) folder or [examples](examples) for quic
 
 - lora
   ```yaml
-  adapter: lora # blank for full finetune
+  adapter: lora # qlora or leave blank for full finetune
   lora_r: 8
   lora_alpha: 16
   lora_dropout: 0.05
@@ -185,6 +185,8 @@ datasets:
   # The type of prompt to use for training. [alpaca, sharegpt, gpteacher, oasst, reflection]
     type: alpaca
     data_files: # path to source data files
+    shards: # true if use subset data. make sure to set `shards` param also
+shards: # number of shards to split dataset into
 
 # axolotl attempts to save the dataset as an arrow after packing the data together so
 # subsequent training attempts load faster, relative path
@@ -201,7 +203,7 @@ sequence_len: 2048
 # inspired by StackLLaMA. see https://huggingface.co/blog/stackllama#supervised-fine-tuning
 max_packed_sequence_len: 1024
 
-# if you want to use lora, leave blank to train all parameters in original model
+# set to 'lora' or 'qlora' to use an adapter, or leave blank to train all parameters in the original model
 adapter: lora
 # if you already have a lora model trained that you want to load, put that here
 # lora hyperparameters
@@ -293,6 +295,9 @@ torchdistx_path:
 
 # Debug mode
 debug:
+
+# Seed
+seed:
 ```
 
 </details>

From 5b712afbe4e30ddabe4b9d1b219a40c1c331c44f Mon Sep 17 00:00:00 2001
From: NanoCode012 <kevinvong@rocketmail.com>
Date: Thu, 25 May 2023 17:28:03 +0900
Subject: [PATCH 03/11] Update bf16 options

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index c51d7101c..f02ae20a0 100644
--- a/README.md
+++ b/README.md
@@ -172,7 +172,7 @@ gptq_model_v1: false # v1 or v2
 load_in_8bit: true
 
 # Use CUDA bf16
-bf16: true
+bf16: true # bool or 'full' for `bf16_full_eval`
 # Use CUDA fp16
 fp16: true
 # Use CUDA tf32

From 05c18340d69fd546c881f8e2968e5a8775e22a95 Mon Sep 17 00:00:00 2001
From: NanoCode012 <kevinvong@rocketmail.com>
Date: Thu, 25 May 2023 17:32:03 +0900
Subject: [PATCH 04/11] Update scheduler configs

---
 README.md | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index f02ae20a0..1beceda7a 100644
--- a/README.md
+++ b/README.md
@@ -254,8 +254,18 @@ gradient_checkpointing: false
 # stop training after this many evaluation losses have increased in a row
 # https://huggingface.co/transformers/v4.2.2/_modules/transformers/trainer_callback.html#EarlyStoppingCallback
 early_stopping_patience: 3
-# specify a scheduler to use with the optimizer. only one_cycle is supported currently
-lr_scheduler:
+
+# specify a scheduler and kwargs to use with the optimizer
+lr_scheduler: # 'one_cycle' | 'log_sweep' | empty for cosine
+lr_scheduler_kwargs:
+
+# for one_cycle scheduler
+lr_div_factor: # learning rate div factor
+
+# for log_sweep scheduler
+log_sweep_min_lr:
+log_sweep_max_lr:
+
 # specify optimizer
 optimizer:
 # specify weight decay

From 29273b5a5b42f5efa9290d8c3987697e8b7375f7 Mon Sep 17 00:00:00 2001
From: NanoCode012 <kevinvong@rocketmail.com>
Date: Thu, 25 May 2023 17:34:43 +0900
Subject: [PATCH 05/11] Add other minor configs

---
 README.md | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 1beceda7a..407e03ed3 100644
--- a/README.md
+++ b/README.md
@@ -226,6 +226,7 @@ lora_out_dir:
 lora_fan_in_fan_out: false
 
 # wandb configuration if you're using it
+wandb_mode:
 wandb_project:
 wandb_watch:
 wandb_run_id:
@@ -300,9 +301,12 @@ fsdp_config:
 # Deepspeed
 deepspeed:
 
-# TODO
+# Path to torch distx for optim 'adamw_anyprecision'
 torchdistx_path:
 
+# Set padding for data collator to 'longest'
+collator_pad_to_longest:
+
 # Debug mode
 debug:
 

From 7bc28eb8a8bf8786001c80afc991a8d4d4145b9c Mon Sep 17 00:00:00 2001
From: NanoCode012 <kevinvong@rocketmail.com>
Date: Thu, 25 May 2023 17:43:37 +0900
Subject: [PATCH 06/11] Add more data formats

---
 README.md | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/README.md b/README.md
index 407e03ed3..1bab0722f 100644
--- a/README.md
+++ b/README.md
@@ -97,6 +97,17 @@ Have dataset(s) in one of the following format (JSONL recommended):
   ```json
   {"instruction": "...", "input": "...", "output": "...", "reflection": "...", "corrected": "..."}
   ```
+- `explainchoice`: question, choices, (solution OR explanation)
+  ```json
+  {"question": "...", "choices": ["..."], "solution": "...", "explanation": "..."}
+  ```
+- `concisechoice`: question, choices, (solution OR explanation)
+  ```json
+  {"question": "...", "choices": ["..."], "solution": "...", "explanation": "..."}
+- `summarizetldr`: article and summary
+  ```json
+  {"article": "...", "summary": "..."}
+  ```
 
 > Have some new format to propose? Check if it's already defined in [data.py](src/axolotl/utils/data.py) in `dev` branch!
 

From 2c34f8d0c769341ce6c0b4eb3fb4291f77fcf581 Mon Sep 17 00:00:00 2001
From: NanoCode012 <kevinvong@rocketmail.com>
Date: Thu, 25 May 2023 17:44:58 +0900
Subject: [PATCH 07/11] Update dataset type

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 1bab0722f..1f90be3d6 100644
--- a/README.md
+++ b/README.md
@@ -194,7 +194,7 @@ datasets:
   # this can be either a hf dataset, or relative path
   - path: vicgalle/alpaca-gpt4
   # The type of prompt to use for training. [alpaca, sharegpt, gpteacher, oasst, reflection]
-    type: alpaca
+    type: alpaca # format OR format:prompt_style (chat/instruct)
     data_files: # path to source data files
     shards: # true if use subset data. make sure to set `shards` param also
 shards: # number of shards to split dataset into

From 1377400c333d92769b0165eb1b3da850c9a71ad8 Mon Sep 17 00:00:00 2001
From: NanoCode012 <kevinvong@rocketmail.com>
Date: Thu, 25 May 2023 22:33:45 +0900
Subject: [PATCH 08/11] Add info on Runtime Error

---
 README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/README.md b/README.md
index 1f90be3d6..e039fa639 100644
--- a/README.md
+++ b/README.md
@@ -375,6 +375,10 @@ Please reduce any below
   - `eval_batch_size`
   - `sequence_len`
 
+> RuntimeError: expected scalar type Float but found Half
+
+Try setting `fp16: true`
+
 ## Contributing 🤝
 
 Bugs? Please check for open issue else create a new [Issue](https://github.com/OpenAccess-AI-Collective/axolotl/issues/new).

From e65c203e9e7b799c526c43d5ba87d94d26a09e14 Mon Sep 17 00:00:00 2001
From: NanoCode012 <kevinvong@rocketmail.com>
Date: Thu, 25 May 2023 22:45:58 +0900
Subject: [PATCH 09/11] Add more detail on minimum GPU

---
 README.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index e039fa639..5a95eb474 100644
--- a/README.md
+++ b/README.md
@@ -135,11 +135,11 @@ See sample configs in [configs](configs) folder or [examples](examples) for quic
 
 - loading
   ```yaml
-  load_4bit: true
   load_in_8bit: true
-  bf16: true
+  load_in_8bit: true
+  bf16: true # require >=ampere
   fp16: true
-  tf32: true
+  tf32: true # require >=ampere
   ```
   Note: Repo does not do 4-bit quantization.
 
@@ -183,11 +183,11 @@ gptq_model_v1: false # v1 or v2
 load_in_8bit: true
 
 # Use CUDA bf16
-bf16: true # bool or 'full' for `bf16_full_eval`
+bf16: true # bool or 'full' for `bf16_full_eval`. require >=ampere
 # Use CUDA fp16
 fp16: true
 # Use CUDA tf32
-tf32: true
+tf32: true # require >=ampere
 
 # a list of one or more datasets to finetune the model with
 datasets:
@@ -286,7 +286,7 @@ weight_decay:
 # whether to use xformers attention patch https://github.com/facebookresearch/xformers:
 xformers_attention:
 # whether to use flash attention patch https://github.com/HazyResearch/flash-attention:
-flash_attention:
+flash_attention:  # require a100 for llama
 
 # resume from a specific checkpoint dir
 resume_from_checkpoint:

From f92245dbd65141ba1d0a5f9c2cf12504107da789 Mon Sep 17 00:00:00 2001
From: NanoCode012 <kevinvong@rocketmail.com>
Date: Thu, 25 May 2023 23:04:33 +0900
Subject: [PATCH 10/11] Fix missing closing code block

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 5a95eb474..110fec95d 100644
--- a/README.md
+++ b/README.md
@@ -104,6 +104,7 @@ Have dataset(s) in one of the following format (JSONL recommended):
 - `concisechoice`: question, choices, (solution OR explanation)
   ```json
   {"question": "...", "choices": ["..."], "solution": "...", "explanation": "..."}
+  ```
 - `summarizetldr`: article and summary
   ```json
   {"article": "...", "summary": "..."}

From a9e502ef45f29773986eda92eabb601477b12eb2 Mon Sep 17 00:00:00 2001
From: NanoCode012 <kevinvong@rocketmail.com>
Date: Thu, 25 May 2023 23:48:18 +0900
Subject: [PATCH 11/11] Update 4bit notes

---
 README.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 110fec95d..79e583c71 100644
--- a/README.md
+++ b/README.md
@@ -136,7 +136,7 @@ See sample configs in [configs](configs) folder or [examples](examples) for quic
 
 - loading
   ```yaml
-  load_in_8bit: true
+  load_in_4bit: true
   load_in_8bit: true
   bf16: true # require >=ampere
   fp16: true
@@ -175,13 +175,15 @@ tokenizer_type: AutoTokenizer
 # Trust remote code for untrusted source
 trust_remote_code:
 
-# whether you are training a 4-bit quantized model
+# whether you are training a 4-bit GPTQ quantized model
 load_4bit: true
 gptq_groupsize: 128 # group size
 gptq_model_v1: false # v1 or v2
 
 # this will attempt to quantize the model down to 8 bits and use adam 8 bit optimizer
 load_in_8bit: true
+# use bitsandbytes 4 bit
+load_in_4bit:
 
 # Use CUDA bf16
 bf16: true # bool or 'full' for `bf16_full_eval`. require >=ampere