Merge pull request #48 from NanoCode012/feat/update-readme

Feat: Minor update readme from dev changes
2023-05-25 23:49:58 +09:00
parent 68f0c71424 a9e502ef45
commit 7ec105041d
1 changed files with 59 additions and 18 deletions
--- a/README.md
+++ b/README.md
@@ -97,6 +97,18 @@ Have dataset(s) in one of the following format (JSONL recommended):
  ```json
  {"instruction": "...", "input": "...", "output": "...", "reflection": "...", "corrected": "..."}
  ```
 - `explainchoice`: question, choices, (solution OR explanation)
  ```json
  {"question": "...", "choices": ["..."], "solution": "...", "explanation": "..."}
  ```
 - `concisechoice`: question, choices, (solution OR explanation)
  ```json
  {"question": "...", "choices": ["..."], "solution": "...", "explanation": "..."}
  ```
 - `summarizetldr`: article and summary
  ```json
  {"article": "...", "summary": "..."}
  ```
 > Have some new format to propose? Check if it's already defined in [data.py](src/axolotl/utils/data.py) in `dev` branch!
@@ -124,17 +136,17 @@ See sample configs in [configs](configs) folder or [examples](examples) for quic
 - loading
  ```yaml
-  load_4bit: true
+  load_in_4bit: true
  load_in_8bit: true
-  bf16: true
+  bf16: true # requires >=ampere
  fp16: true
-  tf32: true
+  tf32: true # requires >=ampere
  ```
  Note: Repo does not do 4-bit quantization.
 - lora
  ```yaml
-  adapter: lora # blank for full finetune
+  adapter: lora # qlora or leave blank for full finetune
  lora_r: 8
  lora_alpha: 16
  lora_dropout: 0.05
@@ -163,28 +175,32 @@ tokenizer_type: AutoTokenizer
 # Trust remote code for untrusted source
 trust_remote_code:
-# whether you are training a 4-bit quantized model
+# whether you are training a 4-bit GPTQ quantized model
 load_4bit: true
 gptq_groupsize: 128 # group size
 gptq_model_v1: false # v1 or v2
 # this will attempt to quantize the model down to 8 bits and use adam 8 bit optimizer
 load_in_8bit: true
 # use bitsandbytes 4 bit
 load_in_4bit:
 # Use CUDA bf16
-bf16: true
+bf16: true # bool or 'full' for `bf16_full_eval`. requires >=ampere
 # Use CUDA fp16
 fp16: true
 # Use CUDA tf32
-tf32: true
+tf32: true # requires >=ampere
 # a list of one or more datasets to finetune the model with
 datasets:
  # this can be either a hf dataset, or relative path
  - path: vicgalle/alpaca-gpt4
  # The type of prompt to use for training. [alpaca, sharegpt, gpteacher, oasst, reflection]
-    type: alpaca
+    type: alpaca # format OR format:prompt_style (chat/instruct)
    data_files: # path to source data files
    shards: # true to use a subset of the data. make sure to set the `shards` param also
 shards: # number of shards to split dataset into
 # axolotl attempts to save the dataset as an arrow after packing the data together so
 # subsequent training attempts load faster, relative path
@@ -201,7 +217,7 @@ sequence_len: 2048
 # inspired by StackLLaMA. see https://huggingface.co/blog/stackllama#supervised-fine-tuning
 max_packed_sequence_len: 1024
-# if you want to use lora, leave blank to train all parameters in original model
+# set to 'lora' or 'qlora' to train an adapter, or leave blank to train all parameters in the original model
 adapter: lora
 # if you already have a lora model trained that you want to load, put that here
 # lora hyperparameters
@@ -224,6 +240,7 @@ lora_out_dir:
 lora_fan_in_fan_out: false
 # wandb configuration if you're using it
 wandb_mode:
 wandb_project:
 wandb_watch:
 wandb_run_id:
@@ -252,8 +269,18 @@ gradient_checkpointing: false
 # stop training after this many evaluation losses have increased in a row
 # https://huggingface.co/transformers/v4.2.2/_modules/transformers/trainer_callback.html#EarlyStoppingCallback
 early_stopping_patience: 3
-# specify a scheduler to use with the optimizer. only one_cycle is supported currently
+
-lr_scheduler:
+# specify a scheduler and kwargs to use with the optimizer
 lr_scheduler: # 'one_cycle' | 'log_sweep' | empty for cosine
 lr_scheduler_kwargs:
 # for one_cycle optim
 lr_div_factor: # learning rate div factor
 # for log_sweep optim
 log_sweep_min_lr:
 log_sweep_max_lr:
 # specify optimizer
 optimizer:
 # specify weight decay
@@ -262,7 +289,7 @@ weight_decay:
 # whether to use xformers attention patch https://github.com/facebookresearch/xformers:
 xformers_attention:
 # whether to use flash attention patch https://github.com/HazyResearch/flash-attention:
-flash_attention:
+flash_attention:  # requires an A100 for llama
 # resume from a specific checkpoint dir
 resume_from_checkpoint:
@@ -288,11 +315,17 @@ fsdp_config:
 # Deepspeed
 deepspeed:
-# TODO
+# Path to torch distx for optim 'adamw_anyprecision'
 torchdistx_path:
 # Set padding for data collator to 'longest'
 collator_pad_to_longest:
 # Debug mode
 debug:
 # Seed
 seed:
 ```
 </details>
@@ -317,12 +350,16 @@ accelerate launch scripts/finetune.py configs/your_config.yml
 ### Inference
-Add `--inference` flag to train command above
+Pass the appropriate flag to the train command:
-If you are inferencing a pretrained LORA, pass 
+- Pretrained LORA:
-```bash
+  ```bash
--lora_model_dir ./completed-model
+  --inference --lora_model_dir ./completed-model
-```
+  ```
 - Full weights finetune:
  ```bash
  --inference --base_model ./completed-model
  ```
 ### Merge LORA to base
@@ -341,6 +378,10 @@ Please reduce any below
  - `eval_batch_size`
  - `sequence_len`
 > RuntimeError: expected scalar type Float but found Half
 Try setting `fp16: true`
 ## Contributing 🤝
 Bugs? Please check for an open issue, or else create a new [Issue](https://github.com/OpenAccess-AI-Collective/axolotl/issues/new).