add float16 docs and tweak typehints

This commit is contained in:
Wing Lian
2023-06-15 00:26:44 -04:00
parent 6f849809c5
commit 88e17ffc50
2 changed files with 13 additions and 3 deletions

View File

@@ -264,6 +264,8 @@ See sample configs in [configs](configs) folder or [examples](examples) for quic
bf16: true # require >=ampere bf16: true # require >=ampere
fp16: true fp16: true
tf32: true # require >=ampere tf32: true # require >=ampere
bfloat16: true # require >=ampere, use instead of bf16 when you don't want AMP (automatic mixed precision)
float16: true # use instead of fp16 when you don't want AMP (automatic mixed precision)
``` ```
Note: Repo does not do 4-bit quantization. Note: Repo does not do 4-bit quantization.
@@ -522,6 +524,12 @@ Add below flag to train command above
--merge_lora --lora_model_dir="./completed-model" --load_in_8bit=False --load_in_4bit=False --merge_lora --lora_model_dir="./completed-model" --load_in_8bit=False --load_in_4bit=False
``` ```
If you run out of CUDA memory, you can try to merge in system RAM instead by hiding the GPUs from the process:
```bash
CUDA_VISIBLE_DEVICES="" python3 scripts/finetune.py ...
```
## Common Errors 🧰 ## Common Errors 🧰
> Cuda out of memory > Cuda out of memory

View File

@@ -11,13 +11,14 @@ import bitsandbytes as bnb
import torch import torch
import transformers import transformers
from optimum.bettertransformer import BetterTransformer from optimum.bettertransformer import BetterTransformer
from transformers import PreTrainedModel # noqa: F401 from transformers import ( # noqa: F401
from transformers import (
AutoConfig, AutoConfig,
AutoModelForCausalLM, AutoModelForCausalLM,
AutoTokenizer, AutoTokenizer,
BitsAndBytesConfig, BitsAndBytesConfig,
LlamaConfig, LlamaConfig,
PreTrainedModel,
PreTrainedTokenizerBase,
) )
from axolotl.prompt_tokenizers import LLAMA_DEFAULT_PAD_TOKEN from axolotl.prompt_tokenizers import LLAMA_DEFAULT_PAD_TOKEN
@@ -71,7 +72,7 @@ def load_tokenizer(
def load_model( def load_model(
base_model, base_model_config, model_type, tokenizer, cfg, adapter="lora" base_model, base_model_config, model_type, tokenizer, cfg, adapter="lora"
): ):
# type: (str, str, str, AutoTokenizer, DictDefault, Optional[str]) -> Tuple[PreTrainedModel, Optional[PeftConfig]] # type: (str, str, str, PreTrainedTokenizerBase, DictDefault, Optional[str]) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
""" """
Load a model from a base model and a model type. Load a model from a base model and a model type.
""" """
@@ -284,6 +285,7 @@ def load_model(
model = AutoModelForCausalLM.from_pretrained( model = AutoModelForCausalLM.from_pretrained(
base_model, base_model,
load_in_8bit=cfg.load_in_8bit and cfg.adapter is not None, load_in_8bit=cfg.load_in_8bit and cfg.adapter is not None,
load_in_4bit=cfg.load_in_4bit and cfg.adapter is not None,
torch_dtype=torch_dtype, torch_dtype=torch_dtype,
device_map=cfg.device_map, device_map=cfg.device_map,
trust_remote_code=cfg.trust_remote_code or False, trust_remote_code=cfg.trust_remote_code or False,