update config doc
This commit is contained in:
@@ -55,38 +55,48 @@ overrides_of_model_config:
|
|||||||
overrides_of_model_kwargs:
|
overrides_of_model_kwargs:
|
||||||
# use_cache: False
|
# use_cache: False
|
||||||
|
|
||||||
# optional overrides to the bnb 4bit quantization configuration
|
|
||||||
# https://huggingface.co/docs/transformers/main/main_classes/quantization#transformers.BitsAndBytesConfig
|
|
||||||
bnb_config_kwargs:
|
|
||||||
# These are default values
|
|
||||||
llm_int8_has_fp16_weight: false
|
|
||||||
bnb_4bit_quant_type: nf4
|
|
||||||
bnb_4bit_use_double_quant: true
|
|
||||||
|
|
||||||
# Overrides quantization method to use HQQ instead of default bnb.
|
|
||||||
|
# Quantization configuration.
|
||||||
# See: https://huggingface.co/docs/transformers/main/en/quantization/hqq
|
# See: https://huggingface.co/docs/transformers/main/en/quantization/hqq
|
||||||
# https://github.com/mobiusml/hqq
|
# https://github.com/mobiusml/hqq
|
||||||
use_hqq: true
|
quantization:
|
||||||
hqq_config:
|
backend: bnb | hqq | gptq
|
||||||
- nbits: 4
|
bits: 8
|
||||||
|
# optional overrides to the bnb 4bit quantization configuration
|
||||||
|
# https://huggingface.co/docs/transformers/main/main_classes/quantization#transformers.BitsAndBytesConfig
|
||||||
|
bnb_config_kwargs:
|
||||||
|
# These are default values
|
||||||
|
llm_int8_has_fp16_weight: false
|
||||||
|
bnb_4bit_quant_type: nf4
|
||||||
|
bnb_4bit_use_double_quant: true
|
||||||
|
|
||||||
|
# If using the hqq backend, additional config parameters are needed. See: https://huggingface.co/docs/transformers/main/en/quantization/hqq
|
||||||
|
hqq_config:
|
||||||
|
# pick one of the following, depending on whether you want to uniformly quantize the whole model or
|
||||||
|
# apply different quantization settings to specific layers in the model:
|
||||||
|
|
||||||
|
# if uniformly quantizing the whole model:
|
||||||
group_size: 64
|
group_size: 64
|
||||||
target_modules:
|
# if using dynamic_config to apply different quantization settings to specific layers:
|
||||||
- self_attn.k_proj
|
- nbits: 4
|
||||||
- self_attn.v_proj
|
group_size: 64
|
||||||
- self_attn.o_proj
|
target_modules:
|
||||||
- nbits: 3
|
- self_attn.k_proj
|
||||||
group_size: 32
|
- self_attn.v_proj
|
||||||
target_modules:
|
- self_attn.o_proj
|
||||||
- mlp.gate_proj
|
- nbits: 3
|
||||||
- mlp.up_proj
|
group_size: 32
|
||||||
- mlp.down_proj
|
target_modules:
|
||||||
|
- mlp.gate_proj
|
||||||
|
- mlp.up_proj
|
||||||
|
- mlp.down_proj
|
||||||
|
|
||||||
|
# (Internal Use Only)
|
||||||
# Whether you are training a 4-bit GPTQ quantized model
|
# Whether you are training a 4-bit GPTQ quantized model
|
||||||
gptq: true
|
gptq:
|
||||||
|
|
||||||
# This will attempt to quantize the model down to 8 bits and use adam 8 bit optimizer
|
# This will attempt to quantize the model down to 8 bits and use adam 8 bit optimizer
|
||||||
load_in_8bit: true
|
load_in_8bit:
|
||||||
# Use bitsandbytes 4 bit
|
# Use bitsandbytes 4 bit
|
||||||
load_in_4bit:
|
load_in_4bit:
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user