From 320aff18674e0929446b0b0de7b3b20bf2a467c5 Mon Sep 17 00:00:00 2001
From: Sunny Liu
Date: Mon, 21 Apr 2025 10:59:04 -0400
Subject: [PATCH] update config doc

---
 docs/config.qmd | 60 ++++++++++++++++++++++++++++---------------------
 1 file changed, 35 insertions(+), 25 deletions(-)

diff --git a/docs/config.qmd b/docs/config.qmd
index 4ccfa3bc0..510a46f44 100644
--- a/docs/config.qmd
+++ b/docs/config.qmd
@@ -55,38 +55,48 @@ overrides_of_model_config:
 overrides_of_model_kwargs:
 # use_cache: False
 
-# optional overrides to the bnb 4bit quantization configuration
-# https://huggingface.co/docs/transformers/main/main_classes/quantization#transformers.BitsAndBytesConfig
-bnb_config_kwargs:
-  # These are default values
-  llm_int8_has_fp16_weight: false
-  bnb_4bit_quant_type: nf4
-  bnb_4bit_use_double_quant: true
-# Overrides quantization method to use HQQ instead of default bnb.
+
+# Quantization configuration.
 # See: https://huggingface.co/docs/transformers/main/en//quantization/hqq
 # https://github.com/mobiusml/hqq
-use_hqq: true
-hqq_config:
-  - nbits: 4
+quantization:
+  backend: bnb | hqq | gptq
+  bits: 8
+  # optional overrides to the bnb 4bit quantization configuration
+  # https://huggingface.co/docs/transformers/main/main_classes/quantization#transformers.BitsAndBytesConfig
+  bnb_config_kwargs:
+    # These are default values
+    llm_int8_has_fp16_weight: false
+    bnb_4bit_quant_type: nf4
+    bnb_4bit_use_double_quant: true
+
+  # If using the hqq backend, additional config parameters are needed. See: https://huggingface.co/docs/transformers/main/en//quantization/hqq
+  hqq_config:
+    # Pick one of the following, depending on whether you want to uniformly quantize the
+    # whole model or apply different quantization settings to specific layers:
+
+    # To uniformly quantize the whole model:
     group_size: 64
-    target_modules:
-      - self_attn.k_proj
-      - self_attn.v_proj
-      - self_attn.o_proj
-  - nbits: 3
-    group_size: 32
-    target_modules:
-      - mlp.gate_proj
-      - mlp.up_proj
-      - mlp.down_proj
-
+    # To use a dynamic config that applies different quantization settings to specific layers:
+    - nbits: 4
+      group_size: 64
+      target_modules:
+        - self_attn.k_proj
+        - self_attn.v_proj
+        - self_attn.o_proj
+    - nbits: 3
+      group_size: 32
+      target_modules:
+        - mlp.gate_proj
+        - mlp.up_proj
+        - mlp.down_proj
+# (Internal Use Only)
 # Whether you are training a 4-bit GPTQ quantized model
-gptq: true
-
+gptq:
 # This will attempt to quantize the model down to 8 bits and use adam 8 bit optimizer
-load_in_8bit: true
+load_in_8bit:
 # Use bitsandbytes 4 bit
 load_in_4bit:
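
As a concrete illustration of the schema added above, a bnb-backed config might look like the sketch below. The keys are the ones introduced in this patch; the values (4-bit NF4 with double quantization) simply echo the documented defaults and are illustrative, not recommendations.

```yaml
# Sketch of the new quantization block with the bnb backend,
# based on the schema in this patch; values are illustrative only.
quantization:
  backend: bnb
  bits: 4
  bnb_config_kwargs:
    llm_int8_has_fp16_weight: false
    bnb_4bit_quant_type: nf4
    bnb_4bit_use_double_quant: true
```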
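The hqq dynamic form can be sketched the same way. The module names below are the Llama-style projection layers used as examples in the patch, and the assumption (based on the schema shown above) is that in this mode the per-group `nbits` takes the place of a single top-level `bits` value.

```yaml
# Hypothetical hqq example based on the schema above: attention projections
# quantized to 4 bits, MLP projections to 3 bits.
quantization:
  backend: hqq
  hqq_config:
    - nbits: 4
      group_size: 64
      target_modules:
        - self_attn.k_proj
        - self_attn.v_proj
        - self_attn.o_proj
    - nbits: 3
      group_size: 32
      target_modules:
        - mlp.gate_proj
        - mlp.up_proj
        - mlp.down_proj
```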