update config doc

2025-04-21 10:59:04 -04:00
parent ac24eba2ac
commit 320aff1867
1 changed files with 35 additions and 25 deletions
--- a/docs/config.qmd
+++ b/docs/config.qmd
@@ -55,19 +55,30 @@ overrides_of_model_config:
 overrides_of_model_kwargs:
  # use_cache: False
-# optional overrides to the bnb 4bit quantization configuration
+
-# https://huggingface.co/docs/transformers/main/main_classes/quantization#transformers.BitsAndBytesConfig
+
-bnb_config_kwargs:
+# Quantization configuration.
 # See: https://huggingface.co/docs/transformers/main/en//quantization/hqq
 #      https://github.com/mobiusml/hqq
 quantization:
  backend: bnb | hqq | gptq
  bits: 8
  # optional overrides to the bnb 4bit quantization configuration
  # https://huggingface.co/docs/transformers/main/main_classes/quantization#transformers.BitsAndBytesConfig
  bnb_config_kwargs:
    # These are default values
    llm_int8_has_fp16_weight: false
    bnb_4bit_quant_type: nf4
    bnb_4bit_use_double_quant: true
-# Overrides quantization method to use HQQ instead of default bnb.
+  # If using hqq config, additional config parameters are needed. See: https://huggingface.co/docs/transformers/main/en/quantization/hqq
-# See: https://huggingface.co/docs/transformers/main/en//quantization/hqq
+  hqq_config:
-#      https://github.com/mobiusml/hqq
+    # Pick one of the following, depending on whether you want to uniformly quantize the whole model or
-use_hqq: true
+    # apply different quantization settings to specific layers in the model:
-hqq_config:
+
    # if uniformly quantize the whole model:
    group_size: 64
    # if we want to invoke dynamic_config in order to apply specific layers with different quantization settings:
    - nbits: 4
      group_size: 64
      target_modules:
@@ -81,12 +92,11 @@ hqq_config:
        - mlp.up_proj
        - mlp.down_proj
-
+# (Internal Use Only)
 # Whether you are training a 4-bit GPTQ quantized model
-gptq: true
+gptq:
 # This will attempt to quantize the model down to 8 bits and use adam 8 bit optimizer
-load_in_8bit: true
+load_in_8bit:
 # Use bitsandbytes 4 bit
 load_in_4bit: