update config doc

2025-04-21 10:59:04 -04:00
parent ac24eba2ac
commit 320aff1867
1 changed files with 35 additions and 25 deletions
--- a/docs/config.qmd
+++ b/docs/config.qmd
@@ -55,38 +55,48 @@ overrides_of_model_config:
 overrides_of_model_kwargs:
  # use_cache: False
 # optional overrides to the bnb 4bit quantization configuration
 # https://huggingface.co/docs/transformers/main/main_classes/quantization#transformers.BitsAndBytesConfig
 bnb_config_kwargs:
  # These are default values
  llm_int8_has_fp16_weight: false
  bnb_4bit_quant_type: nf4
  bnb_4bit_use_double_quant: true
-# Overrides quantization method to use HQQ instead of default bnb.
+
 # Quantization configuration.
 # See: https://huggingface.co/docs/transformers/main/en/quantization/hqq
 #      https://github.com/mobiusml/hqq
-use_hqq: true
+quantization:
-hqq_config:
+  backend: bnb | hqq | gptq
-  - nbits: 4
+  bits: 8
  # optional overrides to the bnb 4bit quantization configuration
  # https://huggingface.co/docs/transformers/main/main_classes/quantization#transformers.BitsAndBytesConfig
  bnb_config_kwargs:
    # These are default values
    llm_int8_has_fp16_weight: false
    bnb_4bit_quant_type: nf4
    bnb_4bit_use_double_quant: true
  # If using hqq config, additional config parameters are needed. See: https://huggingface.co/docs/transformers/main/en/quantization/hqq
  hqq_config:
    # pick one of the following, depending on if you want to uniformly quantize the whole model or
    # apply different quantization settings to specific layers in the model:
    # if uniformly quantizing the whole model:
    group_size: 64
-    target_modules:
+    # if you want to use dynamic_config to apply different quantization settings to specific layers:
-      - self_attn.k_proj
+    - nbits: 4
-      - self_attn.v_proj
+      group_size: 64
-      - self_attn.o_proj
+      target_modules:
-  - nbits: 3
+        - self_attn.k_proj
-    group_size: 32
+        - self_attn.v_proj
-    target_modules:
+        - self_attn.o_proj
-      - mlp.gate_proj
+    - nbits: 3
-      - mlp.up_proj
+      group_size: 32
-      - mlp.down_proj
+      target_modules:
-
+        - mlp.gate_proj
        - mlp.up_proj
        - mlp.down_proj
 # (Internal Use Only)
 # Whether you are training a 4-bit GPTQ quantized model
-gptq: true
+gptq:
 # This will attempt to quantize the model down to 8 bits and use adam 8 bit optimizer
-load_in_8bit: true
+load_in_8bit:
 # Use bitsandbytes 4 bit
 load_in_4bit: