From 320aff18674e0929446b0b0de7b3b20bf2a467c5 Mon Sep 17 00:00:00 2001
From: Sunny Liu
Date: Mon, 21 Apr 2025 10:59:04 -0400
Subject: [PATCH] update config doc

---
 docs/config.qmd | 60 ++++++++++++++++++++++++++++---------------------
 1 file changed, 35 insertions(+), 25 deletions(-)

diff --git a/docs/config.qmd b/docs/config.qmd
index 4ccfa3bc0..510a46f44 100644
--- a/docs/config.qmd
+++ b/docs/config.qmd
@@ -55,38 +55,48 @@ overrides_of_model_config:
 overrides_of_model_kwargs:
 # use_cache: False
 
-# optional overrides to the bnb 4bit quantization configuration
-# https://huggingface.co/docs/transformers/main/main_classes/quantization#transformers.BitsAndBytesConfig
-bnb_config_kwargs:
-  # These are default values
-  llm_int8_has_fp16_weight: false
-  bnb_4bit_quant_type: nf4
-  bnb_4bit_use_double_quant: true
-# Overrides quantization method to use HQQ instead of default bnb.
+
+# Quantization configuration.
 # See: https://huggingface.co/docs/transformers/main/en//quantization/hqq
 # https://github.com/mobiusml/hqq
-use_hqq: true
-hqq_config:
-  - nbits: 4
+quantization:
+  backend: bnb | hqq | gptq
+  bits: 8
+  # optional overrides to the bnb 4bit quantization configuration
+  # https://huggingface.co/docs/transformers/main/main_classes/quantization#transformers.BitsAndBytesConfig
+  bnb_config_kwargs:
+    # These are default values
+    llm_int8_has_fp16_weight: false
+    bnb_4bit_quant_type: nf4
+    bnb_4bit_use_double_quant: true
+
+  # If using the hqq backend, additional config parameters are needed. See: https://huggingface.co/docs/transformers/main/en//quantization/hqq
+  hqq_config:
+    # Pick one of the following, depending on whether you want to uniformly quantize the
+    # whole model or apply different quantization settings to specific layers:
+
+    # To uniformly quantize the whole model:
     group_size: 64
-    target_modules:
-      - self_attn.k_proj
-      - self_attn.v_proj
-      - self_attn.o_proj
-  - nbits: 3
-    group_size: 32
-    target_modules:
-      - mlp.gate_proj
-      - mlp.up_proj
-      - mlp.down_proj
-
+    # To use a dynamic config that applies different quantization settings to specific layers:
+    - nbits: 4
+      group_size: 64
+      target_modules:
+        - self_attn.k_proj
+        - self_attn.v_proj
+        - self_attn.o_proj
+    - nbits: 3
+      group_size: 32
+      target_modules:
+        - mlp.gate_proj
+        - mlp.up_proj
+        - mlp.down_proj
+# (Internal Use Only)
 # Whether you are training a 4-bit GPTQ quantized model
-gptq: true
-
+gptq:
 # This will attempt to quantize the model down to 8 bits and use adam 8 bit optimizer
-load_in_8bit: true
+load_in_8bit:
 # Use bitsandbytes 4 bit
 load_in_4bit:
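
As a concrete illustration of the schema added above, a bnb-backed config might look like the sketch below. The keys are the ones introduced in this patch; the values (4-bit NF4 with double quantization) simply echo the documented defaults and are illustrative, not recommendations.

```yaml
# Sketch of the new quantization block with the bnb backend,
# based on the schema in this patch; values are illustrative only.
quantization:
  backend: bnb
  bits: 4
  bnb_config_kwargs:
    llm_int8_has_fp16_weight: false
    bnb_4bit_quant_type: nf4
    bnb_4bit_use_double_quant: true
```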
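The hqq dynamic form can be sketched the same way. The module names below are the Llama-style projection layers used as examples in the patch, and the assumption (based on the schema shown above) is that in this mode the per-group `nbits` takes the place of a single top-level `bits` value.

```yaml
# Hypothetical hqq example based on the schema above: attention projections
# quantized to 4 bits, MLP projections to 3 bits.
quantization:
  backend: hqq
  hqq_config:
    - nbits: 4
      group_size: 64
      target_modules:
        - self_attn.k_proj
        - self_attn.v_proj
        - self_attn.o_proj
    - nbits: 3
      group_size: 32
      target_modules:
        - mlp.gate_proj
        - mlp.up_proj
        - mlp.down_proj
```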