diff --git a/docs/config.qmd b/docs/config.qmd index a67734498..4ccfa3bc0 100644 --- a/docs/config.qmd +++ b/docs/config.qmd @@ -63,6 +63,24 @@ bnb_config_kwargs: bnb_4bit_quant_type: nf4 bnb_4bit_use_double_quant: true +# Overrides quantization method to use HQQ instead of default bnb. +# See: https://huggingface.co/docs/transformers/main/en/quantization/hqq +# https://github.com/mobiusml/hqq +use_hqq: true +hqq_config: + - nbits: 4 + group_size: 64 + target_modules: + - self_attn.k_proj + - self_attn.v_proj + - self_attn.o_proj + - nbits: 3 + group_size: 32 + target_modules: + - mlp.gate_proj + - mlp.up_proj + - mlp.down_proj + # Whether you are training a 4-bit GPTQ quantized model gptq: true diff --git a/requirements.txt b/requirements.txt index 2827c2ca1..5c5a72b14 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,6 +22,7 @@ hf_xet==1.0.0 optimum==1.16.2 hf_transfer +hqq sentencepiece gradio==5.23.3 diff --git a/src/axolotl/utils/schemas/config.py b/src/axolotl/utils/schemas/config.py index b3625ee25..258382168 100644 --- a/src/axolotl/utils/schemas/config.py +++ b/src/axolotl/utils/schemas/config.py @@ -44,7 +44,7 @@ from axolotl.utils.schemas.model import ( ) from axolotl.utils.schemas.multimodal import MultiModalConfig from axolotl.utils.schemas.peft import LoraConfig, ReLoRAConfig -from axolotl.utils.schemas.quant import HQQConfig +from axolotl.utils.schemas.quant import QuantizationConfig from axolotl.utils.schemas.training import HyperparametersConfig from axolotl.utils.schemas.trl import TRLConfig from axolotl.utils.schemas.vllm import VllmConfig @@ -60,7 +60,7 @@ class AxolotlInputConfig( ModelOutputConfig, LoraConfig, ReLoRAConfig, -HQQConfig, +QuantizationConfig, HyperparametersConfig, WandbConfig, MLFlowConfig,