From 8926fe9981bcf2fd868b4816fd5c0cc7ee528162 Mon Sep 17 00:00:00 2001
From: Sunny Liu
Date: Fri, 18 Apr 2025 15:48:23 -0400
Subject: [PATCH] relax config requirement for qlora + hqq

---
 src/axolotl/utils/schemas/peft.py |  4 +-
 tests/e2e/test_quantization.py    | 62 ++++++++++++++++++++++++++++++-
 2 files changed, 62 insertions(+), 4 deletions(-)

diff --git a/src/axolotl/utils/schemas/peft.py b/src/axolotl/utils/schemas/peft.py
index 5d408e1fe..c05cc2ce6 100644
--- a/src/axolotl/utils/schemas/peft.py
+++ b/src/axolotl/utils/schemas/peft.py
@@ -93,7 +93,7 @@ class LoraConfig(BaseModel):
 
                 if self.gptq:
                     raise ValueError("Can't merge qlora if gptq")
 
-                if self.load_in_4bit:
+                if self.load_in_4bit and not self.use_hqq:
                     raise ValueError("Can't merge qlora if loaded in 4bit")
             else:
@@ -103,7 +103,7 @@ class LoraConfig(BaseModel):
                 if self.gptq:
                     raise ValueError("Can't load qlora if gptq")
 
-                if not self.load_in_4bit:
+                if not self.load_in_4bit and not self.use_hqq:
                     raise ValueError("Require cfg.load_in_4bit to be True for qlora")
 
         return self
diff --git a/tests/e2e/test_quantization.py b/tests/e2e/test_quantization.py
index f6dab452a..c777e9e4a 100644
--- a/tests/e2e/test_quantization.py
+++ b/tests/e2e/test_quantization.py
@@ -25,7 +25,7 @@ class TestHQQ(unittest.TestCase):
     Test cases for training of HQQ-quantized llama models"""
 
     @with_temp_dir
-    def test_hqq_qlora(self, temp_dir):
+    def test_hqq_lora(self, temp_dir):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
@@ -37,7 +37,7 @@ class TestHQQ(unittest.TestCase):
                 "hqq_config": [
                     {
                         "nbits": 8,
-                        "group_size": 32,
+                        "group_size": 64,
                     }
                 ],
                 "adapter": "lora",
@@ -81,3 +81,61 @@ class TestHQQ(unittest.TestCase):
         check_tensorboard(
             temp_dir + "/runs", "train/train_loss", 2.0, "Train Loss is too high"
         )
+
+    @with_temp_dir
+    def test_hqq_qlora(self, temp_dir):
+        # pylint: disable=duplicate-code
+        cfg = DictDefault(
+            {
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
+                "sequence_len": 1024,
+                "sample_packing": True,
+                "flash_attention": True,
+                "use_hqq": True,
+                "hqq_config": [
+                    {
+                        "nbits": 4,
+                        "group_size": 64,
+                    }
+                ],
+                "adapter": "qlora",
+                "lora_r": 16,
+                "lora_alpha": 32,
+                "lora_dropout": 0.05,
+                "lora_target_linear": True,
+                "val_set_size": 0.0,
+                "special_tokens": {
+                    "pad_token": "<|endoftext|>",
+                },
+                "datasets": [
+                    {
+                        "path": "vicgalle/alpaca-gpt4",
+                        "type": "alpaca",
+                    },
+                ],
+                "num_epochs": 1,
+                "micro_batch_size": 2,
+                "gradient_accumulation_steps": 2,
+                "output_dir": temp_dir,
+                "learning_rate": 0.00001,
+                "optimizer": "adamw_torch_fused",
+                "lr_scheduler": "cosine",
+                "max_steps": 5,
+                "use_tensorboard": True,
+            }
+        )
+        if is_torch_bf16_gpu_available():
+            cfg.bf16 = True
+        else:
+            cfg.fp16 = True
+
+        cfg = validate_config(cfg)
+        normalize_config(cfg)
+        cli_args = TrainerCliArgs()
+        dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
+
+        train(cfg=cfg, dataset_meta=dataset_meta)
+
+        check_tensorboard(
+            temp_dir + "/runs", "train/train_loss", 2.0, "Train Loss is too high"
+        )
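
Usage note: the validator change above means an axolotl config can now pass
validation with "adapter: qlora" and "use_hqq: true" while leaving
"load_in_4bit" unset. Below is a minimal sketch mirroring the new
test_hqq_qlora case; it is illustrative rather than part of the patch, and it
assumes the same import paths the test file uses (axolotl.utils.config,
axolotl.utils.dict).

    # Minimal sketch: qlora + HQQ without load_in_4bit should now validate.
    # Assumes axolotl is installed; imports mirror tests/e2e/test_quantization.py.
    from axolotl.utils.config import validate_config
    from axolotl.utils.dict import DictDefault

    cfg = DictDefault(
        {
            "base_model": "HuggingFaceTB/SmolLM2-135M",
            "adapter": "qlora",
            "use_hqq": True,  # HQQ supplies the 4-bit quantization here ...
            "hqq_config": [{"nbits": 4, "group_size": 64}],  # ... so load_in_4bit stays unset
            "lora_r": 16,
            "lora_alpha": 32,
            "lora_target_linear": True,
            "learning_rate": 0.00001,
            "datasets": [{"path": "vicgalle/alpaca-gpt4", "type": "alpaca"}],
        }
    )

    # Before this patch, this call raised:
    #   ValueError: Require cfg.load_in_4bit to be True for qlora
    cfg = validate_config(cfg)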