Update test_models.py to conform to the new quantization config

This commit is contained in:
Sunny Liu
2025-04-21 11:34:37 -04:00
parent ffd4ef1ece
commit 9be971d47c

View File

@@ -21,8 +21,10 @@ class TestModelsUtils:
 "base_model": "JackFram/llama-68m",
 "model_type": "LlamaForCausalLM",
 "tokenizer_type": "LlamaTokenizer",
-"load_in_8bit": True,
-"load_in_4bit": False,
+"quantization": {
+"backend": "bnb",
+"bits": 8,
+},
 "adapter": "lora",
 "flash_attention": False,
 "sample_packing": True,