Support disabling ExLlama for GPTQ (#604)

* Support disabling ExLlama for GPTQ

* update property instead of item

* fix config key
This commit is contained in:
Wing Lian
2023-09-19 17:51:08 -04:00
committed by GitHub
parent aa656e04bd
commit faecff9798
2 changed files with 5 additions and 3 deletions

View File

@@ -196,6 +196,10 @@ def load_model(
if not hasattr(model_config, "quantization_config"):
LOG.warning("model config does not contain quantization_config information")
else:
if cfg.gptq_disable_exllama is not None:
model_config.quantization_config[
"disable_exllama"
] = cfg.gptq_disable_exllama
model_kwargs["quantization_config"] = GPTQConfig(
**model_config.quantization_config
)