Merge pull request #32 from NanoCode012/patch-2
Feat: Set `half` using `cfg.fp16` for 4bit
This commit is contained in:
@@ -112,6 +112,7 @@ def load_model(
|
|||||||
base_model_config if base_model_config else base_model,
|
base_model_config if base_model_config else base_model,
|
||||||
model_path,
|
model_path,
|
||||||
device_map=cfg.device_map,
|
device_map=cfg.device_map,
|
||||||
|
half=cfg.fp16,
|
||||||
groupsize=cfg.gptq_groupsize if cfg.gptq_groupsize else -1,
|
groupsize=cfg.gptq_groupsize if cfg.gptq_groupsize else -1,
|
||||||
is_v1_model=cfg.gptq_model_v1
|
is_v1_model=cfg.gptq_model_v1
|
||||||
if cfg.gptq_model_v1 is not None
|
if cfg.gptq_model_v1 is not None
|
||||||
|
|||||||
Reference in New Issue
Block a user