Load model faster with low_cpu_mem_usage

This commit is contained in:
Wing Lian
2023-09-08 02:06:12 -04:00
parent fb8ee37ca6
commit 9c52a83403

View File

@@ -340,6 +340,7 @@ def load_model(
base_model,
config=config,
trust_remote_code=cfg.trust_remote_code or False,
low_cpu_mem_usage=True,
).half()
model = tp.tensor_parallel(model, sharded=False)
else: