fix so inference can be run against quantized models without adapters (#1834)

* fix so inference can be run against quantized models without adapters

* Update error msg [skip e2e]

Co-authored-by: NanoCode012 <nano@axolotl.ai>

---------

Co-authored-by: NanoCode012 <nano@axolotl.ai>
Authored by Wing Lian on 2024-12-03 00:02:38 -05:00, committed by GitHub
parent ff4794cd8e
commit b9bb02406a
2 changed files with 6 additions and 4 deletions


@@ -19,7 +19,7 @@ from axolotl.common.cli import TrainerCliArgs
 def do_cli(config: Path = Path("examples/"), gradio=False, **kwargs):
     # pylint: disable=duplicate-code
     print_axolotl_text_art()
-    parsed_cfg = load_cfg(config, **kwargs)
+    parsed_cfg = load_cfg(config, inference=True, **kwargs)
     parsed_cfg.sample_packing = False
     parser = transformers.HfArgumentParser((TrainerCliArgs))
     parsed_cli_args, _ = parser.parse_args_into_dataclasses(
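For context on why this one-line change matters: the inference flag has to reach the raw config dict that the pydantic validators inspect. A minimal sketch of that plumbing, assuming load_cfg merges its keyword arguments into the loaded YAML config; the helper below is a hypothetical stand-in, not axolotl's actual load_cfg:

# Hypothetical stand-in for axolotl's load_cfg, shown only to illustrate
# how inference=True can flow from the CLI entry point into the raw config
# dict that the pydantic validators later receive.
import yaml


def load_cfg_sketch(config_path, **kwargs):
    with open(config_path, encoding="utf-8") as fin:
        cfg = yaml.safe_load(fin)  # user config, e.g. {"load_in_4bit": True}
    cfg.update(kwargs)  # CLI overrides land here, including inference=True
    return cfg  # validate_adapter can now check data.get("inference")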


@@ -323,11 +323,13 @@ class LoraConfig(BaseModel):
     @model_validator(mode="before")
     @classmethod
     def validate_adapter(cls, data):
-        if not data.get("adapter") and (
-            data.get("load_in_8bit") or data.get("load_in_4bit")
+        if (
+            not data.get("adapter")
+            and not data.get("inference")
+            and (data.get("load_in_8bit") or data.get("load_in_4bit"))
         ):
             raise ValueError(
-                "load_in_8bit and load_in_4bit are not supported without setting an adapter."
+                "load_in_8bit and load_in_4bit are not supported without setting an adapter for training."
                 "If you want to full finetune, please turn off load_in_8bit and load_in_4bit."
             )
         return data
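
To see the resulting behavior end to end, here is a self-contained sketch with a trimmed-down stand-in for LoraConfig; the class name, fields, and error message are reduced for illustration, and only the guard condition mirrors the diff:

from typing import Optional

from pydantic import BaseModel, model_validator


class LoraConfigSketch(BaseModel):
    adapter: Optional[str] = None
    inference: bool = False
    load_in_8bit: bool = False
    load_in_4bit: bool = False

    @model_validator(mode="before")
    @classmethod
    def validate_adapter(cls, data):
        # Same guard as the diff: quantized loading without an adapter is
        # rejected for training, but allowed when inference is requested.
        if (
            not data.get("adapter")
            and not data.get("inference")
            and (data.get("load_in_8bit") or data.get("load_in_4bit"))
        ):
            raise ValueError("quantized load requires an adapter for training")
        return data


try:
    LoraConfigSketch(load_in_4bit=True)  # training config: still rejected
except ValueError as err:  # pydantic's ValidationError subclasses ValueError
    print(err)

LoraConfigSketch(load_in_4bit=True, inference=True)  # inference: now accepted

Before this change the guard never consulted the inference flag, so loading a quantized model for inference without an adapter failed at config validation; with it, only the training case is rejected.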