fix so inference can be run against quantized models without adapters (#1834)
* fix so inference can be run against quantized models without adapters
* Update error msg [skip e2e]

Co-authored-by: NanoCode012 <nano@axolotl.ai>

---------

Co-authored-by: NanoCode012 <nano@axolotl.ai>
This commit is contained in:
@@ -19,7 +19,7 @@ from axolotl.common.cli import TrainerCliArgs
|
|||||||
def do_cli(config: Path = Path("examples/"), gradio=False, **kwargs):
|
def do_cli(config: Path = Path("examples/"), gradio=False, **kwargs):
|
||||||
# pylint: disable=duplicate-code
|
# pylint: disable=duplicate-code
|
||||||
print_axolotl_text_art()
|
print_axolotl_text_art()
|
||||||
parsed_cfg = load_cfg(config, **kwargs)
|
parsed_cfg = load_cfg(config, inference=True, **kwargs)
|
||||||
parsed_cfg.sample_packing = False
|
parsed_cfg.sample_packing = False
|
||||||
parser = transformers.HfArgumentParser((TrainerCliArgs))
|
parser = transformers.HfArgumentParser((TrainerCliArgs))
|
||||||
parsed_cli_args, _ = parser.parse_args_into_dataclasses(
|
parsed_cli_args, _ = parser.parse_args_into_dataclasses(
|
||||||
|
|||||||
@@ -323,11 +323,13 @@ class LoraConfig(BaseModel):
|
|||||||
@model_validator(mode="before")
|
@model_validator(mode="before")
|
||||||
@classmethod
|
@classmethod
|
||||||
def validate_adapter(cls, data):
|
def validate_adapter(cls, data):
|
||||||
if not data.get("adapter") and (
|
if (
|
||||||
data.get("load_in_8bit") or data.get("load_in_4bit")
|
not data.get("adapter")
|
||||||
|
and not data.get("inference")
|
||||||
|
and (data.get("load_in_8bit") or data.get("load_in_4bit"))
|
||||||
):
|
):
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"load_in_8bit and load_in_4bit are not supported without setting an adapter."
|
"load_in_8bit and load_in_4bit are not supported without setting an adapter for training."
|
||||||
"If you want to full finetune, please turn off load_in_8bit and load_in_4bit."
|
"If you want to full finetune, please turn off load_in_8bit and load_in_4bit."
|
||||||
)
|
)
|
||||||
return data
|
return data
|
||||||
|
|||||||
Reference in New Issue
Block a user