diff --git a/requirements.txt b/requirements.txt
index f32af373b..dc74b916f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -25,7 +25,7 @@ numpy>=1.24.4
 # qlora things
 evaluate==0.4.1
 scipy
-scikit-learn==1.2.2
+scikit-learn==1.4.2
 pynvml
 art
 fschat @ git+https://github.com/lm-sys/FastChat.git@27a05b04a35510afb1d767ae7e5990cbd278f8fe
diff --git a/src/axolotl/cli/preprocess.py b/src/axolotl/cli/preprocess.py
index e0dd7c2dc..e12462c00 100644
--- a/src/axolotl/cli/preprocess.py
+++ b/src/axolotl/cli/preprocess.py
@@ -82,7 +82,16 @@ def do_cli(config: Union[Path, str] = Path("examples/"), **kwargs):
             # "copying from a non-meta parameter in the checkpoint to a meta parameter in the current model"
             warnings.simplefilter("ignore")
             with init_empty_weights(include_buffers=True):
-                AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
+                try:
+                    AutoModelForCausalLM.from_pretrained(
+                        model_name, trust_remote_code=True
+                    )
+                except Exception as exc:  # pylint: disable=broad-exception-caught
+                    # Still non-fatal, but report the failure instead of
+                    # discarding it silently.
+                    LOG.warning(
+                        "ignoring error while loading %s: %s", model_name, exc
+                    )
 
     LOG.info(
         Fore.GREEN
diff --git a/src/axolotl/utils/config/models/input/v0_4_1/__init__.py b/src/axolotl/utils/config/models/input/v0_4_1/__init__.py
index bc4c79a6c..309469cb4 100644
--- a/src/axolotl/utils/config/models/input/v0_4_1/__init__.py
+++ b/src/axolotl/utils/config/models/input/v0_4_1/__init__.py
@@ -344,6 +344,8 @@ class ModelInputConfig(BaseModel):
     )
     trust_remote_code: Optional[bool] = None
 
+    model_kwargs: Optional[Dict[str, Any]] = None
+
     @field_validator("trust_remote_code")
     @classmethod
     def hint_trust_remote_code(cls, trust_remote_code):
@@ -637,6 +639,8 @@ class AxolotlInputConfig(
     flash_attn_fuse_mlp: Optional[bool] = None
     flash_optimum: Optional[bool] = None
 
+    eager_attention: Optional[bool] = None
+
     unsloth_cross_entropy_loss: Optional[bool] = None
     unsloth_lora_mlp: Optional[bool] = None
     unsloth_lora_qkv: Optional[bool] = None
@@ -1302,6 +1306,19 @@ class AxolotlConfigWCapabilities(AxolotlInputConfig):
 
         return data
 
+    @model_validator(mode="before")
+    @classmethod
+    def check_hopper_8bit_lora(cls, data):
+        """Reject 8-bit adapters on sm_90 (Hopper) GPUs."""
+        # `capabilities` may be missing or None; treat both as "not sm_90"
+        capabilities = data.get("capabilities") or {}
+        is_sm_90 = capabilities.get("compute_capability") == "sm_90"
+        if data.get("adapter") and data.get("load_in_8bit") and is_sm_90:
+            # see https://github.com/bitsandbytes-foundation/bitsandbytes/issues/538#issuecomment-2262945464
+            raise ValueError("8-bit LoRA is not supported on Hopper GPUs")
+
+        return data
+
     @model_validator(mode="before")
     @classmethod
     def check_fsdp_deepspeed(cls, data):
diff --git a/src/axolotl/utils/trainer.py b/src/axolotl/utils/trainer.py
index 02234d8b7..26796f2e5 100644
--- a/src/axolotl/utils/trainer.py
+++ b/src/axolotl/utils/trainer.py
@@ -390,6 +390,15 @@ def calculate_total_num_steps(cfg, train_dataset, update=True):
     return total_num_steps
 
 
+def setup_torch_compile_env(cfg):
+    """Export ACCELERATE_DYNAMO_BACKEND when ``cfg.torch_compile`` is enabled."""
+    if not cfg.torch_compile:
+        return
+    # fall back to inductor when no backend is configured
+    backend = cfg.torch_compile_backend or "inductor"
+    os.environ["ACCELERATE_DYNAMO_BACKEND"] = backend.upper()
+
+
 def setup_deepspeed_env(cfg, stage=None):
     os.environ["ACCELERATE_USE_DEEPSPEED"] = "true"
     os.environ["ACCELERATE_DEEPSPEED_CONFIG_FILE"] = cfg.deepspeed
@@ -434,6 +443,8 @@ def prepare_optim_env(cfg):
             stage = deepspeed_config.get("zero_optimization", {}).get("stage", None)
         setup_deepspeed_env(cfg, stage=stage)
 
+    setup_torch_compile_env(cfg)
+
     if (cfg.bf16 == "auto" and is_torch_bf16_gpu_available()) or cfg.bf16 is True:
         os.environ["ACCELERATE_MIXED_PRECISION"] = "bf16"
     elif cfg.fp16: