diff --git a/examples/phi/README.md b/examples/phi/README.md index 6e12eec18..1109db0b5 100644 --- a/examples/phi/README.md +++ b/examples/phi/README.md @@ -1,7 +1,11 @@ # Phi -Due to some nuances with the phi code, please use deepspeed when training phi. +Due to some nuances with the phi code, please use deepspeed when training phi for a full fine-tune. ```shell -accelerate launch scripts/finetune.py examples/phi/phi-ft.yml --deepspeed deepspeed/zero1.json +accelerate launch -m axolotl.cli.train examples/phi/phi-ft.yml --deepspeed deepspeed/zero1.json + +# OR + +python -m axolotl.cli.train examples/phi/phi-qlora.yml ``` diff --git a/examples/phi/phi-qlora.yml b/examples/phi/phi-qlora.yml new file mode 100644 index 000000000..f3a06c39d --- /dev/null +++ b/examples/phi/phi-qlora.yml @@ -0,0 +1,75 @@ +base_model: microsoft/phi-1_5 +base_model_config: microsoft/phi-1_5 +model_type: AutoModelForCausalLM +tokenizer_type: AutoTokenizer +is_llama_derived_model: false +trust_remote_code: true + +load_in_8bit: false +load_in_4bit: true +strict: false + +datasets: + - path: garage-bAInd/Open-Platypus + type: alpaca + +dataset_prepared_path: last_run_prepared +val_set_size: 0.05 +output_dir: ./phi-sft-out + +sequence_len: 1024 +sample_packing: false # not CURRENTLY compatible with LoRAs +pad_to_sequence_len: + +adapter: qlora +lora_model_dir: +lora_r: 64 +lora_alpha: 32 +lora_dropout: 0.05 +lora_target_linear: true +lora_fan_in_fan_out: + +wandb_project: +wandb_entity: +wandb_watch: +wandb_run_id: +wandb_log_model: + +gradient_accumulation_steps: 1 +micro_batch_size: 1 +num_epochs: 4 +optimizer: adamw_torch +adam_beta2: 0.95 +adam_epsilon: 0.00001 +max_grad_norm: 1.0 +lr_scheduler: cosine +learning_rate: 0.000003 + +train_on_inputs: false +group_by_length: true +bf16: true +fp16: false +tf32: true + +gradient_checkpointing: +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: + +warmup_steps: 100 +eval_steps: 
0.05 +save_steps: +debug: +deepspeed: +weight_decay: 0.1 +fsdp: +fsdp_config: +resize_token_embeddings_to_32x: true +special_tokens: + bos_token: "<|endoftext|>" + eos_token: "<|endoftext|>" + unk_token: "<|endoftext|>" + pad_token: "<|endoftext|>" diff --git a/src/axolotl/utils/config.py b/src/axolotl/utils/config.py index a31f34b73..dab6961a2 100644 --- a/src/axolotl/utils/config.py +++ b/src/axolotl/utils/config.py @@ -75,6 +75,7 @@ def normalize_config(cfg): cfg.torch_dtype = torch.float32 model_config = load_model_config(cfg) + cfg.model_config_type = model_config.model_type # figure out if the model is llama cfg.is_llama_derived_model = ( @@ -237,6 +238,21 @@ def validate_config(cfg): raise ValueError( "`early_stopping_patience` requires that eval_steps should evenly divide save_steps." ) + + if cfg.model_type == "MixFormerSequentialForCausalLM" and cfg.adapter is not None: + LOG.warning("Use AutoModelForCausalLM for phi/MixFormer models with qLoRA") + + if cfg.model_config_type == "mixformer-sequential": + if cfg.sample_packing: + if cfg.adapter is not None: + LOG.warning( + "phi/MixFormer models are not currently compatible with LoRA and sample_packing" + ) + if cfg.model_type == "AutoModelForCausalLM": + raise ValueError( + "`model_type: MixFormerSequentialForCausalLM` required for sample_packing" + ) + # TODO # MPT 7b # https://github.com/facebookresearch/bitsandbytes/issues/25 diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py index ab5bbc267..9582205f9 100644 --- a/src/axolotl/utils/models.py +++ b/src/axolotl/utils/models.py @@ -1,6 +1,5 @@ """Module for models and model loading""" - - +import importlib import logging import math import os @@ -155,11 +154,26 @@ def load_model( LOG.info("patching _expand_mask") hijack_expand_mask() + model_config = load_model_config(cfg) + + # special handling b/c remote MixFormers code doesn't have _no_split_modules set + if ( + "MixFormerSequentialConfig" in model_config.__class__.__name__ + and 
cfg.model_type == "AutoModelForCausalLM" + ): + module_name = model_config.__class__.__module__.replace( + ".configuration_mixformer_sequential", ".modeling_mixformer_sequential" + ) + modeling_phi = importlib.import_module(module_name) + # pylint:disable=protected-access + modeling_phi.MixFormerSequentialForCausalLM._no_split_modules = [ + "ParallelBlock" + ] + model_kwargs = {} if cfg.model_revision: model_kwargs["revision"] = cfg.model_revision if cfg.gptq: - model_config = load_model_config(cfg) if not hasattr(model_config, "quantization_config"): LOG.warning("model config does not contain quantization_config information") else: