diff --git a/README.md b/README.md
index 32817b709..6064d1b21 100644
--- a/README.md
+++ b/README.md
@@ -14,12 +14,13 @@

 ## Axolotl supports

-|          | fp16/fp32 | fp16/fp32 w/ lora | 4bit-quant | 4bit-quant w/flash attention | flash attention | xformers attention |
-|----------|:----------|:------------------|------------|------------------------------|-----------------|--------------------|
-| llama    | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
-| Pythia   | ✅ | ✅ | ❌ | ❌ | ❌ | ❓ |
-| cerebras | ✅ | ✅ | ❌ | ❌ | ❌ | ❓ |
-| mpt      | ✅ | ❌ | ❌ | ❌ | ❌ | ❓ |
+|          | fp16/fp32 | fp16/fp32 w/ lora | qlora | 4bit-quant | 4bit-quant w/flash attention | flash attention | xformers attention |
+|---------|:----------|:------------------|------|------------|------------------------------|-----------------|--------------------|
+| llama    | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| Pythia   | ✅ | ✅ | ❓ | ❌ | ❌ | ❌ | ❓ |
+| cerebras | ✅ | ✅ | ❓ | ❌ | ❌ | ❌ | ❓ |
+| mpt      | ✅ | ❌ | ❓ | ❌ | ❌ | ❌ | ❓ |
+| falcon   | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❓ |

 ## Quickstart ⚡

diff --git a/examples/falcon/config-7b-lora.yml b/examples/falcon/config-7b-lora.yml
new file mode 100644
index 000000000..1291198cf
--- /dev/null
+++ b/examples/falcon/config-7b-lora.yml
@@ -0,0 +1,64 @@
+base_model: tiiuae/falcon-7b
+base_model_config: tiiuae/falcon-7b
+trust_remote_code: true
+model_type: AutoModelForCausalLM
+tokenizer_type: AutoTokenizer
+load_in_8bit: true
+load_in_4bit: false
+gptq: false
+strict: false
+push_dataset_to_hub:
+datasets:
+  - path: teknium/GPT4-LLM-Cleaned
+    type: alpaca:chat
+dataset_prepared_path: last_run_prepared
+val_set_size: 0.01
+adapter: lora
+lora_model_dir:
+sequence_len: 2048
+max_packed_sequence_len:
+lora_r: 16
+lora_alpha: 32
+lora_dropout: 0.0
+lora_target_modules:
+lora_target_linear: true
+lora_fan_in_fan_out:
+wandb_project: falcon-7b
+wandb_watch:
+wandb_run_id:
+wandb_log_model:
+output_dir: ./falcon-7b
+batch_size: 2
+micro_batch_size: 1
+num_epochs: 4
+optimizer: adamw_bnb_8bit
+torchdistx_path:
+lr_scheduler: cosine
+learning_rate: 0.00003
+train_on_inputs: false
+group_by_length: false
+bf16: true
+fp16: false
+tf32: true
+gradient_checkpointing: true
+early_stopping_patience:
+resume_from_checkpoint:
+local_rank:
+logging_steps: 1
+xformers_attention: true
+flash_attention:
+gptq_groupsize:
+gptq_model_v1:
+warmup_steps: 40
+eval_steps: 5
+save_steps: 43
+debug:
+deepspeed:
+weight_decay: 0.0
+fsdp:
+fsdp_config:
+special_tokens:
+  pad_token: "<|endoftext|>"
+  bos_token: ">>ABSTRACT<<"
+  eos_token: "<|endoftext|>"
+
diff --git a/examples/falcon/config-7b.yml b/examples/falcon/config-7b.yml
new file mode 100644
index 000000000..787c4121c
--- /dev/null
+++ b/examples/falcon/config-7b.yml
@@ -0,0 +1,64 @@
+base_model: tiiuae/falcon-7b
+base_model_config: tiiuae/falcon-7b
+trust_remote_code: true
+model_type: AutoModelForCausalLM
+tokenizer_type: AutoTokenizer
+load_in_8bit: false
+load_in_4bit: false
+gptq: false
+strict: false
+push_dataset_to_hub:
+datasets:
+  - path: teknium/GPT4-LLM-Cleaned
+    type: alpaca:chat
+dataset_prepared_path: last_run_prepared
+val_set_size: 0.01
+adapter:
+lora_model_dir:
+sequence_len: 2048
+max_packed_sequence_len:
+lora_r: 64
+lora_alpha: 32
+lora_dropout: 0.0
+lora_target_modules:
+lora_target_linear: true
+lora_fan_in_fan_out:
+wandb_project: falcon-7b
+wandb_watch:
+wandb_run_id:
+wandb_log_model:
+output_dir: ./falcon-7b
+batch_size: 2
+micro_batch_size: 1
+num_epochs: 4
+optimizer: adamw_bnb_8bit
+torchdistx_path:
+lr_scheduler: cosine
+learning_rate: 0.00003
+train_on_inputs: false
+group_by_length: false
+bf16: true
+fp16: false
+tf32: true
+gradient_checkpointing: true
+early_stopping_patience:
+resume_from_checkpoint:
+local_rank:
+logging_steps: 1
+xformers_attention: true
+flash_attention:
+gptq_groupsize:
+gptq_model_v1:
+warmup_steps: 40
+eval_steps: 5
+save_steps: 43
+debug:
+deepspeed:
+weight_decay: 0.0
+fsdp:
+fsdp_config:
+special_tokens:
+  pad_token: "<|endoftext|>"
+  bos_token: ">>ABSTRACT<<"
+  eos_token: "<|endoftext|>"
+
diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py
index 721584888..c7eddf1f9 100644
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -364,7 +364,7 @@ def load_lora(model, cfg):
         PeftModel,
     )

-    lora_target_modules = list(cfg.lora_target_modules)
+    lora_target_modules = list(cfg.lora_target_modules or [])

     if cfg.lora_target_linear:
         bits = None
diff --git a/src/axolotl/utils/validation.py b/src/axolotl/utils/validation.py
index d56f28f6d..f85cc37f7 100644
--- a/src/axolotl/utils/validation.py
+++ b/src/axolotl/utils/validation.py
@@ -8,12 +8,12 @@ def validate_config(cfg):
     if cfg.adapter == "qlora":
         if cfg.merge_lora:
             # can't merge qlora if loaded in 8bit or 4bit
-            assert cfg.load_in_8bit is False
-            assert cfg.gptq is False
-            assert cfg.load_in_4bit is False
+            assert cfg.load_in_8bit is not True
+            assert cfg.gptq is not True
+            assert cfg.load_in_4bit is not True
         else:
-            assert cfg.load_in_8bit is False
-            assert cfg.gptq is False
+            assert cfg.load_in_8bit is not True
+            assert cfg.gptq is not True
             assert cfg.load_in_4bit is True

     if not cfg.load_in_8bit and cfg.adapter == "lora":
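
Why `load_lora` needed the `or []` guard: the new falcon configs leave `lora_target_modules:` blank, which the YAML loader surfaces as `None`, and `list(None)` raises a `TypeError`. A minimal sketch of the failure and the fix, using a plain `SimpleNamespace` as a stand-in for axolotl's config object (an assumption for illustration, not the real class):

```python
from types import SimpleNamespace

# Stand-in for axolotl's config object; in the real code a bare
# `lora_target_modules:` key in the YAML loads as None.
cfg = SimpleNamespace(lora_target_modules=None)

# Old behavior: list(None) raises TypeError for configs that leave the key blank.
try:
    modules = list(cfg.lora_target_modules)
except TypeError as err:
    print(f"old code fails: {err}")  # 'NoneType' object is not iterable

# New behavior: fall back to an empty list, letting `lora_target_linear: true`
# discover the linear layers to target instead.
modules = list(cfg.lora_target_modules or [])
print(modules)  # []
```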
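
Why the validation asserts changed from `is False` to `is not True`: keys omitted from the YAML come back as `None`, and `None is False` evaluates to `False`, so the old asserts rejected valid configs that merely left the key unset. A quick standalone illustration of the two identity checks (plain Python, nothing axolotl-specific):

```python
# Unset YAML keys surface as None. `is False` treats None as a violation;
# `is not True` only rejects an explicit `true`.
for load_in_8bit in (None, False, True):
    old_ok = load_in_8bit is False     # old assert condition
    new_ok = load_in_8bit is not True  # new assert condition
    print(f"{load_in_8bit!r}: old={old_ok}, new={new_ok}")

# Output:
# None: old=False, new=True    <- unset key no longer trips the assert
# False: old=True, new=True
# True: old=False, new=False   <- explicit 8-bit loading is still rejected
```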