diff --git a/README.md b/README.md
index c70abf648..9afc37c43 100644
--- a/README.md
+++ b/README.md
@@ -96,7 +96,7 @@ accelerate launch -m axolotl.cli.train examples/openllama-3b/lora.yml
 
 # inference
 accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
-    --lora_model_dir="./lora-out"
+    --peft_model_dir="./lora-out"
 ```
 
 ## Installation
@@ -531,7 +531,7 @@ total_num_tokens:
 adapter: lora
 # If you already have a lora model trained that you want to load, put that here.
 # This means after training, if you want to test the model, you should set this to the value of `lora_out_dir`.
-lora_model_dir:
+peft_model_dir:
 
 # LoRA hyperparameters
 # For more details about the following options, see:
@@ -869,7 +869,7 @@ Pass the appropriate flag to the train command:
 
 - Pretrained LORA:
  ```bash
- python -m axolotl.cli.inference examples/your_config.yml --lora_model_dir="./lora-output-dir"
+ python -m axolotl.cli.inference examples/your_config.yml --peft_model_dir="./lora-output-dir"
  ```
 - Full weights finetune:
  ```bash
@@ -890,7 +890,7 @@ Please use `--sample_packing False` if you have it on and receive the error similar to below
 Add below flag to train command above
 
 ```bash
-python3 -m axolotl.cli.merge_lora examples/your_config.yml --lora_model_dir="./completed-model" --load_in_8bit=False --load_in_4bit=False
+python3 -m axolotl.cli.merge_lora examples/your_config.yml --peft_model_dir="./completed-model" --load_in_8bit=False --load_in_4bit=False
 ```
 
 If you run out of CUDA memory, you can try to merge in system RAM with
diff --git a/examples/cerebras/btlm-ft.yml b/examples/cerebras/btlm-ft.yml
index 4fd34aa5f..6b419072f 100644
--- a/examples/cerebras/btlm-ft.yml
+++ b/examples/cerebras/btlm-ft.yml
@@ -18,7 +18,7 @@ dataset_prepared_path: last_prepared_run
 val_set_size: 0.01
 
 adapter:
-lora_model_dir:
+peft_model_dir:
 sequence_len: 2048
 max_packed_sequence_len:
 sample_packing: false
diff --git a/examples/cerebras/qlora.yml b/examples/cerebras/qlora.yml
index a13517f3e..bab3d04d5 100644
--- a/examples/cerebras/qlora.yml
+++ b/examples/cerebras/qlora.yml
@@ -10,7 +10,7 @@ datasets:
 dataset_prepared_path:
 val_set_size: 0.01
 adapter: qlora
-lora_model_dir:
+peft_model_dir:
 sequence_len: 2048
 max_packed_sequence_len: 2048
 lora_r: 16
diff --git a/examples/code-llama/13b/lora.yml b/examples/code-llama/13b/lora.yml
index 91807846b..74aab34f9 100644
--- a/examples/code-llama/13b/lora.yml
+++ b/examples/code-llama/13b/lora.yml
@@ -20,7 +20,7 @@ sample_packing: true
 pad_to_sequence_len: true
 
 adapter: lora
-lora_model_dir:
+peft_model_dir:
 lora_r: 32
 lora_alpha: 16
 lora_dropout: 0.05
diff --git a/examples/code-llama/13b/qlora.yml b/examples/code-llama/13b/qlora.yml
index 9fa05ffab..a6b941089 100644
--- a/examples/code-llama/13b/qlora.yml
+++ b/examples/code-llama/13b/qlora.yml
@@ -16,7 +16,7 @@ val_set_size: 0.01
 output_dir: ./qlora-out
 
 adapter: qlora
-lora_model_dir:
+peft_model_dir:
 
 sequence_len: 4096
 sample_packing: true
diff --git a/examples/code-llama/34b/lora.yml b/examples/code-llama/34b/lora.yml
index a342b6ebc..9623c9c2c 100644
--- a/examples/code-llama/34b/lora.yml
+++ b/examples/code-llama/34b/lora.yml
@@ -20,7 +20,7 @@ sample_packing: true
 pad_to_sequence_len: true
 
 adapter: lora
-lora_model_dir:
+peft_model_dir:
 lora_r: 32
 lora_alpha: 16
 lora_dropout: 0.05
diff --git a/examples/code-llama/34b/qlora.yml b/examples/code-llama/34b/qlora.yml
index 1501dd9a3..6a980e407 100644
--- a/examples/code-llama/34b/qlora.yml
+++ b/examples/code-llama/34b/qlora.yml
@@ -16,7 +16,7 @@ val_set_size: 0.01
 output_dir: ./qlora-out
 
 adapter: qlora
-lora_model_dir:
+peft_model_dir:
 
 sequence_len: 4096
 sample_packing: true
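For context on what the renamed key points at: `peft_model_dir` names the directory where PEFT saved the trained adapter (`adapter_config.json` plus the adapter weights), and inference re-attaches it with `PeftModel.from_pretrained`, as the `load_lora` hunk near the end of this diff does. A minimal standalone sketch of that step; the base model name and the `./lora-out` path are illustrative, not taken from this PR:

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the base model the adapter was trained on (name is illustrative).
base = AutoModelForCausalLM.from_pretrained(
    "openlm-research/open_llama_3b_v2", torch_dtype=torch.float16
)
tokenizer = AutoTokenizer.from_pretrained("openlm-research/open_llama_3b_v2")

# Re-attach the trained adapter from the directory peft_model_dir points to.
# is_trainable=False freezes the adapter weights, matching inference-time use.
model = PeftModel.from_pretrained(base, "./lora-out", is_trainable=False)
model.eval()
```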
diff --git a/examples/code-llama/7b/lora.yml b/examples/code-llama/7b/lora.yml
index 638dddc43..4546d737b 100644
--- a/examples/code-llama/7b/lora.yml
+++ b/examples/code-llama/7b/lora.yml
@@ -20,7 +20,7 @@ sample_packing: true
 pad_to_sequence_len: true
 
 adapter: lora
-lora_model_dir:
+peft_model_dir:
 lora_r: 32
 lora_alpha: 16
 lora_dropout: 0.05
diff --git a/examples/code-llama/7b/qlora.yml b/examples/code-llama/7b/qlora.yml
index 5b3b33822..0b6d1c02d 100644
--- a/examples/code-llama/7b/qlora.yml
+++ b/examples/code-llama/7b/qlora.yml
@@ -16,7 +16,7 @@ val_set_size: 0.01
 output_dir: ./qlora-out
 
 adapter: qlora
-lora_model_dir:
+peft_model_dir:
 
 sequence_len: 4096
 sample_packing: true
diff --git a/examples/falcon/config-7b-lora.yml b/examples/falcon/config-7b-lora.yml
index f45deb643..79b7b62a3 100644
--- a/examples/falcon/config-7b-lora.yml
+++ b/examples/falcon/config-7b-lora.yml
@@ -15,7 +15,7 @@ datasets:
 dataset_prepared_path:
 val_set_size: 0.01
 adapter: lora
-lora_model_dir:
+peft_model_dir:
 sequence_len: 2048
 max_packed_sequence_len:
 lora_r: 16
diff --git a/examples/falcon/config-7b-qlora.yml b/examples/falcon/config-7b-qlora.yml
index f59341965..8fa914820 100644
--- a/examples/falcon/config-7b-qlora.yml
+++ b/examples/falcon/config-7b-qlora.yml
@@ -22,7 +22,7 @@ dataset_prepared_path:
 val_set_size: 0.01
 
 # enable QLoRA
 adapter: qlora
-lora_model_dir:
+peft_model_dir:
 sequence_len: 2048
 max_packed_sequence_len:
diff --git a/examples/falcon/config-7b.yml b/examples/falcon/config-7b.yml
index 777a97b31..adaea40ed 100644
--- a/examples/falcon/config-7b.yml
+++ b/examples/falcon/config-7b.yml
@@ -15,7 +15,7 @@ datasets:
 dataset_prepared_path:
 val_set_size: 0.01
 adapter:
-lora_model_dir:
+peft_model_dir:
 sequence_len: 2048
 max_packed_sequence_len:
 lora_r: 64
diff --git a/examples/gptj/qlora.yml b/examples/gptj/qlora.yml
index 696747dfe..b1b575155 100644
--- a/examples/gptj/qlora.yml
+++ b/examples/gptj/qlora.yml
@@ -10,7 +10,7 @@ datasets:
 dataset_prepared_path:
 val_set_size: 0.01
 adapter: qlora
-lora_model_dir:
+peft_model_dir:
 sequence_len: 2048
 max_packed_sequence_len:
 lora_r: 8
diff --git a/examples/jeopardy-bot/config.yml b/examples/jeopardy-bot/config.yml
index 32e7a34ee..946308d36 100644
--- a/examples/jeopardy-bot/config.yml
+++ b/examples/jeopardy-bot/config.yml
@@ -9,7 +9,7 @@ datasets:
 dataset_prepared_path:
 val_set_size: 0.02
 adapter:
-lora_model_dir:
+peft_model_dir:
 sequence_len: 512
 max_packed_sequence_len:
 lora_r:
diff --git a/examples/llama-2/gptq-lora.yml b/examples/llama-2/gptq-lora.yml
index 257433f26..48ac43a96 100644
--- a/examples/llama-2/gptq-lora.yml
+++ b/examples/llama-2/gptq-lora.yml
@@ -18,7 +18,7 @@ datasets:
 dataset_prepared_path:
 val_set_size: 0.01
 adapter: lora
-lora_model_dir:
+peft_model_dir:
 sequence_len: 4096
 sample_packing:
 lora_r: 8
diff --git a/examples/llama-2/ia3.yml b/examples/llama-2/ia3.yml
index a914a9179..48d81d5f0 100644
--- a/examples/llama-2/ia3.yml
+++ b/examples/llama-2/ia3.yml
@@ -20,7 +20,7 @@ sample_packing: true
 pad_to_sequence_len: true
 
 adapter: ia3
-ia3_model_dir:
+peft_model_dir:
 ia3_target_modules:
   - k_proj
   - v_proj
diff --git a/examples/llama-2/lora.yml b/examples/llama-2/lora.yml
index 8c0e3e910..e196183ca 100644
--- a/examples/llama-2/lora.yml
+++ b/examples/llama-2/lora.yml
@@ -20,7 +20,7 @@ sample_packing: true
 pad_to_sequence_len: true
 
 adapter: lora
-lora_model_dir:
+peft_model_dir:
 lora_r: 32
 lora_alpha: 16
 lora_dropout: 0.05
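The README's `merge_lora` command above likewise switches to `--peft_model_dir`. For orientation, the merge it drives amounts to folding the adapter deltas back into the base weights. A rough standalone sketch using PEFT's `merge_and_unload`, not axolotl's actual implementation; the model name and paths are illustrative:

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM

# Base model and paths are illustrative placeholders.
base = AutoModelForCausalLM.from_pretrained(
    "NousResearch/Llama-2-7b-hf", torch_dtype=torch.float16
)
model = PeftModel.from_pretrained(base, "./completed-model")

# Fold the LoRA deltas into the base weights and drop the adapter wrappers,
# leaving a plain transformers model that can be saved and shipped.
merged = model.merge_and_unload()
merged.save_pretrained("./merged-model")
```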
diff --git a/examples/llama-2/qlora.yml b/examples/llama-2/qlora.yml
index b8209934c..bdf40e1a8 100644
--- a/examples/llama-2/qlora.yml
+++ b/examples/llama-2/qlora.yml
@@ -16,7 +16,7 @@ val_set_size: 0.01
 output_dir: ./qlora-out
 
 adapter: qlora
-lora_model_dir:
+peft_model_dir:
 
 sequence_len: 4096
 sample_packing: true
diff --git a/examples/llama-2/relora.yml b/examples/llama-2/relora.yml
index 9f27cafea..be3b38efc 100644
--- a/examples/llama-2/relora.yml
+++ b/examples/llama-2/relora.yml
@@ -16,7 +16,7 @@ val_set_size: 0.01
 output_dir: ./relora-out
 
 adapter: qlora
-lora_model_dir:
+peft_model_dir:
 
 sequence_len: 4096
 sample_packing: true
diff --git a/examples/llama-2/tiny-llama.yml b/examples/llama-2/tiny-llama.yml
index 0b56ea7d3..3526333b1 100644
--- a/examples/llama-2/tiny-llama.yml
+++ b/examples/llama-2/tiny-llama.yml
@@ -20,7 +20,7 @@ sequence_len: 4096
 sample_packing: true
 
 adapter: lora
-lora_model_dir:
+peft_model_dir:
 lora_r: 32
 lora_alpha: 16
 lora_dropout: 0.05
diff --git a/examples/mpt-7b/config.yml b/examples/mpt-7b/config.yml
index 8d9b429b1..b57a83cd6 100644
--- a/examples/mpt-7b/config.yml
+++ b/examples/mpt-7b/config.yml
@@ -9,7 +9,7 @@ datasets:
 dataset_prepared_path:
 val_set_size: 0.02
 adapter:
-lora_model_dir:
+peft_model_dir:
 sequence_len: 2048
 max_packed_sequence_len:
 lora_r: 8
diff --git a/examples/openllama-3b/config.yml b/examples/openllama-3b/config.yml
index dd11d53b0..1f4338ca3 100644
--- a/examples/openllama-3b/config.yml
+++ b/examples/openllama-3b/config.yml
@@ -12,7 +12,7 @@ datasets:
 dataset_prepared_path:
 val_set_size: 0.02
 adapter:
-lora_model_dir:
+peft_model_dir:
 sequence_len: 1024
 sample_packing: true
 lora_r:
diff --git a/examples/openllama-3b/lora.yml b/examples/openllama-3b/lora.yml
index fad3fb551..a59b5883f 100644
--- a/examples/openllama-3b/lora.yml
+++ b/examples/openllama-3b/lora.yml
@@ -12,7 +12,7 @@ datasets:
 dataset_prepared_path:
 val_set_size: 0.02
 adapter: lora
-lora_model_dir:
+peft_model_dir:
 sequence_len: 1024
 sample_packing: true
 lora_r: 8
diff --git a/examples/openllama-3b/qlora.yml b/examples/openllama-3b/qlora.yml
index 80d4d727b..9ebd774fb 100644
--- a/examples/openllama-3b/qlora.yml
+++ b/examples/openllama-3b/qlora.yml
@@ -12,7 +12,7 @@ datasets:
 dataset_prepared_path:
 val_set_size: 0.01
 adapter: qlora
-lora_model_dir:
+peft_model_dir:
 sequence_len: 1024
 sample_packing: true
 lora_r: 8
diff --git a/examples/phi/phi-ft.yml b/examples/phi/phi-ft.yml
index 668eea317..30052252b 100644
--- a/examples/phi/phi-ft.yml
+++ b/examples/phi/phi-ft.yml
@@ -22,7 +22,7 @@ sample_packing: true
 pad_to_sequence_len:
 
 adapter:
-lora_model_dir:
+peft_model_dir:
 lora_r:
 lora_alpha:
 lora_dropout:
diff --git a/examples/phi/phi-qlora.yml b/examples/phi/phi-qlora.yml
index a548b3f05..d3116e207 100644
--- a/examples/phi/phi-qlora.yml
+++ b/examples/phi/phi-qlora.yml
@@ -22,7 +22,7 @@ sample_packing: false  # not CURRENTLY compatible with LoRAs
 pad_to_sequence_len:
 
 adapter: qlora
-lora_model_dir:
+peft_model_dir:
 lora_r: 64
 lora_alpha: 32
 lora_dropout: 0.05
diff --git a/examples/pythia-12b/config.yml b/examples/pythia-12b/config.yml
index 4e0e1523a..30040efb0 100644
--- a/examples/pythia-12b/config.yml
+++ b/examples/pythia-12b/config.yml
@@ -13,7 +13,7 @@ datasets:
 dataset_prepared_path:
 val_set_size: 0.05
 adapter:
-lora_model_dir:
+peft_model_dir:
 sequence_len: 2048
 max_packed_sequence_len: 2048
 lora_r: 64
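With this many example files repeating the same one-line change, a quick sweep is an easy way to confirm nothing under `examples/` still carries a legacy key. A throwaway check script, assuming PyYAML is installed; this script is not part of the PR:

```python
import pathlib

import yaml

LEGACY_KEYS = {"lora_model_dir", "ia3_model_dir"}

# Flag any example config that still uses a pre-rename adapter-dir key.
for path in sorted(pathlib.Path("examples").rglob("*.yml")):
    cfg = yaml.safe_load(path.read_text()) or {}
    stale = LEGACY_KEYS & set(cfg)
    if stale:
        print(f"{path}: still uses {sorted(stale)}")
```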
diff --git a/examples/pythia/lora.yml b/examples/pythia/lora.yml
index 6ff036621..fe20da563 100644
--- a/examples/pythia/lora.yml
+++ b/examples/pythia/lora.yml
@@ -7,7 +7,7 @@ datasets:
 dataset_prepared_path:
 val_set_size: 0.05
 adapter: lora
-lora_model_dir:
+peft_model_dir:
 sequence_len: 512
 lora_r: 16
 lora_alpha: 32
diff --git a/examples/redpajama/config-3b.yml b/examples/redpajama/config-3b.yml
index 97f31c87a..3dc3a1674 100644
--- a/examples/redpajama/config-3b.yml
+++ b/examples/redpajama/config-3b.yml
@@ -10,7 +10,7 @@ datasets:
 dataset_prepared_path:
 val_set_size: 0.02
 adapter:
-lora_model_dir:
+peft_model_dir:
 sequence_len: 2048
 max_packed_sequence_len:
 lora_r: 8
diff --git a/examples/replit-3b/config-lora.yml b/examples/replit-3b/config-lora.yml
index d345e25a0..09174e6ba 100644
--- a/examples/replit-3b/config-lora.yml
+++ b/examples/replit-3b/config-lora.yml
@@ -8,7 +8,7 @@ datasets:
 dataset_prepared_path:
 val_set_size: 0.05
 adapter: lora
-lora_model_dir:
+peft_model_dir:
 sequence_len: 2048
 max_packed_sequence_len:
 lora_r: 8
diff --git a/examples/xgen-7b/xgen-7b-8k-qlora.yml b/examples/xgen-7b/xgen-7b-8k-qlora.yml
index 352dcb610..1fb61118d 100644
--- a/examples/xgen-7b/xgen-7b-8k-qlora.yml
+++ b/examples/xgen-7b/xgen-7b-8k-qlora.yml
@@ -20,7 +20,7 @@ dataset_prepared_path:
 val_set_size: 0.01
 
 # enable QLoRA
 adapter: qlora
-lora_model_dir:
+peft_model_dir:
 sequence_len: 8192
 max_packed_sequence_len:
diff --git a/src/axolotl/utils/config.py b/src/axolotl/utils/config.py
index 9503d838c..1c0a15d67 100644
--- a/src/axolotl/utils/config.py
+++ b/src/axolotl/utils/config.py
@@ -190,7 +190,10 @@ def validate_config(cfg):
             raise ValueError("Require cfg.load_in_4bit to be True for qlora")
 
     if not cfg.load_in_8bit and cfg.adapter == "lora":
-        LOG.warning("We recommend setting `load_in_8bit: true` for LORA finetuning")
+        LOG.warning("We recommend setting `load_in_8bit: true` for LoRA finetuning")
+
+    if not cfg.load_in_8bit and cfg.adapter == "ia3":
+        LOG.warning("We recommend setting `load_in_8bit: true` for IA3 finetuning")
 
     if cfg.relora_steps:
         if cfg.adapter not in ("lora", "qlora"):
diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py
index 5a11c1711..b99c95158 100644
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -406,23 +406,21 @@ def load_model(
             if hasattr(module, "weight"):
                 module.to(torch.float32)
 
-    needs_fa2_dtype = cfg.adapter or cfg.fsdp
-    if (
-        (cfg.adapter == "lora" and cfg.load_in_8bit)
-        or (cfg.adapter == "qlora" and cfg.load_in_4bit)
-        or (cfg.adapter == "ia3" and cfg.load_in_8bit)
-    ):
+    require_peft: bool = False
+    if cfg.adapter in ["lora", "qlora", "ia3"]:
+        require_peft = True
+
+    if require_peft:
         LOG.info("converting PEFT model w/ prepare_model_for_kbit_training")
         if cfg.gradient_checkpointing:
             model.gradient_checkpointing_enable()
         model = prepare_model_for_kbit_training(
            model, use_gradient_checkpointing=cfg.gradient_checkpointing
         )
-        needs_fa2_dtype = True
 
     # LlamaRMSNorm layers are in fp32 after kbit_training or full finetune, so we need to
     # convert them back to fp16/bf16 for flash-attn compatibility.
-    if needs_fa2_dtype or (cfg.flash_attention and cfg.is_llama_derived_model):
+    if require_peft or cfg.fsdp or (cfg.flash_attention and cfg.is_llama_derived_model):
         LOG.info("converting modules to %s for flash attention", cfg.torch_dtype)
         for name, module in model.named_modules():
             if "norm" in name:
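Two behavioral notes on this hunk: the old guard only ran `prepare_model_for_kbit_training` when the adapter was paired with its matching quantization flag, while the new `require_peft` fires for any lora/qlora/ia3 run, quantized or not; and the dtype reconversion no longer triggers on a bare truthy `cfg.adapter`. A condensed sketch of the resulting control flow, wrapped in a hypothetical helper (`prepare_for_peft` is not a function in this PR):

```python
from peft import prepare_model_for_kbit_training


def prepare_for_peft(model, cfg):
    """Hypothetical condensation of the load_model() logic above."""
    # Any PEFT adapter now triggers k-bit preparation, regardless of
    # load_in_8bit/load_in_4bit (the old guard required a matching flag).
    require_peft = cfg.adapter in ["lora", "qlora", "ia3"]
    if require_peft:
        if cfg.gradient_checkpointing:
            model.gradient_checkpointing_enable()
        model = prepare_model_for_kbit_training(
            model, use_gradient_checkpointing=cfg.gradient_checkpointing
        )

    # k-bit preparation leaves norm layers in fp32; cast them back down so
    # flash-attn sees a uniform dtype.
    if require_peft or cfg.fsdp or (cfg.flash_attention and cfg.is_llama_derived_model):
        for name, module in model.named_modules():
            if "norm" in name:
                module.to(cfg.torch_dtype)
    return model
```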
@@ -492,11 +490,11 @@ def load_llama_adapter(model, cfg):
         task_type="CAUSAL_LM",
     )
 
-    if cfg.lora_model_dir:
+    if cfg.peft_model_dir or cfg.lora_model_dir:
         LOG.debug("Loading pretained PEFT - llama_adapter")
         model = PeftModel.from_pretrained(
             model,
-            cfg.lora_model_dir,
+            cfg.peft_model_dir or cfg.lora_model_dir,
             torch_dtype=torch.float16,
         )
     else:
@@ -548,11 +546,11 @@ def load_lora(model, cfg, inference=False):
         task_type="CAUSAL_LM",
     )
 
-    if cfg.lora_model_dir:
+    if cfg.peft_model_dir:
         LOG.debug("Loading pretained PEFT - LoRA")
         model = PeftModel.from_pretrained(
             model,
-            cfg.lora_model_dir,
+            cfg.peft_model_dir,
             is_trainable=(not inference),
         )
     else:
@@ -581,11 +579,11 @@ def load_ia3(model, cfg, inference=False):
         **ia3_config_kwargs,
     )
 
-    if cfg.ia3_model_dir:
+    if cfg.peft_model_dir:
         LOG.debug("Loading pretained PEFT - IA3")
         model = PeftModel.from_pretrained(
             model,
-            cfg.ia3_model_dir,
+            cfg.peft_model_dir,
             is_trainable=(not inference),
         )
     else:
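One inconsistency worth flagging: after this diff, `load_llama_adapter` still falls back to the legacy `cfg.lora_model_dir`, but `load_lora` and `load_ia3` read only `cfg.peft_model_dir`, so an old config with `lora_model_dir` set would silently take the `else` branch there. A sketch of one way to smooth the migration; `normalize_peft_cfg` is a hypothetical helper, not code from this PR, and it assumes `cfg` allows attribute assignment:

```python
import logging

LOG = logging.getLogger(__name__)


def normalize_peft_cfg(cfg):
    """Hypothetical shim: map legacy adapter-dir keys onto peft_model_dir
    once, up front, so every loader sees the new key."""
    for legacy in ("lora_model_dir", "ia3_model_dir"):
        value = getattr(cfg, legacy, None)
        if value and not getattr(cfg, "peft_model_dir", None):
            LOG.warning("`%s` is deprecated; use `peft_model_dir` instead", legacy)
            cfg.peft_model_dir = value
    return cfg
```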