diff --git a/requirements.txt b/requirements.txt
index 1af103e17..27b31a139 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 peft @ git+https://github.com/huggingface/peft.git
 transformers @ git+https://github.com/huggingface/transformers.git
 bitsandbytes>=0.39.0
-attrdict
+addict
 fire
 PyYAML==6.0
 black
diff --git a/scripts/finetune.py b/scripts/finetune.py
index 8d7a18a4a..954ce1625 100644
--- a/scripts/finetune.py
+++ b/scripts/finetune.py
@@ -10,7 +10,7 @@ from typing import Optional, List, Dict, Any, Union
 import fire
 import torch
 import yaml
-from attrdict import AttrDefault
+from addict import Dict
 
 # add src to the pythonpath so we don't need to pip install this
 from axolotl.utils.tokenization import check_dataset_labels
@@ -131,7 +131,7 @@ def train(
 
     # load the config from the yaml file
     with open(config, "r") as f:
-        cfg: AttrDefault = AttrDefault(lambda: None, yaml.load(f, Loader=yaml.Loader))
+        cfg: Dict = Dict(lambda: None, yaml.load(f, Loader=yaml.Loader))
     # if there are any options passed in the cli, if it is something that seems valid from the yaml,
     # then overwrite the value
     cfg_keys = dict(cfg).keys()
diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py
index fe9f18979..6538086fb 100644
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -29,7 +29,7 @@ from axolotl.prompt_tokenizers import LLAMA_DEFAULT_PAD_TOKEN
 
 if TYPE_CHECKING:
     from peft import PeftModel, PeftConfig
-    from attrdict import AttrDefault
+    from addict import Dict
     from transformers import PreTrainedTokenizer
 
 
@@ -79,7 +79,7 @@ def load_model(
     adapter="lora",
     inference=False,
 ):
-    # type: (str, str, str, str, AttrDefault, Optional[str], bool) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
+    # type: (str, str, str, str, Dict, Optional[str], bool) -> Tuple[PreTrainedModel, PreTrainedTokenizer, Optional[PeftConfig]]
 
     # TODO refactor as a kwarg
     load_in_8bit = cfg.load_in_8bit
@@ -294,7 +294,7 @@ def load_model(
 
 
 def load_adapter(model, cfg, adapter):
-    # type: (PreTrainedModel, AttrDefault, Optional[str]) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
+    # type: (PreTrainedModel, Dict, Optional[str]) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
 
     if adapter is None:
         return model, None
@@ -307,7 +307,7 @@ def load_adapter(model, cfg, adapter):
 
 def load_llama_adapter(model, cfg):
-    # type: (PreTrainedModel, AttrDefault) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
+    # type: (PreTrainedModel, Dict) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
     from peft import (
         AdaptionPromptConfig,
         get_peft_model,
     )
@@ -355,7 +355,7 @@ def find_all_linear_names(bits, model):
 
 
 def load_lora(model, cfg):
-    # type: (PreTrainedModel, AttrDefault) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
+    # type: (PreTrainedModel, Dict) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
     from peft import (
         LoraConfig,
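
Note on the swap: addict's Dict does not accept a default factory the way attrdict's AttrDefault(lambda: None, ...) did, and a missing key on an addict Dict yields an empty (falsy) Dict rather than None. The sketch below shows one way to keep the old "missing key returns None" semantics with addict; the class name DictDefault and the config path are illustrative, not part of this patch.

# Minimal sketch, assuming only that addict is installed and the config is a YAML mapping.
import yaml
from addict import Dict


class DictDefault(Dict):
    """addict Dict variant that returns None for missing keys instead of an empty Dict."""

    def __missing__(self, key):
        # dict.__getitem__ falls back to __missing__, and addict routes
        # attribute access through __getitem__, so cfg.some_absent_key -> None.
        return None


with open("config.yml", "r") as f:  # hypothetical config path
    cfg = DictDefault(yaml.load(f, Loader=yaml.Loader))

# cfg.load_in_8bit is None when the key is absent, matching the old
# AttrDefault(lambda: None, ...) behaviour relied on elsewhere in the codebase.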