diff --git a/scripts/finetune.py b/scripts/finetune.py
index 57d2520d3..b25412e7f 100644
--- a/scripts/finetune.py
+++ b/scripts/finetune.py
@@ -10,11 +10,11 @@ from typing import Optional, List, Dict, Any, Union
 import fire
 import torch
 import yaml
-from addict import Dict
 
 # add src to the pythonpath so we don't need to pip install this
 from axolotl.utils.tokenization import check_dataset_labels
 from axolotl.utils.validation import validate_config
+from axolotl.utils.dict import DictDefault
 
 project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
 src_dir = os.path.join(project_root, "src")
@@ -131,7 +131,7 @@ def train(
 
     # load the config from the yaml file
     with open(config, "r") as f:
-        cfg: Dict = Dict(yaml.load(f, Loader=yaml.Loader))
+        cfg: DictDefault = DictDefault(yaml.load(f, Loader=yaml.Loader))
     # if there are any options passed in the cli, if it is something that seems valid from the yaml,
     # then overwrite the value
     cfg_keys = cfg.keys()
diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py
index 6538086fb..80e2d2447 100644
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -29,7 +29,7 @@ from axolotl.prompt_tokenizers import LLAMA_DEFAULT_PAD_TOKEN
 
 if TYPE_CHECKING:
     from peft import PeftModel, PeftConfig
-    from addict import Dict
+    from axolotl.utils.dict import DictDefault
     from transformers import PreTrainedTokenizer
 
 
@@ -79,7 +79,7 @@ def load_model(
     adapter="lora",
     inference=False,
 ):
-    # type: (str, str, str, str, Dict, Optional[str], bool) -> Tuple[PreTrainedModel, PreTrainedTokenizer, Optional[PeftConfig]]
+    # type: (str, str, str, str, DictDefault, Optional[str], bool) -> Tuple[PreTrainedModel, PreTrainedTokenizer, Optional[PeftConfig]]
     # TODO refactor as a kwarg
     load_in_8bit = cfg.load_in_8bit
 
@@ -294,7 +294,7 @@ def load_model(
 
 
 def load_adapter(model, cfg, adapter):
-    # type: (PreTrainedModel, Dict, Optional[str]) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
+    # type: (PreTrainedModel, DictDefault, Optional[str]) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
     if adapter is None:
         return model, None
 
@@ -307,7 +307,7 @@ def load_adapter(model, cfg):
 
 
 def load_llama_adapter(model, cfg):
-    # type: (PreTrainedModel, Dict) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
+    # type: (PreTrainedModel, DictDefault) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
     from peft import (
         AdaptionPromptConfig,
         get_peft_model,
@@ -355,7 +355,7 @@ def find_all_linear_names(bits, model):
 
 
 def load_lora(model, cfg):
-    # type: (PreTrainedModel, Dict) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
+    # type: (PreTrainedModel, DictDefault) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
     from peft import (
         LoraConfig,
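
Reviewer note: the new `axolotl.utils.dict` module that this patch imports is not included in the diff above, so the sketch below is only an assumption inferred from the call sites, not a copy of the real module: an addict-style dict whose missing keys resolve to None, so reads of unset YAML options such as `cfg.load_in_8bit` stay falsy instead of raising KeyError or auto-creating empty nested Dicts.

# Hypothetical sketch of src/axolotl/utils/dict.py; not part of this patch.
from addict import Dict


class DictDefault(Dict):
    """addict-style dict, except reads of missing keys or attributes return None
    instead of silently creating empty nested Dicts."""

    def __missing__(self, key):
        return None


if __name__ == "__main__":
    import yaml

    # Mirrors the config loading in scripts/finetune.py:
    #   cfg: DictDefault = DictDefault(yaml.load(f, Loader=yaml.Loader))
    cfg = DictDefault(yaml.load("load_in_8bit: true", Loader=yaml.Loader))
    assert cfg.load_in_8bit is True
    assert cfg.flash_attention is None  # unset option reads as None, stays falsy

If the real module differs in detail, the only behaviour the touched call sites appear to rely on is that attribute access on absent options is falsy (e.g. `load_in_8bit = cfg.load_in_8bit` in load_model).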