fix(dataset): normalize tokenizer config and change hash from tokenizer class to tokenizer path (#1298)

* fix(dataset): normalize tokenizer config and change hash from tokenizer class to tokenizer path

* fix: normalize config
This commit is contained in:
NanoCode012
2024-03-25 15:34:54 +09:00
committed by GitHub
parent 324d59ea0d
commit ff939d8a64
4 changed files with 13 additions and 4 deletions

View File

@@ -119,6 +119,10 @@ def normalize_config(cfg):
model_config = load_model_config(cfg)
cfg.model_config_type = model_config.model_type
cfg.tokenizer_config = (
cfg.tokenizer_config or cfg.base_model_config or cfg.base_model
)
# figure out if the model is llama
cfg.is_llama_derived_model = (
(hasattr(model_config, "model_type") and model_config.model_type == "llama")