fix(dataset): normalize tokenizer config and change hash from tokenizer class to tokenizer path (#1298)

* fix(dataset): normalize tokenizer config and change hash from tokenizer class to tokenizer path
* fix: normalize config
2024-03-25 15:34:54 +09:00
parent 324d59ea0d
commit ff939d8a64
4 changed files with 13 additions and 4 deletions
--- a/tests/core/test_trainer_builder.py
+++ b/tests/core/test_trainer_builder.py
@@ -1,16 +1,18 @@
 """
 unit tests for axolotl.core.trainer_builder
 """
+
 import pytest

 from axolotl.core.trainer_builder import HFDPOTrainerBuilder
+from axolotl.utils.config import normalize_config
 from axolotl.utils.dict import DictDefault
 from axolotl.utils.models import load_model, load_tokenizer


@pytest.fixture(name="cfg")
 def fixture_cfg():
-    return DictDefault(
+    cfg = DictDefault(
        {
            "base_model": "TinyLlama/TinyLlama-1.1B-Chat-v0.6",
            "model_type": "AutoModelForCausalLM",
@@ -34,6 +36,10 @@ def fixture_cfg():
        }
    )

+    normalize_config(cfg)
+
+    return cfg
+

@pytest.fixture(name="tokenizer")
 def fixture_tokenizer(cfg):