From 0f985e12fed918e00919320bfd19d72fbe0bfcd1 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Wed, 28 Feb 2024 12:57:45 -0500 Subject: [PATCH] more fixes 20240228 (#1342) [skip ci] * add missing evals_per_epoch setting * more pydantic fixes * more fixes * move test from normalization to validation * increase eval size for sample packing tests --- src/axolotl/cli/__init__.py | 3 ++- src/axolotl/utils/config/__init__.py | 3 --- .../utils/config/models/input/v0_4_1/__init__.py | 10 ++++++++++ src/axolotl/utils/trainer.py | 2 +- tests/e2e/patched/test_lora_llama_multipack.py | 2 +- tests/test_normalize_config.py | 14 -------------- tests/test_validation.py | 14 ++++++++++++++ 7 files changed, 28 insertions(+), 20 deletions(-) diff --git a/src/axolotl/cli/__init__.py b/src/axolotl/cli/__init__.py index abca478e4..79a9d3193 100644 --- a/src/axolotl/cli/__init__.py +++ b/src/axolotl/cli/__init__.py @@ -13,7 +13,6 @@ from threading import Thread from typing import Any, Dict, List, Optional, Union from urllib.parse import urlparse -import gradio as gr import requests import torch import yaml @@ -215,6 +214,8 @@ def do_inference_gradio( cfg: DictDefault, cli_args: TrainerCliArgs, ): + import gradio as gr + model, tokenizer = load_model_and_tokenizer(cfg=cfg, cli_args=cli_args) prompter = cli_args.prompter default_tokens = {"unk_token": "", "bos_token": "", "eos_token": ""} diff --git a/src/axolotl/utils/config/__init__.py b/src/axolotl/utils/config/__init__.py index d289054f4..99ce27321 100644 --- a/src/axolotl/utils/config/__init__.py +++ b/src/axolotl/utils/config/__init__.py @@ -164,9 +164,6 @@ def normalize_config(cfg): ] ) or cfg.is_qwen_derived_model - if isinstance(cfg.learning_rate, str): - cfg.learning_rate = float(cfg.learning_rate) - if isinstance(cfg.pretraining_dataset, dict): cfg.pretraining_dataset = [cfg.pretraining_dataset] diff --git a/src/axolotl/utils/config/models/input/v0_4_1/__init__.py b/src/axolotl/utils/config/models/input/v0_4_1/__init__.py index 4de51544e..0a39f269a 100644 --- a/src/axolotl/utils/config/models/input/v0_4_1/__init__.py +++ b/src/axolotl/utils/config/models/input/v0_4_1/__init__.py @@ -302,6 +302,13 @@ class HyperparametersConfig(BaseModel): ) return batch_size + @field_validator("learning_rate") + @classmethod + def convert_learning_rate(cls, learning_rate): + if learning_rate and isinstance(learning_rate, str): + learning_rate = float(learning_rate) + return learning_rate + class ModelOutputConfig(BaseModel): """model save configuration subset""" @@ -368,6 +375,7 @@ class AxolotlInputConfig( rl: Optional[RLType] = None datasets: Optional[conlist(Union[SFTDataset, DPODataset], min_length=1)] = None # type: ignore + test_datasets: Optional[conlist(Union[SFTDataset, DPODataset], min_length=1)] = None # type: ignore dataset_prepared_path: Optional[str] = None dataset_shard_num: Optional[int] = None dataset_shard_idx: Optional[int] = None @@ -456,6 +464,7 @@ class AxolotlInputConfig( warmup_steps: Optional[int] = None warmup_ratio: Optional[float] = None eval_steps: Optional[Union[int, float]] = None + evals_per_epoch: Optional[Union[int]] = None evaluation_strategy: Optional[str] = None save_steps: Optional[Union[int, float]] = None saves_per_epoch: Optional[int] = None @@ -463,6 +472,7 @@ class AxolotlInputConfig( save_total_limit: Optional[int] = None logging_steps: Optional[int] = None early_stopping_patience: Optional[int] = None + load_best_model_at_end: Optional[bool] = False neftune_noise_alpha: Optional[float] = None diff --git a/src/axolotl/utils/trainer.py b/src/axolotl/utils/trainer.py index 6d4168fbd..e52f35ccc 100644 --- a/src/axolotl/utils/trainer.py +++ b/src/axolotl/utils/trainer.py @@ -255,7 +255,7 @@ def calculate_total_num_steps(cfg, train_dataset, update=True): train_dataset.remove_columns(["length"]), batch_sampler=sampler, ) - data_loader_len = len(data_loader) // batch_size + data_loader_len = len(data_loader) // cfg.batch_size actual_eff = sampler.efficiency() LOG.debug(f"data_loader_len: {data_loader_len}", main_process_only=True) # FIXME: is there a bug here somewhere? the total num steps depends diff --git a/tests/e2e/patched/test_lora_llama_multipack.py b/tests/e2e/patched/test_lora_llama_multipack.py index 079a8e924..f251f9b66 100644 --- a/tests/e2e/patched/test_lora_llama_multipack.py +++ b/tests/e2e/patched/test_lora_llama_multipack.py @@ -43,7 +43,7 @@ class TestLoraLlama(unittest.TestCase): "lora_alpha": 64, "lora_dropout": 0.05, "lora_target_linear": True, - "val_set_size": 0.1, + "val_set_size": 0.2, "special_tokens": { "unk_token": "", "bos_token": "", diff --git a/tests/test_normalize_config.py b/tests/test_normalize_config.py index 9d7573ff0..2e76ceb45 100644 --- a/tests/test_normalize_config.py +++ b/tests/test_normalize_config.py @@ -25,20 +25,6 @@ class NormalizeConfigTestCase(unittest.TestCase): } ) - def test_lr_as_float(self): - cfg = ( - self._get_base_cfg() - | DictDefault( # pylint: disable=unsupported-binary-operation - { - "learning_rate": "5e-5", - } - ) - ) - - normalize_config(cfg) - - assert cfg.learning_rate == 0.00005 - def test_base_model_config_set_when_empty(self): cfg = self._get_base_cfg() del cfg.base_model_config diff --git a/tests/test_validation.py b/tests/test_validation.py index 4ec544351..790a4b171 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -176,6 +176,20 @@ class TestValidation(BaseValidation): with pytest.raises(ValueError, match=r".*At least two of*"): validate_config(cfg) + def test_lr_as_float(self, minimal_cfg): + cfg = ( + DictDefault( # pylint: disable=unsupported-binary-operation + { + "learning_rate": "5e-5", + } + ) + | minimal_cfg + ) + + new_cfg = validate_config(cfg) + + assert new_cfg.learning_rate == 0.00005 + def test_qlora(self, minimal_cfg): base_cfg = ( DictDefault(