diff --git a/src/axolotl/cli/evaluate.py b/src/axolotl/cli/evaluate.py index c89715719..a1859f315 100644 --- a/src/axolotl/cli/evaluate.py +++ b/src/axolotl/cli/evaluate.py @@ -1,6 +1,7 @@ """CLI to run evaluation on a model.""" import logging +import os from pathlib import Path from typing import Union @@ -14,6 +15,7 @@ from axolotl.cli.checks import check_accelerate_default_config, check_user_token from axolotl.cli.config import load_cfg from axolotl.common.datasets import load_datasets, load_preference_datasets from axolotl.evaluate import evaluate +from axolotl.utils import set_pytorch_cuda_alloc_conf from axolotl.utils.dict import DictDefault LOG = logging.getLogger(__name__) @@ -29,10 +31,14 @@ def do_evaluate(cfg: DictDefault, cli_args: TrainerCliArgs) -> None: cfg: Dictionary mapping `axolotl` config keys to values. cli_args: CLI arguments. """ + # Enable expandable segments for cuda allocation to improve VRAM usage + set_pytorch_cuda_alloc_conf() + # pylint: disable=duplicate-code print_axolotl_text_art() check_accelerate_default_config() - check_user_token() + if int(os.getenv("LOCAL_RANK", "0")) == 0: + check_user_token() if cfg.rl: dataset_meta = load_preference_datasets(cfg=cfg, cli_args=cli_args) diff --git a/src/axolotl/evaluate.py b/src/axolotl/evaluate.py index 216ff0110..a6a192bc7 100644 --- a/src/axolotl/evaluate.py +++ b/src/axolotl/evaluate.py @@ -12,11 +12,12 @@ from datasets import Dataset from transformers.trainer import Trainer from axolotl.logging_config import configure_logging -from axolotl.train import TrainDatasetMeta -from axolotl.utils import set_pytorch_cuda_alloc_conf +from axolotl.train import ( + TrainDatasetMeta, + setup_model_and_tokenizer, +) from axolotl.utils.dict import DictDefault from axolotl.utils.distributed import cleanup_distributed -from axolotl.utils.models import load_model, load_processor, load_tokenizer from axolotl.utils.trainer import setup_trainer project_root = 
os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) @@ -24,7 +25,7 @@ src_dir = os.path.join(project_root, "src") sys.path.insert(0, src_dir) configure_logging() -LOG = get_logger("axolotl.evaluate") +LOG = get_logger(__name__) def evaluate_dataset( @@ -75,37 +76,22 @@ def evaluate(*, cfg: DictDefault, dataset_meta: TrainDatasetMeta) -> Dict[str, f Returns: Dictionary mapping metric names to their values. """ - # pylint: disable=duplicate-code - # Enable expandable segments for cuda allocation to improve VRAM usage - set_pytorch_cuda_alloc_conf() - - # Load tokenizer - LOG.debug( - f"loading tokenizer... {cfg.tokenizer_config or cfg.base_model_config}", - main_process_only=True, - ) - tokenizer = load_tokenizer(cfg) - - # Load processor for multimodal models if needed - processor = None - if cfg.is_multimodal: - processor = load_processor(cfg, tokenizer) + # Load tokenizer, processor and model + LOG.debug("loading model for evaluation...") + model, tokenizer, _, processor = setup_model_and_tokenizer(cfg) # Get datasets + # pylint: disable=duplicate-code train_dataset = dataset_meta.train_dataset eval_dataset = dataset_meta.eval_dataset total_num_steps = dataset_meta.total_num_steps - # Load model - LOG.debug("loading model for evaluation...") - model, _ = load_model(cfg, tokenizer, processor=processor) - # Set up trainer trainer = setup_trainer( - cfg, + cfg=cfg, train_dataset=train_dataset, eval_dataset=eval_dataset, - model=(model, None, None), # No need for model_ref or peft_config + model=model, tokenizer=tokenizer, processor=processor, total_num_steps=total_num_steps, diff --git a/tests/e2e/test_evaluate.py b/tests/e2e/test_evaluate.py new file mode 100644 index 000000000..b2d7d02ca --- /dev/null +++ b/tests/e2e/test_evaluate.py @@ -0,0 +1,65 @@ +"""E2E smoke test for evaluate CLI command""" + +import os +from pathlib import Path + +import yaml +from accelerate.test_utils import execute_subprocess_async +from transformers.testing_utils import 
get_torch_dist_unique_port + +from axolotl.utils.dict import DictDefault + +os.environ["WANDB_DISABLED"] = "true" + + +class TestE2eEvaluate: + """Test cases for evaluate CLI""" + + def test_evaluate(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "JackFram/llama-68m", + "tokenizer_type": "LlamaTokenizer", + "sequence_len": 1024, + "val_set_size": 0.02, + "special_tokens": { + "unk_token": "<unk>", + "bos_token": "<s>", + "eos_token": "</s>", + }, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + }, + ], + "num_epochs": 1, + "micro_batch_size": 8, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_torch_fused", + "lr_scheduler": "cosine", + "max_steps": 20, + } + ) + + # write cfg to yaml file + Path(temp_dir).mkdir(parents=True, exist_ok=True) + with open(Path(temp_dir) / "config.yaml", "w", encoding="utf-8") as fout: + fout.write(yaml.dump(cfg.to_dict(), Dumper=yaml.Dumper)) + + execute_subprocess_async( + [ + "accelerate", + "launch", + "--num-processes", + "2", + "--main_process_port", + f"{get_torch_dist_unique_port()}", + "-m", + "axolotl.cli.evaluate", + str(Path(temp_dir) / "config.yaml"), + ] + )