Fix eval + add smoke test (#2586)
* fix evaluate CLI * add smoke test * fix naming * lint
This commit is contained in:
@@ -1,6 +1,7 @@
|
|||||||
"""CLI to run evaluation on a model."""
|
"""CLI to run evaluation on a model."""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Union
|
from typing import Union
|
||||||
|
|
||||||
@@ -14,6 +15,7 @@ from axolotl.cli.checks import check_accelerate_default_config, check_user_token
|
|||||||
from axolotl.cli.config import load_cfg
|
from axolotl.cli.config import load_cfg
|
||||||
from axolotl.common.datasets import load_datasets, load_preference_datasets
|
from axolotl.common.datasets import load_datasets, load_preference_datasets
|
||||||
from axolotl.evaluate import evaluate
|
from axolotl.evaluate import evaluate
|
||||||
|
from axolotl.utils import set_pytorch_cuda_alloc_conf
|
||||||
from axolotl.utils.dict import DictDefault
|
from axolotl.utils.dict import DictDefault
|
||||||
|
|
||||||
LOG = logging.getLogger(__name__)
|
LOG = logging.getLogger(__name__)
|
||||||
@@ -29,10 +31,14 @@ def do_evaluate(cfg: DictDefault, cli_args: TrainerCliArgs) -> None:
|
|||||||
cfg: Dictionary mapping `axolotl` config keys to values.
|
cfg: Dictionary mapping `axolotl` config keys to values.
|
||||||
cli_args: CLI arguments.
|
cli_args: CLI arguments.
|
||||||
"""
|
"""
|
||||||
|
# Enable expandable segments for cuda allocation to improve VRAM usage
|
||||||
|
set_pytorch_cuda_alloc_conf()
|
||||||
|
|
||||||
# pylint: disable=duplicate-code
|
# pylint: disable=duplicate-code
|
||||||
print_axolotl_text_art()
|
print_axolotl_text_art()
|
||||||
check_accelerate_default_config()
|
check_accelerate_default_config()
|
||||||
check_user_token()
|
if int(os.getenv("LOCAL_RANK", "0")) == 0:
|
||||||
|
check_user_token()
|
||||||
|
|
||||||
if cfg.rl:
|
if cfg.rl:
|
||||||
dataset_meta = load_preference_datasets(cfg=cfg, cli_args=cli_args)
|
dataset_meta = load_preference_datasets(cfg=cfg, cli_args=cli_args)
|
||||||
|
|||||||
@@ -12,11 +12,12 @@ from datasets import Dataset
|
|||||||
from transformers.trainer import Trainer
|
from transformers.trainer import Trainer
|
||||||
|
|
||||||
from axolotl.logging_config import configure_logging
|
from axolotl.logging_config import configure_logging
|
||||||
from axolotl.train import TrainDatasetMeta
|
from axolotl.train import (
|
||||||
from axolotl.utils import set_pytorch_cuda_alloc_conf
|
TrainDatasetMeta,
|
||||||
|
setup_model_and_tokenizer,
|
||||||
|
)
|
||||||
from axolotl.utils.dict import DictDefault
|
from axolotl.utils.dict import DictDefault
|
||||||
from axolotl.utils.distributed import cleanup_distributed
|
from axolotl.utils.distributed import cleanup_distributed
|
||||||
from axolotl.utils.models import load_model, load_processor, load_tokenizer
|
|
||||||
from axolotl.utils.trainer import setup_trainer
|
from axolotl.utils.trainer import setup_trainer
|
||||||
|
|
||||||
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
|
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
|
||||||
@@ -24,7 +25,7 @@ src_dir = os.path.join(project_root, "src")
|
|||||||
sys.path.insert(0, src_dir)
|
sys.path.insert(0, src_dir)
|
||||||
|
|
||||||
configure_logging()
|
configure_logging()
|
||||||
LOG = get_logger("axolotl.evaluate")
|
LOG = get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def evaluate_dataset(
|
def evaluate_dataset(
|
||||||
@@ -75,37 +76,22 @@ def evaluate(*, cfg: DictDefault, dataset_meta: TrainDatasetMeta) -> Dict[str, f
|
|||||||
Returns:
|
Returns:
|
||||||
Dictionary mapping metric names to their values.
|
Dictionary mapping metric names to their values.
|
||||||
"""
|
"""
|
||||||
# pylint: disable=duplicate-code
|
# Load tokenizer, processor and model
|
||||||
# Enable expandable segments for cuda allocation to improve VRAM usage
|
LOG.debug("loading model for evaluation...")
|
||||||
set_pytorch_cuda_alloc_conf()
|
model, tokenizer, _, processor = setup_model_and_tokenizer(cfg)
|
||||||
|
|
||||||
# Load tokenizer
|
|
||||||
LOG.debug(
|
|
||||||
f"loading tokenizer... {cfg.tokenizer_config or cfg.base_model_config}",
|
|
||||||
main_process_only=True,
|
|
||||||
)
|
|
||||||
tokenizer = load_tokenizer(cfg)
|
|
||||||
|
|
||||||
# Load processor for multimodal models if needed
|
|
||||||
processor = None
|
|
||||||
if cfg.is_multimodal:
|
|
||||||
processor = load_processor(cfg, tokenizer)
|
|
||||||
|
|
||||||
# Get datasets
|
# Get datasets
|
||||||
|
# pylint: disable=duplicate-code
|
||||||
train_dataset = dataset_meta.train_dataset
|
train_dataset = dataset_meta.train_dataset
|
||||||
eval_dataset = dataset_meta.eval_dataset
|
eval_dataset = dataset_meta.eval_dataset
|
||||||
total_num_steps = dataset_meta.total_num_steps
|
total_num_steps = dataset_meta.total_num_steps
|
||||||
|
|
||||||
# Load model
|
|
||||||
LOG.debug("loading model for evaluation...")
|
|
||||||
model, _ = load_model(cfg, tokenizer, processor=processor)
|
|
||||||
|
|
||||||
# Set up trainer
|
# Set up trainer
|
||||||
trainer = setup_trainer(
|
trainer = setup_trainer(
|
||||||
cfg,
|
cfg=cfg,
|
||||||
train_dataset=train_dataset,
|
train_dataset=train_dataset,
|
||||||
eval_dataset=eval_dataset,
|
eval_dataset=eval_dataset,
|
||||||
model=(model, None, None), # No need for model_ref or peft_config
|
model=model,
|
||||||
tokenizer=tokenizer,
|
tokenizer=tokenizer,
|
||||||
processor=processor,
|
processor=processor,
|
||||||
total_num_steps=total_num_steps,
|
total_num_steps=total_num_steps,
|
||||||
|
|||||||
65
tests/e2e/test_evaluate.py
Normal file
65
tests/e2e/test_evaluate.py
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
"""E2E smoke test for evaluate CLI command"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
from accelerate.test_utils import execute_subprocess_async
|
||||||
|
from transformers.testing_utils import get_torch_dist_unique_port
|
||||||
|
|
||||||
|
from axolotl.utils.dict import DictDefault
|
||||||
|
|
||||||
|
os.environ["WANDB_DISABLED"] = "true"
|
||||||
|
|
||||||
|
|
||||||
|
class TestE2eEvaluate:
|
||||||
|
"""Test cases for evaluate CLI"""
|
||||||
|
|
||||||
|
def test_evaluate(self, temp_dir):
|
||||||
|
# pylint: disable=duplicate-code
|
||||||
|
cfg = DictDefault(
|
||||||
|
{
|
||||||
|
"base_model": "JackFram/llama-68m",
|
||||||
|
"tokenizer_type": "LlamaTokenizer",
|
||||||
|
"sequence_len": 1024,
|
||||||
|
"val_set_size": 0.02,
|
||||||
|
"special_tokens": {
|
||||||
|
"unk_token": "<unk>",
|
||||||
|
"bos_token": "<s>",
|
||||||
|
"eos_token": "</s>",
|
||||||
|
},
|
||||||
|
"datasets": [
|
||||||
|
{
|
||||||
|
"path": "mhenrichsen/alpaca_2k_test",
|
||||||
|
"type": "alpaca",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
"num_epochs": 1,
|
||||||
|
"micro_batch_size": 8,
|
||||||
|
"gradient_accumulation_steps": 1,
|
||||||
|
"output_dir": temp_dir,
|
||||||
|
"learning_rate": 0.00001,
|
||||||
|
"optimizer": "adamw_torch_fused",
|
||||||
|
"lr_scheduler": "cosine",
|
||||||
|
"max_steps": 20,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
# write cfg to yaml file
|
||||||
|
Path(temp_dir).mkdir(parents=True, exist_ok=True)
|
||||||
|
with open(Path(temp_dir) / "config.yaml", "w", encoding="utf-8") as fout:
|
||||||
|
fout.write(yaml.dump(cfg.to_dict(), Dumper=yaml.Dumper))
|
||||||
|
|
||||||
|
execute_subprocess_async(
|
||||||
|
[
|
||||||
|
"accelerate",
|
||||||
|
"launch",
|
||||||
|
"--num-processes",
|
||||||
|
"2",
|
||||||
|
"--main_process_port",
|
||||||
|
f"{get_torch_dist_unique_port()}",
|
||||||
|
"-m",
|
||||||
|
"axolotl.cli.evaluate",
|
||||||
|
str(Path(temp_dir) / "config.yaml"),
|
||||||
|
]
|
||||||
|
)
|
||||||
Reference in New Issue
Block a user