diff --git a/cicd/cicd.sh b/cicd/cicd.sh index e199e112f..7a4a31504 100755 --- a/cicd/cicd.sh +++ b/cicd/cicd.sh @@ -1,6 +1,6 @@ #!/bin/bash set -e -pytest -n8 --ignore=tests/e2e/ /workspace/axolotl/tests/ -pytest -n1 --dist loadfile -v /workspace/axolotl/tests/e2e/patched/ /workspace/axolotl/tests/e2e/integrations/ -pytest --ignore=tests/e2e/patched/ --ignore=tests/e2e/multigpu/ --ignore=tests/e2e/integrations/ /workspace/axolotl/tests/e2e/ +pytest --durations=10 -n8 --ignore=tests/e2e/ /workspace/axolotl/tests/ +pytest --durations=10 -n1 --dist loadfile -v /workspace/axolotl/tests/e2e/patched/ /workspace/axolotl/tests/e2e/integrations/ +pytest --durations=10 --ignore=tests/e2e/patched/ --ignore=tests/e2e/multigpu/ --ignore=tests/e2e/integrations/ /workspace/axolotl/tests/e2e/ diff --git a/requirements.txt b/requirements.txt index f2086d442..456c63ca5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -26,7 +26,7 @@ numpy>=1.24.4,<=2.0.1 evaluate==0.4.1 scipy scikit-learn==1.4.2 -pynvml +nvidia-ml-py==12.560.30 art gradio==3.50.2 tensorboard diff --git a/src/axolotl/utils/bench.py b/src/axolotl/utils/bench.py index 57471ae0d..3d338aff1 100644 --- a/src/axolotl/utils/bench.py +++ b/src/axolotl/utils/bench.py @@ -1,13 +1,24 @@ """Benchmarking and measurement utilities""" import functools -import pynvml import torch -from pynvml.nvml import NVMLError from transformers.utils.import_utils import is_torch_npu_available from axolotl.utils.distributed import get_device_type +try: + from pynvml import ( + NVMLError, + nvmlDeviceGetHandleByIndex, + nvmlDeviceGetMemoryInfo, + nvmlInit, + ) +except ImportError: + NVMLError = None + nvmlDeviceGetHandleByIndex = None + nvmlDeviceGetMemoryInfo = None + nvmlInit = None + def check_cuda_device(default_value): """ @@ -68,10 +79,12 @@ def gpu_memory_usage_smi(device=0): device = device.index if isinstance(device, str) and device.startswith("cuda:"): device = int(device[5:]) + if not nvmlInit: + return 0.0 try: - pynvml.nvmlInit() - handle = pynvml.nvmlDeviceGetHandleByIndex(device) - info = pynvml.nvmlDeviceGetMemoryInfo(handle) + nvmlInit() + handle = nvmlDeviceGetHandleByIndex(device) + info = nvmlDeviceGetMemoryInfo(handle) return info.used / 1024.0**3 except NVMLError: return 0.0 diff --git a/src/axolotl/utils/data/sft.py b/src/axolotl/utils/data/sft.py index 0bee4dd5c..4ed16e358 100644 --- a/src/axolotl/utils/data/sft.py +++ b/src/axolotl/utils/data/sft.py @@ -179,7 +179,7 @@ def load_tokenized_prepared_datasets( + "|".join( sorted( [ - f"{d.path}: {d.type}: {d.shards}: {d.conversation}{d.split}" + f"{d.path}:{d.type}:{d.shards}:{d.conversation}{d.split}" for d in cfg_datasets ] ) diff --git a/tests/conftest.py b/tests/conftest.py index a8bf03ac0..4479e676f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -14,6 +14,12 @@ def download_smollm2_135m_model(): snapshot_download("HuggingFaceTB/SmolLM2-135M") +@pytest.fixture(scope="session", autouse=True) +def download_llama_68m_random_model(): + # download the model + snapshot_download("JackFram/llama-68m") + + @pytest.fixture(scope="session", autouse=True) def download_qwen_2_5_half_billion_model(): # download the model @@ -22,18 +28,26 @@ def download_qwen_2_5_half_billion_model(): @pytest.fixture(scope="session", autouse=True) def download_tatsu_lab_alpaca_dataset(): - # download the model + # download the dataset snapshot_download("tatsu-lab/alpaca", repo_type="dataset") @pytest.fixture(scope="session", autouse=True) def download_mhenrichsen_alpaca_2k_dataset(): - # download the model + # download the dataset snapshot_download("mhenrichsen/alpaca_2k_test", repo_type="dataset") +@pytest.fixture(scope="session", autouse=True) +def download_mhenrichsen_alpaca_2k_w_revision_dataset(): + # download the dataset + snapshot_download( + "mhenrichsen/alpaca_2k_test", repo_type="dataset", revision="d05c1cb" + ) + + def download_mlabonne_finetome_100k_dataset(): - # download the model + # download the dataset snapshot_download("mlabonne/FineTome-100k", repo_type="dataset") diff --git a/tests/core/test_trainer_builder.py b/tests/core/test_trainer_builder.py index 82455922e..558d3cb95 100644 --- a/tests/core/test_trainer_builder.py +++ b/tests/core/test_trainer_builder.py @@ -14,9 +14,7 @@ from axolotl.utils.models import load_model, load_tokenizer def fixture_cfg(): cfg = DictDefault( { - "base_model": "TinyLlama/TinyLlama-1.1B-Chat-v0.6", - "model_type": "AutoModelForCausalLM", - "tokenizer_type": "LlamaTokenizer", + "base_model": "HuggingFaceTB/SmolLM2-135M", "micro_batch_size": 1, "gradient_accumulation_steps": 1, "learning_rate": 0.00005, @@ -33,6 +31,9 @@ def fixture_cfg(): "dataloader_num_workers": 1, "dataloader_pin_memory": True, "model_config_type": "llama", + "special_tokens": { + "pad_token": "<|endoftext|>", + }, } ) diff --git a/tests/e2e/patched/test_fa_xentropy.py b/tests/e2e/patched/test_fa_xentropy.py index 7ca1c0836..76ea1a934 100644 --- a/tests/e2e/patched/test_fa_xentropy.py +++ b/tests/e2e/patched/test_fa_xentropy.py @@ -51,11 +51,11 @@ class TestFAXentropyLlama: "flash_attn_cross_entropy": True, "load_in_8bit": True, "adapter": "lora", - "lora_r": 32, - "lora_alpha": 64, + "lora_r": 8, + "lora_alpha": 16, "lora_dropout": 0.05, "lora_target_linear": True, - "val_set_size": 0.2, + "val_set_size": 0.05, "special_tokens": { "pad_token": "<|endoftext|>", }, diff --git a/tests/e2e/patched/test_resume.py b/tests/e2e/patched/test_resume.py index c0e791f38..44d3d9e83 100644 --- a/tests/e2e/patched/test_resume.py +++ b/tests/e2e/patched/test_resume.py @@ -29,23 +29,24 @@ class TestResumeLlama(unittest.TestCase): """ @with_temp_dir - def test_resume_qlora_packed(self, temp_dir): + def test_resume_lora_packed(self, temp_dir): # pylint: disable=duplicate-code cfg = DictDefault( { - "base_model": "JackFram/llama-68m", - "tokenizer_type": "LlamaTokenizer", + "base_model": "HuggingFaceTB/SmolLM2-135M", "sequence_len": 1024, "sample_packing": True, "flash_attention": True, - "load_in_4bit": True, - "adapter": "qlora", - "lora_r": 32, - "lora_alpha": 64, + "load_in_8bit": True, + "adapter": "lora", + "lora_r": 8, + "lora_alpha": 16, "lora_dropout": 0.05, "lora_target_linear": True, - "val_set_size": 0.1, - "special_tokens": {}, + "val_set_size": 0.01, + "special_tokens": { + "pad_token": "<|endoftext|>", + }, "datasets": [ { "path": "vicgalle/alpaca-gpt4", @@ -57,11 +58,11 @@ class TestResumeLlama(unittest.TestCase): "gradient_accumulation_steps": 1, "output_dir": temp_dir, "learning_rate": 0.00001, - "optimizer": "adamw_torch", + "optimizer": "adamw_8bit", "lr_scheduler": "cosine", - "save_steps": 10, + "save_steps": 3, "save_total_limit": 5, - "max_steps": 40, + "max_steps": 15, "use_tensorboard": True, } ) @@ -77,7 +78,7 @@ class TestResumeLlama(unittest.TestCase): resume_cfg = cfg | DictDefault( { - "resume_from_checkpoint": f"{temp_dir}/checkpoint-30/", + "resume_from_checkpoint": f"{temp_dir}/checkpoint-9/", } ) normalize_config(resume_cfg) @@ -93,4 +94,4 @@ class TestResumeLlama(unittest.TestCase): ) pattern = r"first_step\s+(\d+)" first_steps = int(re.findall(pattern, res.stdout)[0]) - assert first_steps == 31 + assert first_steps == 10 diff --git a/tests/e2e/patched/test_unsloth_qlora.py b/tests/e2e/patched/test_unsloth_qlora.py index 805b15003..3d7e794f1 100644 --- a/tests/e2e/patched/test_unsloth_qlora.py +++ b/tests/e2e/patched/test_unsloth_qlora.py @@ -42,7 +42,7 @@ class TestUnslothQLoRA: "lora_alpha": 16, "lora_dropout": 0.05, "lora_target_linear": True, - "val_set_size": 0.2, + "val_set_size": 0.05, "special_tokens": { "pad_token": "<|endoftext|>", }, @@ -92,7 +92,7 @@ class TestUnslothQLoRA: "lora_alpha": 16, "lora_dropout": 0.05, "lora_target_linear": True, - "val_set_size": 0.2, + "val_set_size": 0.05, "special_tokens": { "pad_token": "<|endoftext|>", }, @@ -146,7 +146,7 @@ class TestUnslothQLoRA: "lora_alpha": 16, "lora_dropout": 0.05, "lora_target_linear": True, - "val_set_size": 0.2, + "val_set_size": 0.05, "special_tokens": { "pad_token": "<|endoftext|>", }, diff --git a/tests/e2e/test_optimizers.py b/tests/e2e/test_optimizers.py index af5445461..63c46c2a2 100644 --- a/tests/e2e/test_optimizers.py +++ b/tests/e2e/test_optimizers.py @@ -94,6 +94,7 @@ class TestCustomOptimizers(unittest.TestCase): }, ], "num_epochs": 1, + "max_steps": 5, "micro_batch_size": 8, "gradient_accumulation_steps": 1, "output_dir": temp_dir, @@ -115,7 +116,7 @@ class TestCustomOptimizers(unittest.TestCase): { "base_model": "HuggingFaceTB/SmolLM2-135M", "sequence_len": 1024, - "val_set_size": 0.1, + "val_set_size": 0.01, "special_tokens": { "pad_token": "<|endoftext|>", }, @@ -126,13 +127,14 @@ class TestCustomOptimizers(unittest.TestCase): }, ], "num_epochs": 1, - "micro_batch_size": 4, + "micro_batch_size": 2, "gradient_accumulation_steps": 2, "output_dir": temp_dir, "learning_rate": 0.00001, "optimizer": "schedule_free_adamw", "lr_scheduler": "constant", "save_safetensors": True, + "max_steps": 10, } ) # pylint: disable=duplicate-code diff --git a/tests/e2e/test_relora_llama.py b/tests/e2e/test_relora_llama.py index 4ba130c9d..5de5db11b 100644 --- a/tests/e2e/test_relora_llama.py +++ b/tests/e2e/test_relora_llama.py @@ -52,6 +52,7 @@ class TestReLoraLlama(unittest.TestCase): ], "warmup_steps": 15, "num_epochs": 2, + "max_steps": 51, # at least 2x relora_steps "micro_batch_size": 4, "gradient_accumulation_steps": 1, "output_dir": temp_dir, diff --git a/tests/test_datasets.py b/tests/test_datasets.py index f3bed00fd..b1ecfd6d5 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -67,7 +67,7 @@ class TestDatasetPreparation(unittest.TestCase): def test_load_local_hub(self): """Niche use case. Verify that a local copy of a hub dataset can be loaded""" with tempfile.TemporaryDirectory() as tmp_dir: - tmp_ds_path = Path("mhenrichsen/alpaca_2k_test") + tmp_ds_path = Path(tmp_dir) / "mhenrichsen/alpaca_2k_test" tmp_ds_path.mkdir(parents=True, exist_ok=True) snapshot_download( repo_id="mhenrichsen/alpaca_2k_test", @@ -89,7 +89,7 @@ class TestDatasetPreparation(unittest.TestCase): "ds_type": "parquet", "type": "alpaca", "data_files": [ - "mhenrichsen/alpaca_2k_test/alpaca_2000.parquet", + f"{tmp_ds_path}/alpaca_2000.parquet", ], }, ], diff --git a/tests/test_perplexity.py b/tests/test_perplexity.py index 8688827ce..b32cd5283 100644 --- a/tests/test_perplexity.py +++ b/tests/test_perplexity.py @@ -7,7 +7,7 @@ from transformers.models.auto.tokenization_auto import AutoTokenizer from axolotl.utils.callbacks.perplexity import Perplexity -MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" +MODEL_NAME = "HuggingFaceTB/SmolLM2-135M" @fixture() @@ -22,7 +22,9 @@ def model(): @fixture() def tokenizer(): - return AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True) + tokenizer_ = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True) + tokenizer_.add_special_tokens({"pad_token": "<|endoftext|>"}) + return tokenizer_ def test_perplexity_longer_than_stride(model, metric): @@ -33,7 +35,7 @@ One day, a little fish named Fin was swimming near the shore. He saw a big crab """ result = metric.compute(model, [sample_text]) ppl = result["score"] - assert round(ppl, 2) == 5.37 + assert round(ppl, 2) == 7.41 def test_perplexity_short(model, metric): @@ -41,4 +43,4 @@ def test_perplexity_short(model, metric): sample_text = "Once upon a time, there was a little car named Beep. Beep loved to go fast and play in the sun." result = metric.compute(model, [sample_text]) ppl = result["score"] - assert round(ppl, 2) == 10.02 + assert round(ppl, 2) == 10.33