From 9871fa060bfd3a3d047ac843969979db0dead1c2 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Mon, 18 Nov 2024 12:35:31 -0500 Subject: [PATCH] optim e2e tests to run a bit faster (#2069) [skip ci] * optim e2e tests to run a bit faster * run prequant w/o lora_modules_to_save * use smollm2 --- tests/e2e/conftest.py | 19 ++++++++++ tests/e2e/multigpu/test_llama.py | 38 +++++++++---------- tests/e2e/patched/test_fa_xentropy.py | 2 + .../e2e/patched/test_lora_llama_multipack.py | 3 ++ tests/e2e/test_optimizers.py | 2 +- tests/e2e/test_packing_loss.py | 2 +- 6 files changed, 43 insertions(+), 23 deletions(-) diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py index 723a44f03..c316f6c83 100644 --- a/tests/e2e/conftest.py +++ b/tests/e2e/conftest.py @@ -5,6 +5,25 @@ import shutil import tempfile import pytest +from huggingface_hub import snapshot_download + + +@pytest.fixture(scope="session", autouse=True) +def download_smollm2_135m_model(): + # download the model + snapshot_download("HuggingFaceTB/SmolLM2-135M") + + +@pytest.fixture(scope="session", autouse=True) +def download_tatsu_lab_alpaca_dataset(): + # download the dataset + snapshot_download("tatsu-lab/alpaca", repo_type="dataset") + + +@pytest.fixture(scope="session", autouse=True) +def download_mhenrichsen_alpaca_2k_dataset(): + # download the dataset + snapshot_download("mhenrichsen/alpaca_2k_test", repo_type="dataset") @pytest.fixture diff --git a/tests/e2e/multigpu/test_llama.py b/tests/e2e/multigpu/test_llama.py index b2c8abc60..d8dcf3118 100644 --- a/tests/e2e/multigpu/test_llama.py +++ b/tests/e2e/multigpu/test_llama.py @@ -25,7 +25,7 @@ AXOLOTL_ROOT = Path(__file__).parent.parent.parent.parent @pytest.fixture(scope="session", autouse=True) def download_model(): # download the model - snapshot_download("TinyLlama/TinyLlama_v1.1") + snapshot_download("HuggingFaceTB/SmolLM2-135M") class TestMultiGPULlama: @@ -37,7 +37,7 @@ class TestMultiGPULlama: # pylint: disable=duplicate-code cfg = DictDefault( { - 
"base_model": "HuggingFaceTB/SmolLM-135M", + "base_model": "HuggingFaceTB/SmolLM2-135M", "sequence_len": 2048, "adapter": "lora", "lora_r": 8, @@ -93,7 +93,7 @@ class TestMultiGPULlama: # pylint: disable=duplicate-code cfg = DictDefault( { - "base_model": "HuggingFaceTB/SmolLM-135M", + "base_model": "HuggingFaceTB/SmolLM2-135M", "sequence_len": 2048, "sample_packing": True, "eval_sample_packing": False, @@ -149,8 +149,7 @@ class TestMultiGPULlama: # pylint: disable=duplicate-code cfg = DictDefault( { - "base_model": "TinyLlama/TinyLlama_v1.1", - "tokenizer_type": "LlamaTokenizer", + "base_model": "HuggingFaceTB/SmolLM2-135M", "sequence_len": 2048, "sample_packing": False, "eval_sample_packing": False, @@ -163,12 +162,10 @@ class TestMultiGPULlama: "lora_target_linear": True, "val_set_size": 0.05, "special_tokens": { - "unk_token": "", - "bos_token": "", - "eos_token": "", + "pad_token": "<|endoftext|>", }, "rl": "dpo", - "chat_template": "llama3", + "chat_template": "chatml", "datasets": [ { "path": "fozziethebeat/alpaca_messages_2k_dpo_test", @@ -221,7 +218,7 @@ class TestMultiGPULlama: # pylint: disable=duplicate-code cfg = DictDefault( { - "base_model": "HuggingFaceTB/SmolLM-135M", + "base_model": "HuggingFaceTB/SmolLM2-135M", "sequence_len": 2048, "sample_packing": False, "eval_sample_packing": False, @@ -294,7 +291,7 @@ class TestMultiGPULlama: # pylint: disable=duplicate-code cfg = DictDefault( { - "base_model": "HuggingFaceTB/SmolLM-135M", + "base_model": "HuggingFaceTB/SmolLM2-135M", "sequence_len": 2048, "val_set_size": 0.01, "special_tokens": { @@ -359,7 +356,7 @@ class TestMultiGPULlama: # pylint: disable=duplicate-code cfg = DictDefault( { - "base_model": "HuggingFaceTB/SmolLM-135M", + "base_model": "HuggingFaceTB/SmolLM2-135M", "sample_packing": True, "pad_to_sequence_len": True, "sequence_len": 2048, @@ -422,8 +419,7 @@ class TestMultiGPULlama: # pylint: disable=duplicate-code cfg = DictDefault( { - "base_model": 
"axolotl-ai-co/TinyLlama_v1.1-bnb-nf4-bf16", - "tokenizer_type": "AutoTokenizer", + "base_model": "axolotl-ai-co/SmolLM2-135M-bnb-nf4-bf16", "adapter": "qlora", "mean_resizing_embeddings": True, "load_in_4bit": True, @@ -431,17 +427,17 @@ class TestMultiGPULlama: "lora_alpha": 16, "lora_dropout": 0.05, "lora_target_linear": True, - "lora_modules_to_save": [ - "embed_tokens", - "lm_head", - ], + # "lora_modules_to_save": [ + # "embed_tokens", + # "lm_head", + # ], "sample_packing": True, "eval_sample_packing": False, "pad_to_sequence_len": True, "sequence_len": 2048, "val_set_size": 0.05, "special_tokens": { - "pad_token": "", + "pad_token": "<|endoftext|>", }, "datasets": [ { @@ -503,7 +499,7 @@ class TestMultiGPULlama: # pylint: disable=duplicate-code cfg = DictDefault( { - "base_model": "HuggingFaceTB/SmolLM-135M", + "base_model": "HuggingFaceTB/SmolLM2-135M", "sample_packing": True, "pad_to_sequence_len": True, "sequence_len": 2048, @@ -553,7 +549,7 @@ class TestMultiGPULlama: # pylint: disable=duplicate-code cfg = DictDefault( { - "base_model": "HuggingFaceTB/SmolLM-135M", + "base_model": "HuggingFaceTB/SmolLM2-135M", "load_in_4bit": True, "adapter": "qlora", "lora_r": 8, diff --git a/tests/e2e/patched/test_fa_xentropy.py b/tests/e2e/patched/test_fa_xentropy.py index 0991bdd74..8b76362fb 100644 --- a/tests/e2e/patched/test_fa_xentropy.py +++ b/tests/e2e/patched/test_fa_xentropy.py @@ -66,6 +66,8 @@ class TestFAXentropyLlama(unittest.TestCase): }, ], "num_epochs": 1, + "max_steps": 10, + "save_steps": 10, "micro_batch_size": 8, "gradient_accumulation_steps": 1, "output_dir": temp_dir, diff --git a/tests/e2e/patched/test_lora_llama_multipack.py b/tests/e2e/patched/test_lora_llama_multipack.py index f251f9b66..5dbf14654 100644 --- a/tests/e2e/patched/test_lora_llama_multipack.py +++ b/tests/e2e/patched/test_lora_llama_multipack.py @@ -56,6 +56,8 @@ class TestLoraLlama(unittest.TestCase): }, ], "num_epochs": 2, + "max_steps": 20, + "save_steps": 10, 
"micro_batch_size": 8, "gradient_accumulation_steps": 1, "output_dir": temp_dir, @@ -109,6 +111,7 @@ class TestLoraLlama(unittest.TestCase): }, ], "num_epochs": 2, + "max_steps": 20, "save_steps": 0.5, "micro_batch_size": 8, "gradient_accumulation_steps": 1, diff --git a/tests/e2e/test_optimizers.py b/tests/e2e/test_optimizers.py index b9fa368f6..af5445461 100644 --- a/tests/e2e/test_optimizers.py +++ b/tests/e2e/test_optimizers.py @@ -113,7 +113,7 @@ class TestCustomOptimizers(unittest.TestCase): def test_fft_schedule_free_adamw(self, temp_dir): cfg = DictDefault( { - "base_model": "HuggingFaceTB/SmolLM-135M", + "base_model": "HuggingFaceTB/SmolLM2-135M", "sequence_len": 1024, "val_set_size": 0.1, "special_tokens": { diff --git a/tests/e2e/test_packing_loss.py b/tests/e2e/test_packing_loss.py index 73f9e60ba..60f167381 100644 --- a/tests/e2e/test_packing_loss.py +++ b/tests/e2e/test_packing_loss.py @@ -31,7 +31,7 @@ class TestPackedLlama(unittest.TestCase): # pylint: disable=duplicate-code cfg = DictDefault( { - "base_model": "HuggingFaceTB/SmolLM-135M", + "base_model": "HuggingFaceTB/SmolLM2-135M", "sequence_len": 1024, "sample_packing": True, "flash_attention": True,