Optimize e2e tests to run a bit faster (#2069) [skip ci]

* optimize e2e tests to run a bit faster

* run the prequant test without lora_modules_to_save

* use smollm2
This commit is contained in:
Wing Lian
2024-11-18 12:35:31 -05:00
committed by GitHub
parent 70cf79ef52
commit 9871fa060b
6 changed files with 43 additions and 23 deletions

View File

@@ -5,6 +5,25 @@ import shutil
import tempfile import tempfile
import pytest import pytest
from huggingface_hub import snapshot_download
@pytest.fixture(scope="session", autouse=True)
def download_smollm2_135m_model():
    """Fetch the SmolLM2-135M model snapshot once per test session."""
    repo_id = "HuggingFaceTB/SmolLM2-135M"
    snapshot_download(repo_id)
@pytest.fixture(scope="session", autouse=True)
def download_tatsu_lab_alpaca_dataset():
    """Download the tatsu-lab/alpaca dataset snapshot once per test session."""
    # This repo id refers to a dataset, not a model, hence repo_type="dataset".
    snapshot_download("tatsu-lab/alpaca", repo_type="dataset")
@pytest.fixture(scope="session", autouse=True)
def download_mhenrichsen_alpaca_2k_dataset():
    """Download the mhenrichsen/alpaca_2k_test dataset snapshot once per test session."""
    # This repo id refers to a dataset, not a model, hence repo_type="dataset".
    snapshot_download("mhenrichsen/alpaca_2k_test", repo_type="dataset")
@pytest.fixture @pytest.fixture

View File

@@ -25,7 +25,7 @@ AXOLOTL_ROOT = Path(__file__).parent.parent.parent.parent
@pytest.fixture(scope="session", autouse=True) @pytest.fixture(scope="session", autouse=True)
def download_model(): def download_model():
# download the model # download the model
snapshot_download("TinyLlama/TinyLlama_v1.1") snapshot_download("HuggingFaceTB/SmolLM2-135M")
class TestMultiGPULlama: class TestMultiGPULlama:
@@ -37,7 +37,7 @@ class TestMultiGPULlama:
# pylint: disable=duplicate-code # pylint: disable=duplicate-code
cfg = DictDefault( cfg = DictDefault(
{ {
"base_model": "HuggingFaceTB/SmolLM-135M", "base_model": "HuggingFaceTB/SmolLM2-135M",
"sequence_len": 2048, "sequence_len": 2048,
"adapter": "lora", "adapter": "lora",
"lora_r": 8, "lora_r": 8,
@@ -93,7 +93,7 @@ class TestMultiGPULlama:
# pylint: disable=duplicate-code # pylint: disable=duplicate-code
cfg = DictDefault( cfg = DictDefault(
{ {
"base_model": "HuggingFaceTB/SmolLM-135M", "base_model": "HuggingFaceTB/SmolLM2-135M",
"sequence_len": 2048, "sequence_len": 2048,
"sample_packing": True, "sample_packing": True,
"eval_sample_packing": False, "eval_sample_packing": False,
@@ -149,8 +149,7 @@ class TestMultiGPULlama:
# pylint: disable=duplicate-code # pylint: disable=duplicate-code
cfg = DictDefault( cfg = DictDefault(
{ {
"base_model": "TinyLlama/TinyLlama_v1.1", "base_model": "HuggingFaceTB/SmolLM2-135M",
"tokenizer_type": "LlamaTokenizer",
"sequence_len": 2048, "sequence_len": 2048,
"sample_packing": False, "sample_packing": False,
"eval_sample_packing": False, "eval_sample_packing": False,
@@ -163,12 +162,10 @@ class TestMultiGPULlama:
"lora_target_linear": True, "lora_target_linear": True,
"val_set_size": 0.05, "val_set_size": 0.05,
"special_tokens": { "special_tokens": {
"unk_token": "<unk>", "pad_token": "<|endoftext|>",
"bos_token": "<s>",
"eos_token": "</s>",
}, },
"rl": "dpo", "rl": "dpo",
"chat_template": "llama3", "chat_template": "chatml",
"datasets": [ "datasets": [
{ {
"path": "fozziethebeat/alpaca_messages_2k_dpo_test", "path": "fozziethebeat/alpaca_messages_2k_dpo_test",
@@ -221,7 +218,7 @@ class TestMultiGPULlama:
# pylint: disable=duplicate-code # pylint: disable=duplicate-code
cfg = DictDefault( cfg = DictDefault(
{ {
"base_model": "HuggingFaceTB/SmolLM-135M", "base_model": "HuggingFaceTB/SmolLM2-135M",
"sequence_len": 2048, "sequence_len": 2048,
"sample_packing": False, "sample_packing": False,
"eval_sample_packing": False, "eval_sample_packing": False,
@@ -294,7 +291,7 @@ class TestMultiGPULlama:
# pylint: disable=duplicate-code # pylint: disable=duplicate-code
cfg = DictDefault( cfg = DictDefault(
{ {
"base_model": "HuggingFaceTB/SmolLM-135M", "base_model": "HuggingFaceTB/SmolLM2-135M",
"sequence_len": 2048, "sequence_len": 2048,
"val_set_size": 0.01, "val_set_size": 0.01,
"special_tokens": { "special_tokens": {
@@ -359,7 +356,7 @@ class TestMultiGPULlama:
# pylint: disable=duplicate-code # pylint: disable=duplicate-code
cfg = DictDefault( cfg = DictDefault(
{ {
"base_model": "HuggingFaceTB/SmolLM-135M", "base_model": "HuggingFaceTB/SmolLM2-135M",
"sample_packing": True, "sample_packing": True,
"pad_to_sequence_len": True, "pad_to_sequence_len": True,
"sequence_len": 2048, "sequence_len": 2048,
@@ -422,8 +419,7 @@ class TestMultiGPULlama:
# pylint: disable=duplicate-code # pylint: disable=duplicate-code
cfg = DictDefault( cfg = DictDefault(
{ {
"base_model": "axolotl-ai-co/TinyLlama_v1.1-bnb-nf4-bf16", "base_model": "axolotl-ai-co/SmolLM2-135M-bnb-nf4-bf16",
"tokenizer_type": "AutoTokenizer",
"adapter": "qlora", "adapter": "qlora",
"mean_resizing_embeddings": True, "mean_resizing_embeddings": True,
"load_in_4bit": True, "load_in_4bit": True,
@@ -431,17 +427,17 @@ class TestMultiGPULlama:
"lora_alpha": 16, "lora_alpha": 16,
"lora_dropout": 0.05, "lora_dropout": 0.05,
"lora_target_linear": True, "lora_target_linear": True,
"lora_modules_to_save": [ # "lora_modules_to_save": [
"embed_tokens", # "embed_tokens",
"lm_head", # "lm_head",
], # ],
"sample_packing": True, "sample_packing": True,
"eval_sample_packing": False, "eval_sample_packing": False,
"pad_to_sequence_len": True, "pad_to_sequence_len": True,
"sequence_len": 2048, "sequence_len": 2048,
"val_set_size": 0.05, "val_set_size": 0.05,
"special_tokens": { "special_tokens": {
"pad_token": "</s>", "pad_token": "<|endoftext|>",
}, },
"datasets": [ "datasets": [
{ {
@@ -503,7 +499,7 @@ class TestMultiGPULlama:
# pylint: disable=duplicate-code # pylint: disable=duplicate-code
cfg = DictDefault( cfg = DictDefault(
{ {
"base_model": "HuggingFaceTB/SmolLM-135M", "base_model": "HuggingFaceTB/SmolLM2-135M",
"sample_packing": True, "sample_packing": True,
"pad_to_sequence_len": True, "pad_to_sequence_len": True,
"sequence_len": 2048, "sequence_len": 2048,
@@ -553,7 +549,7 @@ class TestMultiGPULlama:
# pylint: disable=duplicate-code # pylint: disable=duplicate-code
cfg = DictDefault( cfg = DictDefault(
{ {
"base_model": "HuggingFaceTB/SmolLM-135M", "base_model": "HuggingFaceTB/SmolLM2-135M",
"load_in_4bit": True, "load_in_4bit": True,
"adapter": "qlora", "adapter": "qlora",
"lora_r": 8, "lora_r": 8,

View File

@@ -66,6 +66,8 @@ class TestFAXentropyLlama(unittest.TestCase):
}, },
], ],
"num_epochs": 1, "num_epochs": 1,
"max_steps": 10,
"save_steps": 10,
"micro_batch_size": 8, "micro_batch_size": 8,
"gradient_accumulation_steps": 1, "gradient_accumulation_steps": 1,
"output_dir": temp_dir, "output_dir": temp_dir,

View File

@@ -56,6 +56,8 @@ class TestLoraLlama(unittest.TestCase):
}, },
], ],
"num_epochs": 2, "num_epochs": 2,
"max_steps": 20,
"save_steps": 10,
"micro_batch_size": 8, "micro_batch_size": 8,
"gradient_accumulation_steps": 1, "gradient_accumulation_steps": 1,
"output_dir": temp_dir, "output_dir": temp_dir,
@@ -109,6 +111,7 @@ class TestLoraLlama(unittest.TestCase):
}, },
], ],
"num_epochs": 2, "num_epochs": 2,
"max_steps": 20,
"save_steps": 0.5, "save_steps": 0.5,
"micro_batch_size": 8, "micro_batch_size": 8,
"gradient_accumulation_steps": 1, "gradient_accumulation_steps": 1,

View File

@@ -113,7 +113,7 @@ class TestCustomOptimizers(unittest.TestCase):
def test_fft_schedule_free_adamw(self, temp_dir): def test_fft_schedule_free_adamw(self, temp_dir):
cfg = DictDefault( cfg = DictDefault(
{ {
"base_model": "HuggingFaceTB/SmolLM-135M", "base_model": "HuggingFaceTB/SmolLM2-135M",
"sequence_len": 1024, "sequence_len": 1024,
"val_set_size": 0.1, "val_set_size": 0.1,
"special_tokens": { "special_tokens": {

View File

@@ -31,7 +31,7 @@ class TestPackedLlama(unittest.TestCase):
# pylint: disable=duplicate-code # pylint: disable=duplicate-code
cfg = DictDefault( cfg = DictDefault(
{ {
"base_model": "HuggingFaceTB/SmolLM-135M", "base_model": "HuggingFaceTB/SmolLM2-135M",
"sequence_len": 1024, "sequence_len": 1024,
"sample_packing": True, "sample_packing": True,
"flash_attention": True, "flash_attention": True,