Phi2 multipack (#1173)

* phi2 multipack

* update validation and examples for phi

* more updates to phi examples

* make sure to use the correct collator for phi multipack

* phi needs attention mask now for multipack

* if the special token already exists in the tokenizer, don't require in lora modules to save

* fix qlora yml for phi, fix phi test validation

* test qlora too

* make sure flash attention is enabled for the test

* don't use remote code for phi anymore

* reduce sequence len for sample packing phi
This commit is contained in:
Wing Lian
2024-01-23 12:54:36 -05:00
committed by GitHub
parent b715cd549a
commit 814aee6603
18 changed files with 201 additions and 2269 deletions

View File

@@ -0,0 +1,123 @@
"""
E2E tests for lora llama
"""
import logging
import os
import unittest
from pathlib import Path
from axolotl.cli import load_datasets
from axolotl.common.cli import TrainerCliArgs
from axolotl.train import train
from axolotl.utils.config import normalize_config
from axolotl.utils.dict import DictDefault
from ..utils import with_temp_dir
LOG = logging.getLogger("axolotl.tests.e2e")
os.environ["WANDB_DISABLED"] = "true"
class TestPhiMultipack(unittest.TestCase):
"""
Test case for Phi2 models
"""
@with_temp_dir
def test_ft_packed(self, temp_dir):
# pylint: disable=duplicate-code
cfg = DictDefault(
{
"base_model": "microsoft/phi-1_5",
"model_type": "PhiForCausalLM",
"tokenizer_type": "AutoTokenizer",
"sequence_len": 1024,
"sample_packing": True,
"flash_attention": True,
"pad_to_sequence_len": True,
"load_in_8bit": False,
"adapter": None,
"val_set_size": 0.1,
"special_tokens": {
"pad_token": "<|endoftext|>",
},
"datasets": [
{
"path": "mhenrichsen/alpaca_2k_test",
"type": "alpaca",
},
],
"dataset_shard_num": 10,
"dataset_shard_idx": 0,
"num_epochs": 1,
"micro_batch_size": 1,
"gradient_accumulation_steps": 1,
"output_dir": temp_dir,
"learning_rate": 0.00001,
"optimizer": "adamw_bnb_8bit",
"lr_scheduler": "cosine",
"max_steps": 20,
"eval_steps": 10,
"save_steps": 10,
"bf16": "auto",
}
)
normalize_config(cfg)
cli_args = TrainerCliArgs()
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
assert (Path(temp_dir) / "pytorch_model.bin").exists()
@with_temp_dir
def test_qlora_packed(self, temp_dir):
# pylint: disable=duplicate-code
cfg = DictDefault(
{
"base_model": "microsoft/phi-1_5",
"model_type": "PhiForCausalLM",
"tokenizer_type": "AutoTokenizer",
"sequence_len": 1024,
"sample_packing": True,
"flash_attention": True,
"pad_to_sequence_len": True,
"load_in_8bit": False,
"adapter": "qlora",
"lora_r": 64,
"lora_alpha": 32,
"lora_dropout": 0.05,
"lora_target_linear": True,
"val_set_size": 0.1,
"special_tokens": {
"pad_token": "<|endoftext|>",
},
"datasets": [
{
"path": "mhenrichsen/alpaca_2k_test",
"type": "alpaca",
},
],
"dataset_shard_num": 10,
"dataset_shard_idx": 0,
"num_epochs": 1,
"micro_batch_size": 1,
"gradient_accumulation_steps": 1,
"output_dir": temp_dir,
"learning_rate": 0.00001,
"optimizer": "adamw_bnb_8bit",
"lr_scheduler": "cosine",
"max_steps": 20,
"eval_steps": 10,
"save_steps": 10,
"bf16": "auto",
}
)
normalize_config(cfg)
cli_args = TrainerCliArgs()
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
assert (Path(temp_dir) / "adapter_model.bin").exists()

View File

@@ -7,9 +7,6 @@ import os
import unittest
from pathlib import Path
import pytest
from transformers.utils import is_torch_bf16_gpu_available
from axolotl.cli import load_datasets
from axolotl.common.cli import TrainerCliArgs
from axolotl.train import train
@@ -27,17 +24,15 @@ class TestPhi(unittest.TestCase):
Test case for Phi2 models
"""
@pytest.mark.skip(reason="fixme later")
@with_temp_dir
def test_phi2_ft(self, temp_dir):
def test_phi_ft(self, temp_dir):
# pylint: disable=duplicate-code
cfg = DictDefault(
{
"base_model": "microsoft/phi-2",
"trust_remote_code": True,
"base_model": "microsoft/phi-1_5",
"model_type": "AutoModelForCausalLM",
"tokenizer_type": "AutoTokenizer",
"sequence_len": 512,
"sequence_len": 2048,
"sample_packing": False,
"load_in_8bit": False,
"adapter": None,
@@ -64,13 +59,9 @@ class TestPhi(unittest.TestCase):
"max_steps": 10,
"save_steps": 10,
"eval_steps": 10,
"save_safetensors": True,
"bf16": "auto",
}
)
if is_torch_bf16_gpu_available():
cfg.bf16 = True
else:
cfg.fp16 = True
normalize_config(cfg)
cli_args = TrainerCliArgs()
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
@@ -78,25 +69,24 @@ class TestPhi(unittest.TestCase):
train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
assert (Path(temp_dir) / "pytorch_model.bin").exists()
@pytest.mark.skip(reason="multipack no longer supported atm")
@with_temp_dir
def test_ft_packed(self, temp_dir):
def test_phi_qlora(self, temp_dir):
# pylint: disable=duplicate-code
cfg = DictDefault(
{
"base_model": "microsoft/phi-2",
"trust_remote_code": True,
"model_type": "PhiForCausalLM",
"base_model": "microsoft/phi-1_5",
"model_type": "AutoModelForCausalLM",
"tokenizer_type": "AutoTokenizer",
"sequence_len": 512,
"sample_packing": True,
"sequence_len": 2048,
"sample_packing": False,
"load_in_8bit": False,
"adapter": None,
"adapter": "qlora",
"lora_r": 64,
"lora_alpha": 32,
"lora_dropout": 0.05,
"lora_target_linear": True,
"val_set_size": 0.1,
"special_tokens": {
"unk_token": "<|endoftext|>",
"bos_token": "<|endoftext|>",
"eos_token": "<|endoftext|>",
"pad_token": "<|endoftext|>",
},
"datasets": [
@@ -112,18 +102,18 @@ class TestPhi(unittest.TestCase):
"gradient_accumulation_steps": 1,
"output_dir": temp_dir,
"learning_rate": 0.00001,
"optimizer": "adamw_bnb_8bit",
"optimizer": "paged_adamw_8bit",
"lr_scheduler": "cosine",
"flash_attention": True,
"max_steps": 10,
"save_steps": 10,
"eval_steps": 10,
"bf16": "auto",
}
)
if is_torch_bf16_gpu_available():
cfg.bf16 = True
else:
cfg.fp16 = True
normalize_config(cfg)
cli_args = TrainerCliArgs()
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
assert (Path(temp_dir) / "pytorch_model.bin").exists()
assert (Path(temp_dir) / "adapter_model.bin").exists()

View File

@@ -742,11 +742,11 @@ class ValidationCheckModelConfig(BaseValidation):
check_model_config(cfg, model_config)
def test_phi2_add_tokens_adapter(self):
def test_phi_add_tokens_adapter(self):
cfg = DictDefault(
{"adapter": "qlora", "load_in_4bit": True, "tokens": ["<|imstart|>"]}
)
model_config = DictDefault({"model_type": "phi-msft"})
model_config = DictDefault({"model_type": "phi"})
with pytest.raises(
ValueError,
@@ -759,7 +759,7 @@ class ValidationCheckModelConfig(BaseValidation):
"adapter": "qlora",
"load_in_4bit": True,
"tokens": ["<|imstart|>"],
"lora_modules_to_save": ["embed_tokens", "lm_head"],
"lora_modules_to_save": ["embd.wte", "lm_head.linear"],
}
)
@@ -774,7 +774,7 @@ class ValidationCheckModelConfig(BaseValidation):
"adapter": "qlora",
"load_in_4bit": True,
"tokens": ["<|imstart|>"],
"lora_modules_to_save": ["embd.wte", "lm_head.linear"],
"lora_modules_to_save": ["embed_tokens", "lm_head"],
}
)