Compare commits

1 commit

squash_pos ... fix/replac

| Author | SHA1 | Date |
|---|---|---|
|  | 10d18e6c97 |  |
```diff
@@ -47,9 +47,9 @@ def download_smollm2_135m_model():
 @pytest.fixture(scope="session", autouse=True)
-def download_llama_68m_random_model():
+def download_smollm2_135m_instruct_model():
     # download the model
-    snapshot_download_w_retry("JackFram/llama-68m")
+    snapshot_download_w_retry("HuggingFaceTB/SmolLM2-135M-Instruct")


 @pytest.fixture(scope="session", autouse=True)
```
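The fixture change above renames the session-scoped download hook and points it at the new model. `snapshot_download_w_retry` is a repo helper whose implementation isn't shown in this diff; a minimal sketch of what such a helper typically looks like, assuming `huggingface_hub` and a simple retry loop, is:

```python
# Hypothetical sketch only; the repo's real snapshot_download_w_retry may differ.
import time

from huggingface_hub import snapshot_download


def snapshot_download_w_retry(repo_id: str, retries: int = 3, delay: float = 5.0):
    """Download a full model snapshot, retrying transient failures."""
    for attempt in range(retries):
        try:
            return snapshot_download(repo_id)
        except Exception:  # retry on any transient error (network, rate limit)
            if attempt == retries - 1:
                raise
            time.sleep(delay)
```

Because the fixture is `scope="session"` and `autouse=True`, the download runs once before the first test rather than once per test.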
```diff
@@ -28,7 +28,7 @@ class Test4dMultipackLlama(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "flash_attention": False,
                 "sdp_attention": True,
                 "sample_packing": True,
@@ -72,7 +72,7 @@ class Test4dMultipackLlama(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "flash_attention": False,
                 "sdp_attention": False,
                 "sample_packing": True,
```
```diff
@@ -32,7 +32,7 @@ class TestFusedLlama(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "flash_attention": True,
                 "pad_to_sequence_len": True,
                 "flash_attn_fuse_qkv": True,
```
```diff
@@ -31,8 +31,7 @@ class TestLlamaShiftedSparseAttention(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "sequence_len": 16384,
                 "sample_packing": False,
                 "flash_attention": True,
@@ -77,8 +76,7 @@ class TestLlamaShiftedSparseAttention(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "sequence_len": 16384,
                 "sample_packing": False,
                 "flash_attention": True,
```
```diff
@@ -31,8 +31,7 @@ class TestLoraLlama(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "sequence_len": 1024,
                 "sample_packing": True,
                 "flash_attention": True,
@@ -43,6 +42,7 @@ class TestLoraLlama(unittest.TestCase):
                 "lora_dropout": 0.05,
                 "lora_target_linear": True,
                 "val_set_size": 0.2,
+                "lora_modules_to_save": ["lm_head", "embed_tokens"],
                 "special_tokens": {
                     "unk_token": "<unk>",
                     "bos_token": "<s>",
```
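The second hunk above adds `lora_modules_to_save` so the embedding matrix and LM head are trained and saved with the adapter; this matters when `special_tokens` are added on top of the new base model, since the resized embeddings would otherwise be lost. A sketch of the equivalent standalone PEFT configuration (the rank and `target_modules` value are assumptions; only the values shown in the hunk are taken from it):

```python
# Illustrative LoraConfig mirroring the test's LoRA settings; partly assumed.
from peft import LoraConfig

lora_config = LoraConfig(
    r=8,  # assumed; the hunk does not show lora_r
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules="all-linear",  # rough analogue of lora_target_linear: True
    # Keep embeddings and LM head trainable/saved so added special tokens persist.
    modules_to_save=["lm_head", "embed_tokens"],
)
```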
```diff
@@ -31,8 +31,7 @@ class TestDPOLlamaLora(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
                 "adapter": "lora",
@@ -77,8 +76,7 @@ class TestDPOLlamaLora(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
                 "adapter": "lora",
@@ -124,8 +122,7 @@ class TestDPOLlamaLora(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
                 "adapter": "lora",
@@ -172,8 +169,7 @@ class TestDPOLlamaLora(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
                 "adapter": "lora",
@@ -218,8 +214,7 @@ class TestDPOLlamaLora(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
                 "adapter": "lora",
@@ -264,8 +259,7 @@ class TestDPOLlamaLora(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
                 "adapter": "lora",
@@ -314,8 +308,7 @@ class TestDPOLlamaLora(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
                 "adapter": "lora",
```
```diff
@@ -26,8 +26,7 @@ class TestLlama:
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "trust_remote_code": True,
                 "sequence_len": 512,
                 "val_set_size": 0.1,
```
```diff
@@ -26,9 +26,8 @@ class TestLoadModelUtils:
         # load config
         self.cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
-                "tokenizer_config": "JackFram/llama-68m",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
+                "tokenizer_config": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "sequence_len": 1024,
                 "load_in_8bit": False,
                 "adapter": "lora",
```
```diff
@@ -28,8 +28,7 @@ class TestLoraLlama(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
                 "adapter": "lora",
@@ -37,6 +36,7 @@ class TestLoraLlama(unittest.TestCase):
                 "lora_alpha": 16,
                 "lora_dropout": 0.05,
                 "lora_target_linear": True,
+                "lora_modules_to_save": ["lm_head", "embed_tokens"],
                 "val_set_size": 0.1,
                 "special_tokens": {
                     "unk_token": "<unk>",
```
```diff
@@ -28,8 +28,7 @@ class TestCustomOptimizers(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
                 "adapter": "lora",
@@ -74,8 +73,7 @@ class TestCustomOptimizers(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
                 "adapter": "lora",
```
```diff
@@ -16,9 +16,8 @@ class NormalizeConfigTestCase(unittest.TestCase):
     def _get_base_cfg(self):
         return DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "base_model_config": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
+                "base_model_config": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "num_epochs": 1,
                 "micro_batch_size": 1,
                 "gradient_accumulation_steps": 1,
```
```diff
@@ -18,9 +18,8 @@ class TestModelsUtils:
         # load config
         self.cfg = DictDefault(  # pylint: disable=attribute-defined-outside-init
             {
-                "base_model": "JackFram/llama-68m",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "model_type": "LlamaForCausalLM",
-                "tokenizer_type": "LlamaTokenizer",
                 "load_in_8bit": True,
                 "load_in_4bit": False,
                 "adapter": "lora",
```
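Taken together, every hunk applies the same substitution: `JackFram/llama-68m` becomes `HuggingFaceTB/SmolLM2-135M-Instruct`, and the explicit `"tokenizer_type": "LlamaTokenizer"` entries are dropped so the tokenizer resolves from the model repo itself. A minimal sketch of the resulting config pattern (the `DictDefault` import path is assumed from the project's layout, not shown in the diff):

```python
# Post-change config pattern; import path assumed.
from axolotl.utils.dict import DictDefault

cfg = DictDefault(
    {
        # New base model; with no tokenizer_type, the tokenizer is loaded
        # from the model repo's own tokenizer files.
        "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
        "sequence_len": 1024,
        "load_in_8bit": True,
        "adapter": "lora",
    }
)
```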