llama4 support

This commit is contained in:
Wing Lian
2025-04-05 17:47:16 -04:00
parent 5f4af3665d
commit 98827e8f3b
3 changed files with 11 additions and 9 deletions

View File

@@ -56,7 +56,7 @@ class TestMultiGPULlama:
],
"num_epochs": 1,
"max_steps": 2,
"micro_batch_size": 4,
"micro_batch_size": 1,
"gradient_accumulation_steps": 4,
# "gradient_checkpointing": True,
"output_dir": temp_dir,
@@ -193,7 +193,7 @@ class TestMultiGPULlama:
],
"num_epochs": 1,
"max_steps": 2,
"micro_batch_size": 4,
"micro_batch_size": 2,
"gradient_accumulation_steps": 4,
# "gradient_checkpointing": True,
"output_dir": temp_dir,
@@ -390,7 +390,7 @@ class TestMultiGPULlama:
"base_model": "HuggingFaceTB/SmolLM2-135M",
"sample_packing": True,
"pad_to_sequence_len": True,
"sequence_len": 2048,
"sequence_len": 1024,
"val_set_size": 0.01,
"special_tokens": {
"pad_token": "<|endoftext|>",
@@ -403,7 +403,7 @@ class TestMultiGPULlama:
],
"num_epochs": 1,
"max_steps": 2,
"micro_batch_size": 4,
"micro_batch_size": 2,
"gradient_accumulation_steps": 2,
# "gradient_checkpointing": True,
"output_dir": temp_dir,
@@ -551,7 +551,7 @@ class TestMultiGPULlama:
"sample_packing": True,
"eval_sample_packing": False,
"pad_to_sequence_len": True,
"sequence_len": 2048,
"sequence_len": 1024,
"val_set_size": 0.01,
"special_tokens": {
"pad_token": "<|endoftext|>",
@@ -565,7 +565,7 @@ class TestMultiGPULlama:
],
"num_epochs": 1,
"max_steps": 2,
"micro_batch_size": 4,
"micro_batch_size": 2,
"gradient_accumulation_steps": 2,
# "gradient_checkpointing": True,
"output_dir": temp_dir,
@@ -651,7 +651,7 @@ class TestMultiGPULlama:
"base_model": "HuggingFaceTB/SmolLM2-135M",
"sample_packing": True,
"pad_to_sequence_len": True,
"sequence_len": 2048,
"sequence_len": 1024,
"val_set_size": 0.01,
"special_tokens": {
"pad_token": "<|endoftext|>",
@@ -724,7 +724,7 @@ class TestMultiGPULlama:
"base_model": "HuggingFaceTB/SmolLM2-135M",
"sample_packing": True,
"pad_to_sequence_len": True,
"sequence_len": 2048,
"sequence_len": 1024,
"val_set_size": 0.01,
"special_tokens": {
"pad_token": "<|endoftext|>",
@@ -797,7 +797,7 @@ class TestMultiGPULlama:
"base_model": "HuggingFaceTB/SmolLM2-135M",
"sample_packing": True,
"pad_to_sequence_len": True,
"sequence_len": 2048,
"sequence_len": 1024,
"val_set_size": 0.01,
"special_tokens": {
"pad_token": "<|endoftext|>",