Compare commits

..

1 Commits

Author SHA1 Message Date
NanoCode012
10d18e6c97 fix(test): replace jackfram llama with smollm 2025-02-28 16:40:49 +07:00
17 changed files with 26 additions and 60 deletions

View File

@@ -78,9 +78,6 @@ tf32: true # require >=ampere
bfloat16: true # require >=ampere
float16: true
# Use Tensor parallel
tensor_parallel: true # require multi-gGPU
# Limit the memory for all available GPUs to this amount (if an integer, expressed in gigabytes); default: unset
gpu_memory_limit: 20GiB
# Do the LoRA/PEFT loading on CPU -- this is required if the base model is so large it takes up most or all of the available GPU VRAM, e.g. during a model and LoRA merge

View File

@@ -703,9 +703,6 @@ class HFCausalTrainerBuilder(TrainerBuilderBase):
"accelerator_config"
] = self.cfg.accelerator_config
if self.cfg.tensor_parallel:
training_arguments_kwargs["tp_size"] = torch.cuda.device_count()
if self.cfg.kd_ce_alpha is not None:
training_arguments_kwargs["kd_ce_alpha"] = self.cfg.kd_ce_alpha
if self.cfg.kd_alpha is not None:

View File

@@ -748,8 +748,6 @@ class AxolotlInputConfig(
local_rank: Optional[int] = None
ddp: Optional[bool] = None
tensor_parallel: Optional[bool] = None
seed: Optional[int] = None
ddp_timeout: Optional[int] = None
ddp_bucket_cap_mb: Optional[int] = None
@@ -1373,13 +1371,6 @@ class AxolotlInputConfig(
)
return data
@model_validator(mode="before")
@classmethod
def check_fsdp_tp(cls, data):
if data.get("fsdp") and data.get("tensor_parallel"):
raise ValueError("FSDP with tensor parallelism is not supported yet.")
return data
@model_validator(mode="after")
def check_fft_possible_bad_config(self):
if (

View File

@@ -762,9 +762,6 @@ class ModelLoader:
return hf_ds_cfg
skip_move_to_device = False
if self.cfg.tensor_parallel:
del self.model_kwargs["device_map"]
if ( # pylint: disable=condition-evals-to-constant)
(self.cfg.fsdp and self.cfg.fsdp_config.fsdp_cpu_ram_efficient_loading)
and not qlora_fsdp

View File

@@ -547,7 +547,6 @@ def prepare_optim_env(cfg):
if not check_cuda_p2p_ib_support():
if os.getenv("NCCL_P2P_DISABLE") is None:
os.environ["NCCL_P2P_DISABLE"] = "1"
if cfg.fsdp:
setup_fsdp_envs(cfg)
elif cfg.deepspeed:

View File

@@ -47,9 +47,9 @@ def download_smollm2_135m_model():
@pytest.fixture(scope="session", autouse=True)
def download_llama_68m_random_model():
def download_smollm2_135m_instruct_model():
# download the model
snapshot_download_w_retry("JackFram/llama-68m")
snapshot_download_w_retry("HuggingFaceTB/SmolLM2-135M-Instruct")
@pytest.fixture(scope="session", autouse=True)

View File

@@ -28,7 +28,7 @@ class Test4dMultipackLlama(unittest.TestCase):
# pylint: disable=duplicate-code
cfg = DictDefault(
{
"base_model": "JackFram/llama-68m",
"base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
"flash_attention": False,
"sdp_attention": True,
"sample_packing": True,
@@ -72,7 +72,7 @@ class Test4dMultipackLlama(unittest.TestCase):
# pylint: disable=duplicate-code
cfg = DictDefault(
{
"base_model": "JackFram/llama-68m",
"base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
"flash_attention": False,
"sdp_attention": False,
"sample_packing": True,

View File

@@ -32,7 +32,7 @@ class TestFusedLlama(unittest.TestCase):
# pylint: disable=duplicate-code
cfg = DictDefault(
{
"base_model": "JackFram/llama-68m",
"base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
"flash_attention": True,
"pad_to_sequence_len": True,
"flash_attn_fuse_qkv": True,

View File

@@ -31,8 +31,7 @@ class TestLlamaShiftedSparseAttention(unittest.TestCase):
# pylint: disable=duplicate-code
cfg = DictDefault(
{
"base_model": "JackFram/llama-68m",
"tokenizer_type": "LlamaTokenizer",
"base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
"sequence_len": 16384,
"sample_packing": False,
"flash_attention": True,
@@ -77,8 +76,7 @@ class TestLlamaShiftedSparseAttention(unittest.TestCase):
# pylint: disable=duplicate-code
cfg = DictDefault(
{
"base_model": "JackFram/llama-68m",
"tokenizer_type": "LlamaTokenizer",
"base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
"sequence_len": 16384,
"sample_packing": False,
"flash_attention": True,

View File

@@ -31,8 +31,7 @@ class TestLoraLlama(unittest.TestCase):
# pylint: disable=duplicate-code
cfg = DictDefault(
{
"base_model": "JackFram/llama-68m",
"tokenizer_type": "LlamaTokenizer",
"base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
"sequence_len": 1024,
"sample_packing": True,
"flash_attention": True,
@@ -43,6 +42,7 @@ class TestLoraLlama(unittest.TestCase):
"lora_dropout": 0.05,
"lora_target_linear": True,
"val_set_size": 0.2,
"lora_modules_to_save": ["lm_head", "embed_tokens"],
"special_tokens": {
"unk_token": "<unk>",
"bos_token": "<s>",

View File

@@ -31,8 +31,7 @@ class TestDPOLlamaLora(unittest.TestCase):
# pylint: disable=duplicate-code
cfg = DictDefault(
{
"base_model": "JackFram/llama-68m",
"tokenizer_type": "LlamaTokenizer",
"base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
"sequence_len": 1024,
"load_in_8bit": True,
"adapter": "lora",
@@ -77,8 +76,7 @@ class TestDPOLlamaLora(unittest.TestCase):
# pylint: disable=duplicate-code
cfg = DictDefault(
{
"base_model": "JackFram/llama-68m",
"tokenizer_type": "LlamaTokenizer",
"base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
"sequence_len": 1024,
"load_in_8bit": True,
"adapter": "lora",
@@ -124,8 +122,7 @@ class TestDPOLlamaLora(unittest.TestCase):
# pylint: disable=duplicate-code
cfg = DictDefault(
{
"base_model": "JackFram/llama-68m",
"tokenizer_type": "LlamaTokenizer",
"base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
"sequence_len": 1024,
"load_in_8bit": True,
"adapter": "lora",
@@ -172,8 +169,7 @@ class TestDPOLlamaLora(unittest.TestCase):
# pylint: disable=duplicate-code
cfg = DictDefault(
{
"base_model": "JackFram/llama-68m",
"tokenizer_type": "LlamaTokenizer",
"base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
"sequence_len": 1024,
"load_in_8bit": True,
"adapter": "lora",
@@ -218,8 +214,7 @@ class TestDPOLlamaLora(unittest.TestCase):
# pylint: disable=duplicate-code
cfg = DictDefault(
{
"base_model": "JackFram/llama-68m",
"tokenizer_type": "LlamaTokenizer",
"base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
"sequence_len": 1024,
"load_in_8bit": True,
"adapter": "lora",
@@ -264,8 +259,7 @@ class TestDPOLlamaLora(unittest.TestCase):
# pylint: disable=duplicate-code
cfg = DictDefault(
{
"base_model": "JackFram/llama-68m",
"tokenizer_type": "LlamaTokenizer",
"base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
"sequence_len": 1024,
"load_in_8bit": True,
"adapter": "lora",
@@ -314,8 +308,7 @@ class TestDPOLlamaLora(unittest.TestCase):
# pylint: disable=duplicate-code
cfg = DictDefault(
{
"base_model": "JackFram/llama-68m",
"tokenizer_type": "LlamaTokenizer",
"base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
"sequence_len": 1024,
"load_in_8bit": True,
"adapter": "lora",

View File

@@ -26,8 +26,7 @@ class TestLlama:
# pylint: disable=duplicate-code
cfg = DictDefault(
{
"base_model": "JackFram/llama-68m",
"tokenizer_type": "LlamaTokenizer",
"base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
"trust_remote_code": True,
"sequence_len": 512,
"val_set_size": 0.1,

View File

@@ -26,9 +26,8 @@ class TestLoadModelUtils:
# load config
self.cfg = DictDefault(
{
"base_model": "JackFram/llama-68m",
"tokenizer_type": "LlamaTokenizer",
"tokenizer_config": "JackFram/llama-68m",
"base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
"tokenizer_config": "HuggingFaceTB/SmolLM2-135M-Instruct",
"sequence_len": 1024,
"load_in_8bit": False,
"adapter": "lora",

View File

@@ -28,8 +28,7 @@ class TestLoraLlama(unittest.TestCase):
# pylint: disable=duplicate-code
cfg = DictDefault(
{
"base_model": "JackFram/llama-68m",
"tokenizer_type": "LlamaTokenizer",
"base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
"sequence_len": 1024,
"load_in_8bit": True,
"adapter": "lora",
@@ -37,6 +36,7 @@ class TestLoraLlama(unittest.TestCase):
"lora_alpha": 16,
"lora_dropout": 0.05,
"lora_target_linear": True,
"lora_modules_to_save": ["lm_head", "embed_tokens"],
"val_set_size": 0.1,
"special_tokens": {
"unk_token": "<unk>",

View File

@@ -28,8 +28,7 @@ class TestCustomOptimizers(unittest.TestCase):
# pylint: disable=duplicate-code
cfg = DictDefault(
{
"base_model": "JackFram/llama-68m",
"tokenizer_type": "LlamaTokenizer",
"base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
"sequence_len": 1024,
"load_in_8bit": True,
"adapter": "lora",
@@ -74,8 +73,7 @@ class TestCustomOptimizers(unittest.TestCase):
# pylint: disable=duplicate-code
cfg = DictDefault(
{
"base_model": "JackFram/llama-68m",
"tokenizer_type": "LlamaTokenizer",
"base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
"sequence_len": 1024,
"load_in_8bit": True,
"adapter": "lora",

View File

@@ -16,9 +16,8 @@ class NormalizeConfigTestCase(unittest.TestCase):
def _get_base_cfg(self):
return DictDefault(
{
"base_model": "JackFram/llama-68m",
"base_model_config": "JackFram/llama-68m",
"tokenizer_type": "LlamaTokenizer",
"base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
"base_model_config": "HuggingFaceTB/SmolLM2-135M-Instruct",
"num_epochs": 1,
"micro_batch_size": 1,
"gradient_accumulation_steps": 1,

View File

@@ -18,9 +18,8 @@ class TestModelsUtils:
# load config
self.cfg = DictDefault( # pylint: disable=attribute-defined-outside-init
{
"base_model": "JackFram/llama-68m",
"base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
"model_type": "LlamaForCausalLM",
"tokenizer_type": "LlamaTokenizer",
"load_in_8bit": True,
"load_in_4bit": False,
"adapter": "lora",