llama4 support (#2493)

* llama4 support

* add xet support [skip ci]

* be flexible on the transformers version and skip the test based on version

* don't use deepspeed for the fix_untrained_tokens test

* reordering to trigger torch 2.6.0 tests first

* slightly smaller train set

* use 4.51.0 for now

* remove stray print, add llama4 chat template to schema, bump peft to 0.15.1

* patches to make llama4 performant

* add preliminary fp8 support
This commit is contained in:
Wing Lian
2025-04-07 10:49:15 -04:00
committed by GitHub
parent 5f4af3665d
commit 8bbad21bfd
17 changed files with 409 additions and 34 deletions

View File

@@ -31,7 +31,7 @@ class TestMultiGPURay:
cfg = DictDefault(
{
"base_model": "HuggingFaceTB/SmolLM2-135M",
-"sequence_len": 2048,
+"sequence_len": 1024,
"adapter": "lora",
"lora_r": 8,
"lora_alpha": 16,
@@ -94,8 +94,8 @@ class TestMultiGPURay:
"base_model": "HuggingFaceTB/SmolLM2-135M",
"sample_packing": True,
"pad_to_sequence_len": True,
-"sequence_len": 2048,
-"val_set_size": 0.05,
+"sequence_len": 1024,
+"val_set_size": 0.01,
"special_tokens": {
"pad_token": "<|endoftext|>",
},