Update dependencies and show slow tests in CI (#2492)
* use latest torchao, gradio, schedule-free * get info on slow tests * speed up tests by avoiding gradient checkpointing and reducing eval size
This commit is contained in:
@@ -2,5 +2,5 @@
|
|||||||
set -e
|
set -e
|
||||||
|
|
||||||
# only run one test at a time so as not to OOM the GPU
|
# only run one test at a time so as not to OOM the GPU
|
||||||
pytest -v -n2 /workspace/axolotl/tests/e2e/multigpu/ --ignore=/workspace/axolotl/tests/e2e/multigpu/solo/
|
pytest -v --durations=10 -n2 /workspace/axolotl/tests/e2e/multigpu/ --ignore=/workspace/axolotl/tests/e2e/multigpu/solo/
|
||||||
pytest -v -n1 /workspace/axolotl/tests/e2e/multigpu/solo/
|
pytest -v --durations=10 -n1 /workspace/axolotl/tests/e2e/multigpu/solo/
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ trl==0.16.0
|
|||||||
optimum==1.16.2
|
optimum==1.16.2
|
||||||
hf_transfer
|
hf_transfer
|
||||||
sentencepiece
|
sentencepiece
|
||||||
gradio==3.50.2
|
gradio==5.23.3
|
||||||
|
|
||||||
modal==0.70.5
|
modal==0.70.5
|
||||||
pydantic==2.10.6
|
pydantic==2.10.6
|
||||||
@@ -59,8 +59,8 @@ langdetect==1.0.9
|
|||||||
immutabledict==4.2.0
|
immutabledict==4.2.0
|
||||||
antlr4-python3-runtime==4.13.2
|
antlr4-python3-runtime==4.13.2
|
||||||
|
|
||||||
torchao==0.7.0
|
torchao==0.9.0
|
||||||
schedulefree==1.3.0
|
schedulefree==1.4.1
|
||||||
|
|
||||||
axolotl-contribs-lgpl==0.0.6
|
axolotl-contribs-lgpl==0.0.6
|
||||||
axolotl-contribs-mit==0.0.3
|
axolotl-contribs-mit==0.0.3
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ class TestMultiGPULlama:
|
|||||||
"lora_alpha": 16,
|
"lora_alpha": 16,
|
||||||
"lora_dropout": 0.05,
|
"lora_dropout": 0.05,
|
||||||
"lora_target_linear": True,
|
"lora_target_linear": True,
|
||||||
"val_set_size": 0.05,
|
"val_set_size": 0.01,
|
||||||
"special_tokens": {
|
"special_tokens": {
|
||||||
"pad_token": "<|endoftext|>",
|
"pad_token": "<|endoftext|>",
|
||||||
},
|
},
|
||||||
@@ -58,7 +58,7 @@ class TestMultiGPULlama:
|
|||||||
"max_steps": 2,
|
"max_steps": 2,
|
||||||
"micro_batch_size": 4,
|
"micro_batch_size": 4,
|
||||||
"gradient_accumulation_steps": 4,
|
"gradient_accumulation_steps": 4,
|
||||||
"gradient_checkpointing": True,
|
# "gradient_checkpointing": True,
|
||||||
"output_dir": temp_dir,
|
"output_dir": temp_dir,
|
||||||
"learning_rate": 0.00001,
|
"learning_rate": 0.00001,
|
||||||
"optimizer": "adamw_8bit",
|
"optimizer": "adamw_8bit",
|
||||||
@@ -108,7 +108,7 @@ class TestMultiGPULlama:
|
|||||||
"lora_alpha": 16,
|
"lora_alpha": 16,
|
||||||
"lora_dropout": 0.05,
|
"lora_dropout": 0.05,
|
||||||
"lora_target_linear": True,
|
"lora_target_linear": True,
|
||||||
"val_set_size": 0.05,
|
"val_set_size": 0.01,
|
||||||
"special_tokens": {
|
"special_tokens": {
|
||||||
"pad_token": "<|endoftext|>",
|
"pad_token": "<|endoftext|>",
|
||||||
},
|
},
|
||||||
@@ -122,7 +122,7 @@ class TestMultiGPULlama:
|
|||||||
"max_steps": 2,
|
"max_steps": 2,
|
||||||
"micro_batch_size": 1,
|
"micro_batch_size": 1,
|
||||||
"gradient_accumulation_steps": gradient_accumulation_steps,
|
"gradient_accumulation_steps": gradient_accumulation_steps,
|
||||||
"gradient_checkpointing": True,
|
# "gradient_checkpointing": True,
|
||||||
"output_dir": temp_dir,
|
"output_dir": temp_dir,
|
||||||
"learning_rate": 0.00001,
|
"learning_rate": 0.00001,
|
||||||
"optimizer": "adamw_8bit",
|
"optimizer": "adamw_8bit",
|
||||||
@@ -169,7 +169,7 @@ class TestMultiGPULlama:
|
|||||||
"lora_alpha": 16,
|
"lora_alpha": 16,
|
||||||
"lora_dropout": 0.05,
|
"lora_dropout": 0.05,
|
||||||
"lora_target_linear": True,
|
"lora_target_linear": True,
|
||||||
"val_set_size": 0.05,
|
"val_set_size": 0.01,
|
||||||
"special_tokens": {
|
"special_tokens": {
|
||||||
"pad_token": "<|endoftext|>",
|
"pad_token": "<|endoftext|>",
|
||||||
},
|
},
|
||||||
@@ -195,7 +195,7 @@ class TestMultiGPULlama:
|
|||||||
"max_steps": 2,
|
"max_steps": 2,
|
||||||
"micro_batch_size": 4,
|
"micro_batch_size": 4,
|
||||||
"gradient_accumulation_steps": 4,
|
"gradient_accumulation_steps": 4,
|
||||||
"gradient_checkpointing": True,
|
# "gradient_checkpointing": True,
|
||||||
"output_dir": temp_dir,
|
"output_dir": temp_dir,
|
||||||
"warmup_steps": 0,
|
"warmup_steps": 0,
|
||||||
"learning_rate": 0.00001,
|
"learning_rate": 0.00001,
|
||||||
@@ -247,7 +247,7 @@ class TestMultiGPULlama:
|
|||||||
"lora_alpha": 16,
|
"lora_alpha": 16,
|
||||||
"lora_dropout": 0.05,
|
"lora_dropout": 0.05,
|
||||||
"lora_target_linear": True,
|
"lora_target_linear": True,
|
||||||
"val_set_size": 0.05,
|
"val_set_size": 0.01,
|
||||||
"special_tokens": {
|
"special_tokens": {
|
||||||
"pad_token": "<|endoftext|>",
|
"pad_token": "<|endoftext|>",
|
||||||
},
|
},
|
||||||
@@ -273,7 +273,7 @@ class TestMultiGPULlama:
|
|||||||
"max_steps": 2,
|
"max_steps": 2,
|
||||||
"micro_batch_size": 2,
|
"micro_batch_size": 2,
|
||||||
"gradient_accumulation_steps": 4,
|
"gradient_accumulation_steps": 4,
|
||||||
"gradient_checkpointing": True,
|
# "gradient_checkpointing": True,
|
||||||
"output_dir": temp_dir,
|
"output_dir": temp_dir,
|
||||||
"warmup_steps": 0,
|
"warmup_steps": 0,
|
||||||
"learning_rate": 0.00001,
|
"learning_rate": 0.00001,
|
||||||
@@ -334,7 +334,7 @@ class TestMultiGPULlama:
|
|||||||
"max_steps": 2,
|
"max_steps": 2,
|
||||||
"micro_batch_size": 2,
|
"micro_batch_size": 2,
|
||||||
"gradient_accumulation_steps": gradient_accumulation_steps,
|
"gradient_accumulation_steps": gradient_accumulation_steps,
|
||||||
"gradient_checkpointing": True,
|
# "gradient_checkpointing": True,
|
||||||
"output_dir": temp_dir,
|
"output_dir": temp_dir,
|
||||||
"learning_rate": 0.00001,
|
"learning_rate": 0.00001,
|
||||||
"optimizer": "adamw_torch_fused",
|
"optimizer": "adamw_torch_fused",
|
||||||
@@ -391,7 +391,7 @@ class TestMultiGPULlama:
|
|||||||
"sample_packing": True,
|
"sample_packing": True,
|
||||||
"pad_to_sequence_len": True,
|
"pad_to_sequence_len": True,
|
||||||
"sequence_len": 2048,
|
"sequence_len": 2048,
|
||||||
"val_set_size": 0.05,
|
"val_set_size": 0.01,
|
||||||
"special_tokens": {
|
"special_tokens": {
|
||||||
"pad_token": "<|endoftext|>",
|
"pad_token": "<|endoftext|>",
|
||||||
},
|
},
|
||||||
@@ -405,7 +405,7 @@ class TestMultiGPULlama:
|
|||||||
"max_steps": 2,
|
"max_steps": 2,
|
||||||
"micro_batch_size": 4,
|
"micro_batch_size": 4,
|
||||||
"gradient_accumulation_steps": 2,
|
"gradient_accumulation_steps": 2,
|
||||||
"gradient_checkpointing": True,
|
# "gradient_checkpointing": True,
|
||||||
"output_dir": temp_dir,
|
"output_dir": temp_dir,
|
||||||
"learning_rate": 0.00001,
|
"learning_rate": 0.00001,
|
||||||
"optimizer": "adamw_torch_fused",
|
"optimizer": "adamw_torch_fused",
|
||||||
@@ -470,7 +470,7 @@ class TestMultiGPULlama:
|
|||||||
"eval_sample_packing": False,
|
"eval_sample_packing": False,
|
||||||
"pad_to_sequence_len": True,
|
"pad_to_sequence_len": True,
|
||||||
"sequence_len": 2048,
|
"sequence_len": 2048,
|
||||||
"val_set_size": 0.05,
|
"val_set_size": 0.01,
|
||||||
"special_tokens": {
|
"special_tokens": {
|
||||||
"pad_token": "<|endoftext|>",
|
"pad_token": "<|endoftext|>",
|
||||||
},
|
},
|
||||||
@@ -485,7 +485,7 @@ class TestMultiGPULlama:
|
|||||||
"max_steps": 2,
|
"max_steps": 2,
|
||||||
"micro_batch_size": 4,
|
"micro_batch_size": 4,
|
||||||
"gradient_accumulation_steps": 2,
|
"gradient_accumulation_steps": 2,
|
||||||
"gradient_checkpointing": True,
|
# "gradient_checkpointing": True,
|
||||||
"output_dir": temp_dir,
|
"output_dir": temp_dir,
|
||||||
"learning_rate": 0.00001,
|
"learning_rate": 0.00001,
|
||||||
"optimizer": "adamw_torch_fused",
|
"optimizer": "adamw_torch_fused",
|
||||||
@@ -567,7 +567,7 @@ class TestMultiGPULlama:
|
|||||||
"sample_packing": True,
|
"sample_packing": True,
|
||||||
"pad_to_sequence_len": True,
|
"pad_to_sequence_len": True,
|
||||||
"sequence_len": 2048,
|
"sequence_len": 2048,
|
||||||
"val_set_size": 0.05,
|
"val_set_size": 0.01,
|
||||||
"special_tokens": {
|
"special_tokens": {
|
||||||
"pad_token": "<|endoftext|>",
|
"pad_token": "<|endoftext|>",
|
||||||
},
|
},
|
||||||
@@ -640,7 +640,7 @@ class TestMultiGPULlama:
|
|||||||
"sample_packing": True,
|
"sample_packing": True,
|
||||||
"pad_to_sequence_len": True,
|
"pad_to_sequence_len": True,
|
||||||
"sequence_len": 2048,
|
"sequence_len": 2048,
|
||||||
"val_set_size": 0.05,
|
"val_set_size": 0.01,
|
||||||
"special_tokens": {
|
"special_tokens": {
|
||||||
"pad_token": "<|endoftext|>",
|
"pad_token": "<|endoftext|>",
|
||||||
},
|
},
|
||||||
@@ -713,7 +713,7 @@ class TestMultiGPULlama:
|
|||||||
"sample_packing": True,
|
"sample_packing": True,
|
||||||
"pad_to_sequence_len": True,
|
"pad_to_sequence_len": True,
|
||||||
"sequence_len": 2048,
|
"sequence_len": 2048,
|
||||||
"val_set_size": 0.05,
|
"val_set_size": 0.01,
|
||||||
"special_tokens": {
|
"special_tokens": {
|
||||||
"pad_token": "<|endoftext|>",
|
"pad_token": "<|endoftext|>",
|
||||||
},
|
},
|
||||||
@@ -788,7 +788,7 @@ class TestMultiGPULlama:
|
|||||||
"max_steps": 2,
|
"max_steps": 2,
|
||||||
"micro_batch_size": 1,
|
"micro_batch_size": 1,
|
||||||
"gradient_accumulation_steps": 1,
|
"gradient_accumulation_steps": 1,
|
||||||
"gradient_checkpointing": True,
|
# "gradient_checkpointing": True,
|
||||||
"output_dir": temp_dir,
|
"output_dir": temp_dir,
|
||||||
"learning_rate": 0.00001,
|
"learning_rate": 0.00001,
|
||||||
"optimizer": "adamw_torch_fused",
|
"optimizer": "adamw_torch_fused",
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ class TestMultiGPUQwen2:
|
|||||||
"lora_alpha": 16,
|
"lora_alpha": 16,
|
||||||
"lora_dropout": 0.05,
|
"lora_dropout": 0.05,
|
||||||
"lora_target_linear": True,
|
"lora_target_linear": True,
|
||||||
"val_set_size": 0.05,
|
"val_set_size": 0.01,
|
||||||
"datasets": [
|
"datasets": [
|
||||||
{
|
{
|
||||||
"path": "Intel/orca_dpo_pairs",
|
"path": "Intel/orca_dpo_pairs",
|
||||||
@@ -57,7 +57,7 @@ class TestMultiGPUQwen2:
|
|||||||
"flash_attention": True,
|
"flash_attention": True,
|
||||||
"bf16": "auto",
|
"bf16": "auto",
|
||||||
"tf32": True,
|
"tf32": True,
|
||||||
"gradient_checkpointing": True,
|
# "gradient_checkpointing": True,
|
||||||
"gradient_checkpointing_kwargs": {
|
"gradient_checkpointing_kwargs": {
|
||||||
"use_reentrant": False,
|
"use_reentrant": False,
|
||||||
},
|
},
|
||||||
|
|||||||
Reference in New Issue
Block a user