Add ruff, remove black, isort, flake8, pylint (#3092)
* black, isort, flake8 -> ruff * remove unused * add back needed import * fix
This commit is contained in:
@@ -12,8 +12,6 @@ from axolotl.utils.dict import DictDefault
|
||||
|
||||
from ..utils import check_model_output_exists
|
||||
|
||||
# pylint: disable=duplicate-code
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def min_cfg(temp_dir):
|
||||
@@ -53,7 +51,6 @@ class TestCutCrossEntropyIntegration:
|
||||
e2e tests for cut_cross_entropy integration with Axolotl
|
||||
"""
|
||||
|
||||
# pylint: disable=redefined-outer-name
|
||||
def test_llama_w_cce(self, min_cfg, temp_dir):
|
||||
cfg = DictDefault(min_cfg)
|
||||
cfg = validate_config(cfg)
|
||||
@@ -69,7 +66,6 @@ class TestCutCrossEntropyIntegration:
|
||||
train(cfg=cfg, dataset_meta=dataset_meta)
|
||||
check_model_output_exists(temp_dir, cfg)
|
||||
|
||||
# pylint: disable=redefined-outer-name
|
||||
def test_qwen2_w_cce(self, temp_dir):
|
||||
cfg = DictDefault(
|
||||
{
|
||||
|
||||
@@ -18,7 +18,7 @@ class FP8IntegrationTestCase:
|
||||
@require_torch_2_7_0
|
||||
def test_fp8_single_gpu_smoke(self, temp_dir):
|
||||
"""Smoke test for single GPU FP8 + torch.compile training"""
|
||||
# pylint: disable=duplicate-code
|
||||
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -53,7 +53,6 @@ class FP8IntegrationTestCase:
|
||||
}
|
||||
)
|
||||
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = validate_config(cfg)
|
||||
normalize_config(cfg)
|
||||
dataset_meta = load_datasets(cfg=cfg)
|
||||
|
||||
@@ -28,85 +28,81 @@ class LogHooksPlugin(BasePlugin):
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
|
||||
def post_trainer_create(self, cfg, trainer): # pylint: disable=unused-argument
|
||||
def post_trainer_create(self, cfg, trainer):
|
||||
with open(
|
||||
self.base_dir.joinpath("plugin_hooks.log"), "a", encoding="utf-8"
|
||||
) as f:
|
||||
f.write("post_trainer_create\n")
|
||||
|
||||
def pre_model_load(self, cfg): # pylint: disable=unused-argument
|
||||
def pre_model_load(self, cfg):
|
||||
with open(
|
||||
self.base_dir.joinpath("plugin_hooks.log"), "a", encoding="utf-8"
|
||||
) as f:
|
||||
f.write("pre_model_load\n")
|
||||
|
||||
def post_model_build(self, cfg, model): # pylint: disable=unused-argument
|
||||
def post_model_build(self, cfg, model):
|
||||
with open(
|
||||
self.base_dir.joinpath("plugin_hooks.log"), "a", encoding="utf-8"
|
||||
) as f:
|
||||
f.write("post_model_build\n")
|
||||
|
||||
def pre_lora_load(self, cfg, model): # pylint: disable=unused-argument
|
||||
def pre_lora_load(self, cfg, model):
|
||||
with open(
|
||||
self.base_dir.joinpath("plugin_hooks.log"), "a", encoding="utf-8"
|
||||
) as f:
|
||||
f.write("pre_lora_load\n")
|
||||
|
||||
def post_lora_load(self, cfg, model): # pylint: disable=unused-argument
|
||||
def post_lora_load(self, cfg, model):
|
||||
with open(
|
||||
self.base_dir.joinpath("plugin_hooks.log"), "a", encoding="utf-8"
|
||||
) as f:
|
||||
f.write("post_lora_load\n")
|
||||
|
||||
def post_model_load(self, cfg, model): # pylint: disable=unused-argument
|
||||
def post_model_load(self, cfg, model):
|
||||
with open(
|
||||
self.base_dir.joinpath("plugin_hooks.log"), "a", encoding="utf-8"
|
||||
) as f:
|
||||
f.write("post_model_load\n")
|
||||
|
||||
def create_optimizer(self, cfg, trainer): # pylint: disable=unused-argument
|
||||
def create_optimizer(self, cfg, trainer):
|
||||
with open(
|
||||
self.base_dir.joinpath("plugin_hooks.log"), "a", encoding="utf-8"
|
||||
) as f:
|
||||
f.write("create_optimizer\n")
|
||||
|
||||
def get_trainer_cls(self, cfg): # pylint: disable=unused-argument
|
||||
def get_trainer_cls(self, cfg):
|
||||
with open(
|
||||
self.base_dir.joinpath("plugin_hooks.log"), "a", encoding="utf-8"
|
||||
) as f:
|
||||
f.write("get_trainer_cls\n")
|
||||
|
||||
def create_lr_scheduler(
|
||||
self, cfg, trainer, optimizer, num_training_steps
|
||||
): # pylint: disable=unused-argument
|
||||
def create_lr_scheduler(self, cfg, trainer, optimizer, num_training_steps):
|
||||
with open(
|
||||
self.base_dir.joinpath("plugin_hooks.log"), "a", encoding="utf-8"
|
||||
) as f:
|
||||
f.write("create_lr_scheduler\n")
|
||||
|
||||
def add_callbacks_pre_trainer(self, cfg, model): # pylint: disable=unused-argument
|
||||
def add_callbacks_pre_trainer(self, cfg, model):
|
||||
with open(
|
||||
self.base_dir.joinpath("plugin_hooks.log"), "a", encoding="utf-8"
|
||||
) as f:
|
||||
f.write("add_callbacks_pre_trainer\n")
|
||||
return []
|
||||
|
||||
def add_callbacks_post_trainer(
|
||||
self, cfg, trainer
|
||||
): # pylint: disable=unused-argument
|
||||
def add_callbacks_post_trainer(self, cfg, trainer):
|
||||
with open(
|
||||
self.base_dir.joinpath("plugin_hooks.log"), "a", encoding="utf-8"
|
||||
) as f:
|
||||
f.write("add_callbacks_post_trainer\n")
|
||||
return []
|
||||
|
||||
def post_train(self, cfg, model): # pylint: disable=unused-argument
|
||||
def post_train(self, cfg, model):
|
||||
with open(
|
||||
self.base_dir.joinpath("plugin_hooks.log"), "a", encoding="utf-8"
|
||||
) as f:
|
||||
f.write("post_train\n")
|
||||
|
||||
def post_train_unload(self, cfg): # pylint: disable=unused-argument
|
||||
def post_train_unload(self, cfg):
|
||||
with open(
|
||||
self.base_dir.joinpath("plugin_hooks.log"), "a", encoding="utf-8"
|
||||
) as f:
|
||||
@@ -119,7 +115,6 @@ class TestPluginHooks:
|
||||
"""
|
||||
|
||||
def test_plugin_hooks(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -81,7 +81,7 @@ class TestKnowledgeDistillation:
|
||||
@require_torch_2_5_1
|
||||
def test_llama_kd(self, temp_dir, kd_min_cfg):
|
||||
cfg = DictDefault(kd_min_cfg)
|
||||
# pylint: disable=duplicate-code
|
||||
|
||||
# write cfg to yaml file
|
||||
Path(temp_dir).mkdir(parents=True, exist_ok=True)
|
||||
with open(Path(temp_dir) / "config.yaml", "w", encoding="utf-8") as fout:
|
||||
@@ -123,7 +123,7 @@ class TestKnowledgeDistillation:
|
||||
}
|
||||
| kd_min_cfg
|
||||
)
|
||||
# pylint: disable=duplicate-code
|
||||
|
||||
# write cfg to yaml file
|
||||
Path(temp_dir).mkdir(parents=True, exist_ok=True)
|
||||
with open(Path(temp_dir) / "config.yaml", "w", encoding="utf-8") as fout:
|
||||
|
||||
@@ -17,7 +17,6 @@ class LigerIntegrationTestCase:
|
||||
|
||||
@require_torch_2_4_1
|
||||
def test_llama_wo_flce(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -53,7 +52,7 @@ class LigerIntegrationTestCase:
|
||||
"save_first_step": False,
|
||||
}
|
||||
)
|
||||
# pylint: disable=duplicate-code
|
||||
|
||||
cfg = validate_config(cfg)
|
||||
prepare_plugins(cfg)
|
||||
normalize_config(cfg)
|
||||
@@ -64,7 +63,6 @@ class LigerIntegrationTestCase:
|
||||
|
||||
@require_torch_2_4_1
|
||||
def test_llama_w_flce(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -100,7 +98,7 @@ class LigerIntegrationTestCase:
|
||||
"save_first_step": False,
|
||||
}
|
||||
)
|
||||
# pylint: disable=duplicate-code
|
||||
|
||||
cfg = validate_config(cfg)
|
||||
prepare_plugins(cfg)
|
||||
normalize_config(cfg)
|
||||
|
||||
@@ -85,6 +85,6 @@ def test_geglu_inplace_preservation():
|
||||
|
||||
assert not torch.equal(gate, gate_copy), "Gate should be modified in-place"
|
||||
assert not torch.equal(up, up_copy), "Up should be modified in-place"
|
||||
assert not torch.equal(
|
||||
grad_output, grad_copy
|
||||
), "Grad output should be modified in-place"
|
||||
assert not torch.equal(grad_output, grad_copy), (
|
||||
"Grad output should be modified in-place"
|
||||
)
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
"""Tests for LoRA custom autograd."""
|
||||
|
||||
# pylint: disable=invalid-name,redefined-outer-name
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
from bitsandbytes.functional import QuantState
|
||||
@@ -333,7 +331,7 @@ def test_lora_qkv(sample_tensors):
|
||||
X.requires_grad = True
|
||||
|
||||
# Test without LoRA adapters
|
||||
# pylint: disable=duplicate-code
|
||||
|
||||
Q1, K1, V1 = LoRA_QKV.apply(
|
||||
X,
|
||||
q_weight,
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
"""Tests for quantization utility functions."""
|
||||
|
||||
# pylint: disable=invalid-name
|
||||
|
||||
import torch
|
||||
from bitsandbytes.functional import QuantState
|
||||
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
"""Tests for SwiGLU activation function Triton kernels."""
|
||||
|
||||
# pylint: disable=duplicate-code
|
||||
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
|
||||
@@ -74,6 +72,6 @@ def test_swiglu_inplace_preservation():
|
||||
|
||||
assert not torch.equal(gate, gate_copy), "Gate should be modified in-place"
|
||||
assert not torch.equal(up, up_copy), "Up should be modified in-place"
|
||||
assert not torch.equal(
|
||||
grad_output, grad_copy
|
||||
), "Grad output should be modified in-place"
|
||||
assert not torch.equal(grad_output, grad_copy), (
|
||||
"Grad output should be modified in-place"
|
||||
)
|
||||
|
||||
@@ -31,7 +31,6 @@ class TestPackedFlex:
|
||||
|
||||
@require_torch_2_6_0
|
||||
def test_loss_llama(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -80,7 +80,7 @@ def start_vllm(
|
||||
cmd_env = env.copy()
|
||||
cmd_env.update({"VLLM_LOGGING_CONFIG_PATH": vllm_logging_json})
|
||||
# start `trl vllm-serve` command in the background and capture the process id
|
||||
process = subprocess.Popen( # pylint: disable=consider-using-with
|
||||
process = subprocess.Popen(
|
||||
cmd,
|
||||
env=cmd_env,
|
||||
stdout=subprocess.DEVNULL if quiet else subprocess.PIPE,
|
||||
|
||||
@@ -21,7 +21,6 @@ class TestMultiGPUEval:
|
||||
"""
|
||||
|
||||
def test_eval_sample_packing(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -93,7 +92,6 @@ class TestMultiGPUEval:
|
||||
check_tensorboard(temp_dir + "/runs", "eval/loss", 2.5, "Eval Loss is too high")
|
||||
|
||||
def test_eval(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
"""Test module for FP8 mixed precision with FSDP2 multi-GPU functionality."""
|
||||
|
||||
# pylint: disable=duplicate-code
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
@@ -28,9 +26,9 @@ def verify_fp8_training_success(temp_dir):
|
||||
assert len(model_files) > 0, "No model files found - training may have failed"
|
||||
|
||||
checkpoint_files = list(output_path.glob("checkpoint-*"))
|
||||
assert (
|
||||
len(checkpoint_files) > 0
|
||||
), "No checkpoint files found - training may have failed"
|
||||
assert len(checkpoint_files) > 0, (
|
||||
"No checkpoint files found - training may have failed"
|
||||
)
|
||||
|
||||
tb_log_path = most_recent_subdir(temp_dir + "/runs")
|
||||
if tb_log_path:
|
||||
@@ -42,9 +40,9 @@ def verify_fp8_training_success(temp_dir):
|
||||
train_loss_df = df[df.tag == "train/train_loss"]
|
||||
if len(train_loss_df) > 0:
|
||||
final_loss = train_loss_df.value.values[-1]
|
||||
assert not torch.isnan(
|
||||
torch.tensor(final_loss)
|
||||
), f"Training loss is NaN: {final_loss}"
|
||||
assert not torch.isnan(torch.tensor(final_loss)), (
|
||||
f"Training loss is NaN: {final_loss}"
|
||||
)
|
||||
|
||||
|
||||
class TestFP8FSDP2:
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
"""Test module for FSDP1 multi-GPU functionality."""
|
||||
|
||||
# pylint: disable=duplicate-code
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
@@ -29,9 +27,9 @@ def verify_training_success(temp_dir):
|
||||
assert len(model_files) > 0, "No model files found - training may have failed"
|
||||
|
||||
checkpoint_files = list(output_path.glob("checkpoint-*"))
|
||||
assert (
|
||||
len(checkpoint_files) > 0
|
||||
), "No checkpoint files found - training may have failed"
|
||||
assert len(checkpoint_files) > 0, (
|
||||
"No checkpoint files found - training may have failed"
|
||||
)
|
||||
|
||||
tb_log_path = most_recent_subdir(temp_dir + "/runs")
|
||||
if tb_log_path:
|
||||
@@ -43,9 +41,9 @@ def verify_training_success(temp_dir):
|
||||
train_loss_df = df[df.tag == "train/train_loss"]
|
||||
if len(train_loss_df) > 0:
|
||||
final_loss = train_loss_df.value.values[-1]
|
||||
assert not torch.isnan(
|
||||
torch.tensor(final_loss)
|
||||
), f"Training loss is NaN: {final_loss}"
|
||||
assert not torch.isnan(torch.tensor(final_loss)), (
|
||||
f"Training loss is NaN: {final_loss}"
|
||||
)
|
||||
|
||||
|
||||
class TestFSDP1:
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
"""Test module for FSDP2 multi-GPU functionality."""
|
||||
|
||||
# pylint: disable=duplicate-code
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
@@ -29,9 +27,9 @@ def verify_training_success(temp_dir):
|
||||
assert len(model_files) > 0, "No model files found - training may have failed"
|
||||
|
||||
checkpoint_files = list(output_path.glob("checkpoint-*"))
|
||||
assert (
|
||||
len(checkpoint_files) > 0
|
||||
), "No checkpoint files found - training may have failed"
|
||||
assert len(checkpoint_files) > 0, (
|
||||
"No checkpoint files found - training may have failed"
|
||||
)
|
||||
|
||||
tb_log_path = most_recent_subdir(temp_dir + "/runs")
|
||||
if tb_log_path:
|
||||
@@ -43,9 +41,9 @@ def verify_training_success(temp_dir):
|
||||
train_loss_df = df[df.tag == "train/train_loss"]
|
||||
if len(train_loss_df) > 0:
|
||||
final_loss = train_loss_df.value.values[-1]
|
||||
assert not torch.isnan(
|
||||
torch.tensor(final_loss)
|
||||
), f"Training loss is NaN: {final_loss}"
|
||||
assert not torch.isnan(torch.tensor(final_loss)), (
|
||||
f"Training loss is NaN: {final_loss}"
|
||||
)
|
||||
|
||||
|
||||
class TestFSDP2:
|
||||
|
||||
@@ -29,7 +29,6 @@ class TestMultiGPUGemma3:
|
||||
"""
|
||||
|
||||
def test_lora_ddp_packed(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "axolotl-mirrors/gemma-3-4b-pt",
|
||||
|
||||
@@ -35,7 +35,6 @@ class TestMultiGPULlama:
|
||||
"""
|
||||
|
||||
def test_lora_ddp(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -99,7 +98,6 @@ class TestMultiGPULlama:
|
||||
[1, 2],
|
||||
)
|
||||
def test_lora_ddp_packed(self, temp_dir, gradient_accumulation_steps):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -162,7 +160,6 @@ class TestMultiGPULlama:
|
||||
)
|
||||
|
||||
def test_dpo_lora_ddp(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -242,7 +239,6 @@ class TestMultiGPULlama:
|
||||
)
|
||||
|
||||
def test_dpo_qlora_ddp(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -326,7 +322,6 @@ class TestMultiGPULlama:
|
||||
[1, 2],
|
||||
)
|
||||
def test_fsdp(self, temp_dir, gradient_accumulation_steps):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -402,7 +397,6 @@ class TestMultiGPULlama:
|
||||
],
|
||||
)
|
||||
def test_fsdp_packed(self, temp_dir, fsdp_state_dict_type):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -484,7 +478,6 @@ class TestMultiGPULlama:
|
||||
def test_fsdp2_packed(
|
||||
self, temp_dir, attention_backend, fsdp_reshard_after_forward
|
||||
):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -556,7 +549,6 @@ class TestMultiGPULlama:
|
||||
)
|
||||
|
||||
def test_fsdp_qlora_prequant_packed(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "axolotl-ai-co/SmolLM2-135M-bnb-nf4-bf16",
|
||||
@@ -656,7 +648,6 @@ class TestMultiGPULlama:
|
||||
def test_ds_zero3_packed(
|
||||
self, temp_dir, gradient_accumulation_steps, deepspeed, qlora
|
||||
):
|
||||
# pylint: disable=duplicate-code
|
||||
if qlora:
|
||||
adapter = {
|
||||
"adapter": "qlora",
|
||||
@@ -732,7 +723,6 @@ class TestMultiGPULlama:
|
||||
[True, False],
|
||||
)
|
||||
def test_ds_zero2_packed(self, temp_dir, gradient_accumulation_steps, qlora):
|
||||
# pylint: disable=duplicate-code
|
||||
if qlora:
|
||||
adapter = {
|
||||
"adapter": "qlora",
|
||||
@@ -809,7 +799,6 @@ class TestMultiGPULlama:
|
||||
[True, False],
|
||||
)
|
||||
def test_ds_zero1_packed(self, temp_dir, gradient_accumulation_steps, qlora):
|
||||
# pylint: disable=duplicate-code
|
||||
if qlora:
|
||||
adapter = {
|
||||
"adapter": "qlora",
|
||||
@@ -880,7 +869,6 @@ class TestMultiGPULlama:
|
||||
reason="fix untrained tokens brittle with lots of edge cases in latest transformers"
|
||||
)
|
||||
def test_fix_untrained_tokens(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -26,7 +26,6 @@ class TestMultiGPURay:
|
||||
|
||||
@require_torch_lt_2_6_0
|
||||
def test_lora_ddp(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -90,7 +89,6 @@ class TestMultiGPURay:
|
||||
[1, 2],
|
||||
)
|
||||
def test_ds_zero2_packed(self, temp_dir, gradient_accumulation_steps):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -150,7 +148,6 @@ class TestMultiGPURay:
|
||||
[1, 2],
|
||||
)
|
||||
def test_sft_fsdp2_packed(self, temp_dir, gradient_accumulation_steps):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -19,7 +19,6 @@ class TestTensorParallel:
|
||||
)
|
||||
@require_torch_2_7_0
|
||||
def test_fft_sft(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "Qwen/Qwen2.5-0.5B",
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
"""Integration tests for LoRA activation and attention kernels."""
|
||||
|
||||
# pylint: disable=redefined-outer-name
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
@@ -88,7 +86,7 @@ def test_attention_patching_integration(model_name, attention_cls):
|
||||
cfg = DictDefault({"base_model": model_name})
|
||||
|
||||
# Store the original implementation
|
||||
original_forward = getattr(attention_cls, "forward")
|
||||
original_forward = attention_cls.forward
|
||||
|
||||
# Apply patch
|
||||
patch_self_attn_lora(cfg)
|
||||
@@ -104,7 +102,7 @@ def test_attention_patching_integration(model_name, attention_cls):
|
||||
assert hasattr(attention_cls, "_original_forward")
|
||||
|
||||
# Clean up
|
||||
setattr(attention_cls, "forward", original_forward)
|
||||
attention_cls.forward = original_forward
|
||||
delattr(attention_cls, "_original_forward")
|
||||
|
||||
|
||||
@@ -379,9 +377,9 @@ def test_model_architecture(model_config):
|
||||
|
||||
# Verify correct activation function
|
||||
layer = patched_model.model.model.layers[0]
|
||||
assert (
|
||||
layer.mlp.forward.__func__ is model_config["expected_activation"]
|
||||
), f"Wrong activation for {model_config['name']}"
|
||||
assert layer.mlp.forward.__func__ is model_config["expected_activation"], (
|
||||
f"Wrong activation for {model_config['name']}"
|
||||
)
|
||||
|
||||
# Test forward pass
|
||||
inputs = get_test_inputs(model)
|
||||
@@ -390,12 +388,11 @@ def test_model_architecture(model_config):
|
||||
patched_output = patched_model(inputs).logits
|
||||
|
||||
# Check outputs match
|
||||
assert torch.allclose(
|
||||
original_output, patched_output, rtol=1e-4
|
||||
), f"Outputs don't match for {model_config['name']}"
|
||||
assert torch.allclose(original_output, patched_output, rtol=1e-4), (
|
||||
f"Outputs don't match for {model_config['name']}"
|
||||
)
|
||||
|
||||
|
||||
# pylint: disable=duplicate-code
|
||||
def test_kernel_training_integration(temp_dir):
|
||||
"""Test model loading with kernel patches enabled."""
|
||||
from axolotl.cli.utils import load_model_and_tokenizer
|
||||
@@ -563,15 +560,13 @@ def test_kernel_training_integration_dropout_non_zero(temp_dir):
|
||||
model_loader = ModelLoader(cfg, tokenizer)
|
||||
|
||||
# Apply patch
|
||||
model_loader.patch_manager._apply_self_attention_lora_patch() # pylint: disable=protected-access
|
||||
model_loader.patch_manager._apply_self_attention_lora_patch()
|
||||
|
||||
# Verify patch was not applied
|
||||
assert attention_cls.forward == original_forward_method
|
||||
|
||||
# Apply apply_lora_kernel_patches
|
||||
model_loader.patch_manager._apply_lora_kernel_patch( # pylint: disable=protected-access
|
||||
model
|
||||
)
|
||||
model_loader.patch_manager._apply_lora_kernel_patch(model)
|
||||
|
||||
# Verify patch was not applied
|
||||
layers = get_layers(model)
|
||||
|
||||
@@ -19,7 +19,6 @@ class Test4dMultipackLlama(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_sdp_lora_packing(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -67,7 +66,6 @@ class Test4dMultipackLlama(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_torch_lora_packing(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -32,10 +32,9 @@ class TestActivationCheckpointing:
|
||||
def test_activation_checkpointing_offload(
|
||||
self,
|
||||
temp_dir,
|
||||
fix_checkpoint_after_test, # pylint: disable=unused-argument,redefined-outer-name
|
||||
fix_checkpoint_after_test,
|
||||
gradient_checkpointing,
|
||||
):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -10,7 +10,6 @@ from axolotl.cli.config import load_cfg
|
||||
from axolotl.utils.dict import DictDefault
|
||||
|
||||
|
||||
# pylint: disable=duplicate-code
|
||||
class TestPluginArgs:
|
||||
"""
|
||||
test class for plugin args loaded from the config file
|
||||
|
||||
@@ -23,7 +23,6 @@ class TestFAXentropyLlama:
|
||||
[1, 4],
|
||||
)
|
||||
def test_lora_packing_fa_cross_entropy(self, temp_dir, gradient_accumulation_steps):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -22,7 +22,6 @@ class TestFalconPatched(unittest.TestCase):
|
||||
@pytest.mark.skip(reason="no tiny models for testing with safetensors")
|
||||
@with_temp_dir
|
||||
def test_qlora(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "illuin/tiny-random-FalconForCausalLM",
|
||||
@@ -71,7 +70,6 @@ class TestFalconPatched(unittest.TestCase):
|
||||
@pytest.mark.skip(reason="no tiny models for testing with safetensors")
|
||||
@with_temp_dir
|
||||
def test_ft(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "illuin/tiny-random-FalconForCausalLM",
|
||||
|
||||
@@ -23,7 +23,6 @@ class TestFAFlattening:
|
||||
[1, 4],
|
||||
)
|
||||
def test_lora_packing_flattening(self, temp_dir, gradient_accumulation_steps):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -15,7 +15,6 @@ class TestFSDPPatchIntegration:
|
||||
apply_init_unsharded_param_patch,
|
||||
)
|
||||
|
||||
# pylint: disable=protected-access
|
||||
original_init_sharded = FSDPParam._init_sharded_param
|
||||
original_init_unsharded = FSDPParam.init_unsharded_param
|
||||
|
||||
@@ -23,11 +22,9 @@ class TestFSDPPatchIntegration:
|
||||
apply_init_sharded_param_patch()
|
||||
apply_init_unsharded_param_patch()
|
||||
|
||||
assert (
|
||||
# pylint: disable=protected-access
|
||||
FSDPParam._init_sharded_param
|
||||
!= original_init_sharded
|
||||
), "_init_sharded_param was not patched"
|
||||
assert (
|
||||
FSDPParam.init_unsharded_param != original_init_unsharded
|
||||
), "init_unsharded_param was not patched"
|
||||
assert FSDPParam._init_sharded_param != original_init_sharded, (
|
||||
"_init_sharded_param was not patched"
|
||||
)
|
||||
assert FSDPParam.init_unsharded_param != original_init_unsharded, (
|
||||
"init_unsharded_param was not patched"
|
||||
)
|
||||
|
||||
@@ -23,7 +23,6 @@ class TestFusedLlama(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_fft_packing(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -22,7 +22,6 @@ class TestLlamaShiftedSparseAttention(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_lora_s2_attn(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -71,7 +70,6 @@ class TestLlamaShiftedSparseAttention(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_fft_s2_attn(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -22,7 +22,6 @@ class TestLoraLlama(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_lora_packing(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -73,7 +72,6 @@ class TestLoraLlama(unittest.TestCase):
|
||||
@pytest.mark.skipif(not is_auto_gptq_available(), reason="auto-gptq not available")
|
||||
@with_temp_dir
|
||||
def test_lora_gptq_packed(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "lilmeaty/SmolLM2-135M-Instruct-GPTQ",
|
||||
|
||||
@@ -20,7 +20,6 @@ class TestMistral(unittest.TestCase):
|
||||
@require_torch_2_6_0
|
||||
@with_temp_dir
|
||||
def test_lora_packing(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "trl-internal-testing/tiny-MistralForCausalLM-0.2",
|
||||
@@ -68,7 +67,6 @@ class TestMistral(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_ft_packing(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "trl-internal-testing/tiny-MistralForCausalLM-0.2",
|
||||
|
||||
@@ -19,7 +19,6 @@ class TestMixtral(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_qlora(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "hf-internal-testing/Mixtral-tiny",
|
||||
@@ -64,7 +63,6 @@ class TestMixtral(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_ft(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "hf-internal-testing/Mixtral-tiny",
|
||||
|
||||
@@ -89,5 +89,5 @@ class TestModelPatches(unittest.TestCase):
|
||||
|
||||
assert (
|
||||
"torch.jit"
|
||||
in transformers.modeling_flash_attention_utils._get_unpad_data.__module__ # pylint: disable=protected-access
|
||||
in transformers.modeling_flash_attention_utils._get_unpad_data.__module__
|
||||
)
|
||||
|
||||
@@ -15,7 +15,6 @@ class TestLlamaPeftEmbeddings:
|
||||
"""
|
||||
|
||||
def test_peft_embeddings_upcast(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -19,7 +19,6 @@ class TestPhiMultipack(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_ft_packed(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "microsoft/phi-1_5",
|
||||
@@ -67,7 +66,6 @@ class TestPhiMultipack(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_qlora_packed(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "microsoft/phi-1_5",
|
||||
|
||||
@@ -22,7 +22,6 @@ class TestResumeLlama:
|
||||
|
||||
@require_torch_2_6_0
|
||||
def test_resume_lora_packed(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -12,7 +12,6 @@ from axolotl.utils.dict import DictDefault
|
||||
from ..utils import check_model_output_exists, check_tensorboard
|
||||
|
||||
|
||||
# pylint: disable=duplicate-code
|
||||
@pytest.mark.skip(
|
||||
reason="Unsloth integration will be broken going into latest transformers"
|
||||
)
|
||||
|
||||
@@ -22,7 +22,6 @@ class TestPackedFlex(unittest.TestCase):
|
||||
@require_torch_2_6_0
|
||||
@with_temp_dir
|
||||
def test_loss_llama(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -20,7 +20,6 @@ class TestReLoraLlama(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_relora(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -76,9 +75,9 @@ class TestReLoraLlama(unittest.TestCase):
|
||||
|
||||
train(cfg=cfg, dataset_meta=dataset_meta)
|
||||
check_model_output_exists(Path(temp_dir) / "checkpoint-100/adapter", cfg)
|
||||
assert (
|
||||
Path(temp_dir) / "checkpoint-100/relora/model.safetensors"
|
||||
).exists(), "Relora model checkpoint not found"
|
||||
assert (Path(temp_dir) / "checkpoint-100/relora/model.safetensors").exists(), (
|
||||
"Relora model checkpoint not found"
|
||||
)
|
||||
|
||||
check_tensorboard(
|
||||
temp_dir + "/runs", "train/grad_norm", 0.2, "grad_norm is too high"
|
||||
|
||||
@@ -11,8 +11,6 @@ from axolotl.utils.dict import DictDefault
|
||||
|
||||
from .utils import check_model_output_exists
|
||||
|
||||
# pylint: disable=duplicate-code
|
||||
|
||||
|
||||
class TestActivationOffloading:
|
||||
"""
|
||||
@@ -28,7 +26,6 @@ class TestActivationOffloading:
|
||||
temp_dir,
|
||||
adapter,
|
||||
):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -25,7 +25,6 @@ class TestDeepseekV3:
|
||||
[True, False],
|
||||
)
|
||||
def test_lora_deepseekv3(self, temp_dir, sample_packing):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "axolotl-ai-co/DeepSeek-V3-11M",
|
||||
@@ -83,7 +82,6 @@ class TestDeepseekV3:
|
||||
[True, False],
|
||||
)
|
||||
def test_fft_deepseekv3(self, temp_dir, sample_packing):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "axolotl-ai-co/DeepSeek-V3-11M",
|
||||
|
||||
@@ -21,7 +21,6 @@ class TestDPOLlamaLora(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_dpo_lora(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -70,7 +69,6 @@ class TestDPOLlamaLora(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_dpo_nll_lora(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -120,7 +118,6 @@ class TestDPOLlamaLora(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_dpo_use_weighting(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -171,7 +168,6 @@ class TestDPOLlamaLora(unittest.TestCase):
|
||||
@pytest.mark.skip("kto_pair no longer supported in trl")
|
||||
@with_temp_dir
|
||||
def test_kto_pair_lora(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -220,7 +216,6 @@ class TestDPOLlamaLora(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_ipo_lora(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -269,7 +264,6 @@ class TestDPOLlamaLora(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_orpo_lora(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -322,7 +316,6 @@ class TestDPOLlamaLora(unittest.TestCase):
|
||||
@pytest.mark.skip(reason="Fix the implementation")
|
||||
@with_temp_dir
|
||||
def test_kto_lora(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -19,7 +19,6 @@ class TestEmbeddingsLrScale(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_train_w_embedding_lr_scale(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -65,7 +64,6 @@ class TestEmbeddingsLrScale(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_train_w_embedding_lr(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -13,7 +13,6 @@ class TestE2eEvaluate:
|
||||
"""Test cases for evaluate CLI"""
|
||||
|
||||
def test_evaluate(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -22,7 +22,6 @@ class TestFalcon(unittest.TestCase):
|
||||
@pytest.mark.skip(reason="no tiny models for testing with safetensors")
|
||||
@with_temp_dir
|
||||
def test_lora(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "illuin/tiny-random-FalconForCausalLM",
|
||||
@@ -74,7 +73,6 @@ class TestFalcon(unittest.TestCase):
|
||||
@pytest.mark.skip(reason="no tiny models for testing with safetensors")
|
||||
@with_temp_dir
|
||||
def test_lora_added_vocab(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "illuin/tiny-random-FalconForCausalLM",
|
||||
@@ -130,7 +128,6 @@ class TestFalcon(unittest.TestCase):
|
||||
@pytest.mark.skip(reason="no tiny models for testing with safetensors")
|
||||
@with_temp_dir
|
||||
def test_ft(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "illuin/tiny-random-FalconForCausalLM",
|
||||
|
||||
@@ -22,7 +22,6 @@ class TestGemma2:
|
||||
[True, False],
|
||||
)
|
||||
def test_lora_gemma2(self, temp_dir, sample_packing):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "axolotl-ai-co/gemma-2-33M",
|
||||
@@ -78,7 +77,6 @@ class TestGemma2:
|
||||
[True, False],
|
||||
)
|
||||
def test_fft_gemma2(self, temp_dir, sample_packing):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "axolotl-ai-co/gemma-2-33M",
|
||||
|
||||
@@ -22,7 +22,6 @@ class TestGemma3Text:
|
||||
[True, False],
|
||||
)
|
||||
def test_lora_gemma3_text(self, temp_dir, sample_packing):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "axolotl-ai-co/gemma-3-34M",
|
||||
@@ -78,7 +77,6 @@ class TestGemma3Text:
|
||||
[True, False],
|
||||
)
|
||||
def test_fft_gemma3_text(self, temp_dir, sample_packing):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "axolotl-ai-co/gemma-3-34M",
|
||||
|
||||
@@ -11,11 +11,7 @@ class TestImports(unittest.TestCase):
|
||||
"""
|
||||
|
||||
def test_import_causal_trainer(self):
|
||||
from axolotl.core.builders import ( # pylint: disable=unused-import # noqa: F401
|
||||
HFCausalTrainerBuilder,
|
||||
)
|
||||
pass
|
||||
|
||||
def test_import_rl_trainer(self):
|
||||
from axolotl.core.builders import ( # pylint: disable=unused-import # noqa: F401
|
||||
HFRLTrainerBuilder,
|
||||
)
|
||||
pass
|
||||
|
||||
@@ -16,7 +16,6 @@ class TestLlama:
|
||||
"""
|
||||
|
||||
def test_fft_trust_remote_code(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -57,7 +56,6 @@ class TestLlama:
|
||||
check_model_output_exists(temp_dir, cfg)
|
||||
|
||||
def test_fix_untrained_tokens(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -105,7 +103,6 @@ class TestLlama:
|
||||
check_model_output_exists(temp_dir, cfg)
|
||||
|
||||
def test_fix_untrained_tokens_already_trained(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -150,7 +147,6 @@ class TestLlama:
|
||||
check_model_output_exists(temp_dir, cfg)
|
||||
|
||||
def test_batch_flattening(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -22,7 +22,6 @@ class TestPretrainLlama:
|
||||
],
|
||||
)
|
||||
def test_pretrain(self, temp_dir, sample_packing, pretrain_multipack_attn):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -19,7 +19,6 @@ class TestLlamaVision(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_lora_llama_vision_text_only_dataset(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "axolotl-ai-co/Llama-3.2-39M-Vision",
|
||||
@@ -67,7 +66,6 @@ class TestLlamaVision(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_lora_llama_vision_multimodal_dataset(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "axolotl-ai-co/Llama-3.2-39M-Vision",
|
||||
|
||||
@@ -56,13 +56,11 @@ class TestLoadModelUtils:
|
||||
"context_parallel_size": 1,
|
||||
}
|
||||
)
|
||||
self.model_loader = ( # pylint: disable=attribute-defined-outside-init
|
||||
ModelLoader(
|
||||
cfg=self.cfg,
|
||||
tokenizer="",
|
||||
inference=False,
|
||||
reference_model=True,
|
||||
)
|
||||
self.model_loader = ModelLoader(
|
||||
cfg=self.cfg,
|
||||
tokenizer="",
|
||||
inference=False,
|
||||
reference_model=True,
|
||||
)
|
||||
|
||||
@pytest.mark.parametrize("embedding_modules", ["embed_tokens", "lm_head"])
|
||||
@@ -74,7 +72,7 @@ class TestLoadModelUtils:
|
||||
self, temp_dir, embedding_modules, dist_dtype, before_kbit_train_or_finetune
|
||||
):
|
||||
self.cfg.output_dir = temp_dir
|
||||
self.model_loader.tokenizer = load_tokenizer(self.cfg) # pylint: disable=all
|
||||
self.model_loader.tokenizer = load_tokenizer(self.cfg)
|
||||
self.model_loader.load()
|
||||
self.model_loader._convert_embedding_modules_dtype(
|
||||
embedding_modules, dist_dtype, before_kbit_train_or_finetune
|
||||
|
||||
@@ -19,7 +19,6 @@ class TestLoraLlama(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_lora(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -22,7 +22,6 @@ class TestMamba(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_fft(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "state-spaces/mamba-130m",
|
||||
|
||||
@@ -21,7 +21,6 @@ class TestMistral(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_lora(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "trl-internal-testing/tiny-MistralForCausalLM-0.2",
|
||||
@@ -68,7 +67,6 @@ class TestMistral(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_ft(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "trl-internal-testing/tiny-MistralForCausalLM-0.2",
|
||||
|
||||
@@ -22,7 +22,6 @@ class TestMixtral(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_qlora_w_fa2(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "hf-internal-testing/Mixtral-tiny",
|
||||
@@ -78,7 +77,6 @@ class TestMixtral(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_qlora_wo_fa2(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "hf-internal-testing/Mixtral-tiny",
|
||||
@@ -134,7 +132,6 @@ class TestMixtral(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_16bit_lora_w_fa2(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "hf-internal-testing/Mixtral-tiny",
|
||||
@@ -193,7 +190,6 @@ class TestMixtral(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_16bit_lora_wo_fa2(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "hf-internal-testing/Mixtral-tiny",
|
||||
@@ -252,7 +248,6 @@ class TestMixtral(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_ft(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "hf-internal-testing/Mixtral-tiny",
|
||||
|
||||
@@ -25,7 +25,6 @@ class TestCustomOptimizers(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_optimi_adamw(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -71,7 +70,6 @@ class TestCustomOptimizers(unittest.TestCase):
|
||||
@with_temp_dir
|
||||
@require_torch_2_5_1
|
||||
def test_adopt_adamw(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -117,7 +115,6 @@ class TestCustomOptimizers(unittest.TestCase):
|
||||
@with_temp_dir
|
||||
@require_torch_2_5_1
|
||||
def test_muon(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -164,7 +161,6 @@ class TestCustomOptimizers(unittest.TestCase):
|
||||
@with_temp_dir
|
||||
@require_torch_2_7_0
|
||||
def test_dion(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -206,7 +202,6 @@ class TestCustomOptimizers(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_fft_schedule_free_adamw(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -234,7 +229,6 @@ class TestCustomOptimizers(unittest.TestCase):
|
||||
"save_first_step": False,
|
||||
}
|
||||
)
|
||||
# pylint: disable=duplicate-code
|
||||
|
||||
cfg = validate_config(cfg)
|
||||
normalize_config(cfg)
|
||||
@@ -246,7 +240,6 @@ class TestCustomOptimizers(unittest.TestCase):
|
||||
@with_temp_dir
|
||||
@require_torch_2_6_0
|
||||
def test_came_pytorch(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "JackFram/llama-68m",
|
||||
|
||||
@@ -21,7 +21,6 @@ class TestPackedLlama(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_loss_packed(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -19,7 +19,6 @@ class TestPhi(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_phi_ft(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "microsoft/phi-1_5",
|
||||
@@ -65,7 +64,6 @@ class TestPhi(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_phi_qlora(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "microsoft/phi-1_5",
|
||||
|
||||
@@ -15,7 +15,7 @@ class TestPreprocess:
|
||||
|
||||
def test_w_deepspeed(self, temp_dir):
|
||||
"""make sure preproces doesn't choke when using deepspeed in the config"""
|
||||
# pylint: disable=duplicate-code
|
||||
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "Qwen/Qwen2.5-0.5B",
|
||||
|
||||
@@ -19,7 +19,6 @@ class TestProcessRewardSmolLM2(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_prm(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -18,7 +18,6 @@ class TestQATLlama:
|
||||
"""
|
||||
|
||||
def test_qat(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -68,7 +67,6 @@ class TestQATLlama:
|
||||
check_model_output_exists(Path(temp_dir) / "checkpoint-5", cfg)
|
||||
|
||||
def test_qat_dpo(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -131,7 +131,7 @@ class TestQuantization:
|
||||
@require_torch_2_6_0
|
||||
def test_prepare_model_for_qat(
|
||||
self, model, weight_dtype, activation_dtype, group_size, quantize_embedding
|
||||
): # pylint: disable=redefined-outer-name
|
||||
):
|
||||
prepare_model_for_qat(
|
||||
model, weight_dtype, group_size, activation_dtype, quantize_embedding
|
||||
)
|
||||
@@ -175,7 +175,7 @@ class TestQuantization:
|
||||
group_size,
|
||||
quantize_embedding,
|
||||
expected_exception,
|
||||
): # pylint: disable=redefined-outer-name
|
||||
):
|
||||
if expected_exception:
|
||||
with pytest.raises(expected_exception):
|
||||
quantize_model_for_ptq(
|
||||
@@ -198,11 +198,13 @@ class TestQuantization:
|
||||
if activation_dtype:
|
||||
assert isinstance(
|
||||
child.weight, LinearActivationQuantizedTensor
|
||||
), "Linear weight should be quantized with activation quantization"
|
||||
), (
|
||||
"Linear weight should be quantized with activation quantization"
|
||||
)
|
||||
else:
|
||||
assert isinstance(
|
||||
child.weight, AffineQuantizedTensor
|
||||
), "Linear weight should be quantized without activation quantization"
|
||||
assert isinstance(child.weight, AffineQuantizedTensor), (
|
||||
"Linear weight should be quantized without activation quantization"
|
||||
)
|
||||
|
||||
|
||||
class TestQuantizationCallback:
|
||||
@@ -217,9 +219,7 @@ class TestQuantizationCallback:
|
||||
)
|
||||
|
||||
@require_torch_2_6_0
|
||||
def test_qat_callback_fake_quant_after_n_steps(
|
||||
self, model, trainer_state
|
||||
): # pylint: disable=redefined-outer-name
|
||||
def test_qat_callback_fake_quant_after_n_steps(self, model, trainer_state):
|
||||
cfg = QATConfig(
|
||||
weight_dtype="int8",
|
||||
activation_dtype="int8",
|
||||
@@ -269,9 +269,7 @@ class TestQuantizationCallback:
|
||||
assert model.lm_head.weight_fake_quantizer.enabled
|
||||
|
||||
@require_torch_2_6_0
|
||||
def test_qat_callback_fake_quant_after_n_steps_is_none(
|
||||
self, model, trainer_state
|
||||
): # pylint: disable=redefined-outer-name
|
||||
def test_qat_callback_fake_quant_after_n_steps_is_none(self, model, trainer_state):
|
||||
cfg = QATConfig(
|
||||
weight_dtype="int8",
|
||||
activation_dtype="int8",
|
||||
@@ -314,9 +312,7 @@ class TestConvertQATModelForPTQ:
|
||||
"""
|
||||
|
||||
@require_torch_2_6_0
|
||||
def test_convert_qat_model_for_ptq(
|
||||
self, model
|
||||
): # pylint: disable=redefined-outer-name
|
||||
def test_convert_qat_model_for_ptq(self, model):
|
||||
config = QATConfig(
|
||||
weight_dtype="int8",
|
||||
activation_dtype="int8",
|
||||
|
||||
@@ -19,7 +19,6 @@ class TestE2eQwen:
|
||||
|
||||
@pytest.mark.parametrize("base_model", ["Qwen/Qwen2-0.5B", "Qwen/Qwen2.5-0.5B"])
|
||||
def test_dpo(self, base_model, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": base_model,
|
||||
|
||||
@@ -19,7 +19,6 @@ class TestRewardModelLoraSmolLM2(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_rm_lora(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -20,7 +20,6 @@ class TestSaveFirstStepCallback(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_save_first_step(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -61,7 +60,6 @@ class TestSaveFirstStepCallback(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_no_save_first_step(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -19,7 +19,6 @@ class TestCustomSchedulers(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_rex_scheduler(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
helper utils for tests
|
||||
"""
|
||||
|
||||
import importlib.util
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
@@ -107,12 +108,7 @@ def require_vllm(test_case):
|
||||
"""
|
||||
|
||||
def is_vllm_installed():
|
||||
try:
|
||||
import vllm # pylint: disable=unused-import # noqa: F401
|
||||
|
||||
return True
|
||||
except ImportError:
|
||||
return False
|
||||
return importlib.util.find_spec("vllm") is not None
|
||||
|
||||
return unittest.skipUnless(
|
||||
is_vllm_installed(), "test requires vllm to be installed"
|
||||
@@ -125,12 +121,7 @@ def require_llmcompressor(test_case):
|
||||
"""
|
||||
|
||||
def is_llmcompressor_installed():
|
||||
try:
|
||||
import llmcompressor # pylint: disable=unused-import # noqa: F401
|
||||
|
||||
return True
|
||||
except ImportError:
|
||||
return False
|
||||
return importlib.util.find_spec("llmcompressor") is not None
|
||||
|
||||
return unittest.skipUnless(
|
||||
is_llmcompressor_installed(), "test requires llmcompressor to be installed"
|
||||
@@ -159,8 +150,8 @@ def check_tensorboard(
|
||||
tb_log_path = most_recent_subdir(temp_run_dir)
|
||||
event_file = os.path.join(tb_log_path, sorted(os.listdir(tb_log_path))[0])
|
||||
reader = SummaryReader(event_file)
|
||||
df = reader.scalars # pylint: disable=invalid-name
|
||||
df = df[(df.tag == tag)] # pylint: disable=invalid-name
|
||||
df = reader.scalars
|
||||
df = df[(df.tag == tag)]
|
||||
lt_val = (1 + rtol) * lt_val
|
||||
if "%s" in assertion_err:
|
||||
assert df.value.values[-1] < lt_val, assertion_err % df.value.values[-1]
|
||||
|
||||
Reference in New Issue
Block a user