Add ruff, remove black, isort, flake8, pylint (#3092)
* black, isort, flake8 -> ruff * remove unused * add back needed import * fix
This commit is contained in:
@@ -1,7 +1,5 @@
|
||||
"""Integration tests for LoRA activation and attention kernels."""
|
||||
|
||||
# pylint: disable=redefined-outer-name
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
@@ -88,7 +86,7 @@ def test_attention_patching_integration(model_name, attention_cls):
|
||||
cfg = DictDefault({"base_model": model_name})
|
||||
|
||||
# Store the original implementation
|
||||
original_forward = getattr(attention_cls, "forward")
|
||||
original_forward = attention_cls.forward
|
||||
|
||||
# Apply patch
|
||||
patch_self_attn_lora(cfg)
|
||||
@@ -104,7 +102,7 @@ def test_attention_patching_integration(model_name, attention_cls):
|
||||
assert hasattr(attention_cls, "_original_forward")
|
||||
|
||||
# Clean up
|
||||
setattr(attention_cls, "forward", original_forward)
|
||||
attention_cls.forward = original_forward
|
||||
delattr(attention_cls, "_original_forward")
|
||||
|
||||
|
||||
@@ -379,9 +377,9 @@ def test_model_architecture(model_config):
|
||||
|
||||
# Verify correct activation function
|
||||
layer = patched_model.model.model.layers[0]
|
||||
assert (
|
||||
layer.mlp.forward.__func__ is model_config["expected_activation"]
|
||||
), f"Wrong activation for {model_config['name']}"
|
||||
assert layer.mlp.forward.__func__ is model_config["expected_activation"], (
|
||||
f"Wrong activation for {model_config['name']}"
|
||||
)
|
||||
|
||||
# Test forward pass
|
||||
inputs = get_test_inputs(model)
|
||||
@@ -390,12 +388,11 @@ def test_model_architecture(model_config):
|
||||
patched_output = patched_model(inputs).logits
|
||||
|
||||
# Check outputs match
|
||||
assert torch.allclose(
|
||||
original_output, patched_output, rtol=1e-4
|
||||
), f"Outputs don't match for {model_config['name']}"
|
||||
assert torch.allclose(original_output, patched_output, rtol=1e-4), (
|
||||
f"Outputs don't match for {model_config['name']}"
|
||||
)
|
||||
|
||||
|
||||
# pylint: disable=duplicate-code
|
||||
def test_kernel_training_integration(temp_dir):
|
||||
"""Test model loading with kernel patches enabled."""
|
||||
from axolotl.cli.utils import load_model_and_tokenizer
|
||||
@@ -563,15 +560,13 @@ def test_kernel_training_integration_dropout_non_zero(temp_dir):
|
||||
model_loader = ModelLoader(cfg, tokenizer)
|
||||
|
||||
# Apply patch
|
||||
model_loader.patch_manager._apply_self_attention_lora_patch() # pylint: disable=protected-access
|
||||
model_loader.patch_manager._apply_self_attention_lora_patch()
|
||||
|
||||
# Verify patch was not applied
|
||||
assert attention_cls.forward == original_forward_method
|
||||
|
||||
# Apply apply_lora_kernel_patches
|
||||
model_loader.patch_manager._apply_lora_kernel_patch( # pylint: disable=protected-access
|
||||
model
|
||||
)
|
||||
model_loader.patch_manager._apply_lora_kernel_patch(model)
|
||||
|
||||
# Verify patch was not applied
|
||||
layers = get_layers(model)
|
||||
|
||||
@@ -19,7 +19,6 @@ class Test4dMultipackLlama(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_sdp_lora_packing(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -67,7 +66,6 @@ class Test4dMultipackLlama(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_torch_lora_packing(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -32,10 +32,9 @@ class TestActivationCheckpointing:
|
||||
def test_activation_checkpointing_offload(
|
||||
self,
|
||||
temp_dir,
|
||||
fix_checkpoint_after_test, # pylint: disable=unused-argument,redefined-outer-name
|
||||
fix_checkpoint_after_test,
|
||||
gradient_checkpointing,
|
||||
):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -10,7 +10,6 @@ from axolotl.cli.config import load_cfg
|
||||
from axolotl.utils.dict import DictDefault
|
||||
|
||||
|
||||
# pylint: disable=duplicate-code
|
||||
class TestPluginArgs:
|
||||
"""
|
||||
test class for plugin args loaded from the config file
|
||||
|
||||
@@ -23,7 +23,6 @@ class TestFAXentropyLlama:
|
||||
[1, 4],
|
||||
)
|
||||
def test_lora_packing_fa_cross_entropy(self, temp_dir, gradient_accumulation_steps):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -22,7 +22,6 @@ class TestFalconPatched(unittest.TestCase):
|
||||
@pytest.mark.skip(reason="no tiny models for testing with safetensors")
|
||||
@with_temp_dir
|
||||
def test_qlora(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "illuin/tiny-random-FalconForCausalLM",
|
||||
@@ -71,7 +70,6 @@ class TestFalconPatched(unittest.TestCase):
|
||||
@pytest.mark.skip(reason="no tiny models for testing with safetensors")
|
||||
@with_temp_dir
|
||||
def test_ft(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "illuin/tiny-random-FalconForCausalLM",
|
||||
|
||||
@@ -23,7 +23,6 @@ class TestFAFlattening:
|
||||
[1, 4],
|
||||
)
|
||||
def test_lora_packing_flattening(self, temp_dir, gradient_accumulation_steps):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -15,7 +15,6 @@ class TestFSDPPatchIntegration:
|
||||
apply_init_unsharded_param_patch,
|
||||
)
|
||||
|
||||
# pylint: disable=protected-access
|
||||
original_init_sharded = FSDPParam._init_sharded_param
|
||||
original_init_unsharded = FSDPParam.init_unsharded_param
|
||||
|
||||
@@ -23,11 +22,9 @@ class TestFSDPPatchIntegration:
|
||||
apply_init_sharded_param_patch()
|
||||
apply_init_unsharded_param_patch()
|
||||
|
||||
assert (
|
||||
# pylint: disable=protected-access
|
||||
FSDPParam._init_sharded_param
|
||||
!= original_init_sharded
|
||||
), "_init_sharded_param was not patched"
|
||||
assert (
|
||||
FSDPParam.init_unsharded_param != original_init_unsharded
|
||||
), "init_unsharded_param was not patched"
|
||||
assert FSDPParam._init_sharded_param != original_init_sharded, (
|
||||
"_init_sharded_param was not patched"
|
||||
)
|
||||
assert FSDPParam.init_unsharded_param != original_init_unsharded, (
|
||||
"init_unsharded_param was not patched"
|
||||
)
|
||||
|
||||
@@ -23,7 +23,6 @@ class TestFusedLlama(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_fft_packing(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -22,7 +22,6 @@ class TestLlamaShiftedSparseAttention(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_lora_s2_attn(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -71,7 +70,6 @@ class TestLlamaShiftedSparseAttention(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_fft_s2_attn(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -22,7 +22,6 @@ class TestLoraLlama(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_lora_packing(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
@@ -73,7 +72,6 @@ class TestLoraLlama(unittest.TestCase):
|
||||
@pytest.mark.skipif(not is_auto_gptq_available(), reason="auto-gptq not available")
|
||||
@with_temp_dir
|
||||
def test_lora_gptq_packed(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "lilmeaty/SmolLM2-135M-Instruct-GPTQ",
|
||||
|
||||
@@ -20,7 +20,6 @@ class TestMistral(unittest.TestCase):
|
||||
@require_torch_2_6_0
|
||||
@with_temp_dir
|
||||
def test_lora_packing(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "trl-internal-testing/tiny-MistralForCausalLM-0.2",
|
||||
@@ -68,7 +67,6 @@ class TestMistral(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_ft_packing(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "trl-internal-testing/tiny-MistralForCausalLM-0.2",
|
||||
|
||||
@@ -19,7 +19,6 @@ class TestMixtral(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_qlora(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "hf-internal-testing/Mixtral-tiny",
|
||||
@@ -64,7 +63,6 @@ class TestMixtral(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_ft(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "hf-internal-testing/Mixtral-tiny",
|
||||
|
||||
@@ -89,5 +89,5 @@ class TestModelPatches(unittest.TestCase):
|
||||
|
||||
assert (
|
||||
"torch.jit"
|
||||
in transformers.modeling_flash_attention_utils._get_unpad_data.__module__ # pylint: disable=protected-access
|
||||
in transformers.modeling_flash_attention_utils._get_unpad_data.__module__
|
||||
)
|
||||
|
||||
@@ -15,7 +15,6 @@ class TestLlamaPeftEmbeddings:
|
||||
"""
|
||||
|
||||
def test_peft_embeddings_upcast(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -19,7 +19,6 @@ class TestPhiMultipack(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_ft_packed(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "microsoft/phi-1_5",
|
||||
@@ -67,7 +66,6 @@ class TestPhiMultipack(unittest.TestCase):
|
||||
|
||||
@with_temp_dir
|
||||
def test_qlora_packed(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "microsoft/phi-1_5",
|
||||
|
||||
@@ -22,7 +22,6 @@ class TestResumeLlama:
|
||||
|
||||
@require_torch_2_6_0
|
||||
def test_resume_lora_packed(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||
|
||||
@@ -12,7 +12,6 @@ from axolotl.utils.dict import DictDefault
|
||||
from ..utils import check_model_output_exists, check_tensorboard
|
||||
|
||||
|
||||
# pylint: disable=duplicate-code
|
||||
@pytest.mark.skip(
|
||||
reason="Unsloth integration will be broken going into latest transformers"
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user