Add ruff, remove black, isort, flake8, pylint (#3092)

* black, isort, flake8 -> ruff
* remove unused
* add back needed import
* fix
2025-08-23 23:37:33 -04:00
parent eea7a006e1
commit 79ddaebe9a
286 changed files with 10979 additions and 11435 deletions
--- a/tests/e2e/multigpu/solo/test_flex.py
+++ b/tests/e2e/multigpu/solo/test_flex.py
@@ -31,7 +31,6 @@ class TestPackedFlex:

    @require_torch_2_6_0
    def test_loss_llama(self, temp_dir):
-        # pylint: disable=duplicate-code
        cfg = DictDefault(
            {
                "base_model": "HuggingFaceTB/SmolLM2-135M",
--- a/tests/e2e/multigpu/solo/test_grpo.py
+++ b/tests/e2e/multigpu/solo/test_grpo.py
@@ -80,7 +80,7 @@ def start_vllm(
    cmd_env = env.copy()
    cmd_env.update({"VLLM_LOGGING_CONFIG_PATH": vllm_logging_json})
    # start `trl vllm-serve` command in the background and capture the process id
-    process = subprocess.Popen(  # pylint: disable=consider-using-with
+    process = subprocess.Popen(
        cmd,
        env=cmd_env,
        stdout=subprocess.DEVNULL if quiet else subprocess.PIPE,
--- a/tests/e2e/multigpu/test_eval.py
+++ b/tests/e2e/multigpu/test_eval.py
@@ -21,7 +21,6 @@ class TestMultiGPUEval:
    """

    def test_eval_sample_packing(self, temp_dir):
-        # pylint: disable=duplicate-code
        cfg = DictDefault(
            {
                "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -93,7 +92,6 @@ class TestMultiGPUEval:
        check_tensorboard(temp_dir + "/runs", "eval/loss", 2.5, "Eval Loss is too high")

    def test_eval(self, temp_dir):
-        # pylint: disable=duplicate-code
        cfg = DictDefault(
            {
                "base_model": "HuggingFaceTB/SmolLM2-135M",
--- a/tests/e2e/multigpu/test_fp8_fsdp2.py
+++ b/tests/e2e/multigpu/test_fp8_fsdp2.py
@@ -1,7 +1,5 @@
 """Test module for FP8 mixed precision with FSDP2 multi-GPU functionality."""

-# pylint: disable=duplicate-code
-
 import os
 from pathlib import Path

@@ -28,9 +26,9 @@ def verify_fp8_training_success(temp_dir):
    assert len(model_files) > 0, "No model files found - training may have failed"

    checkpoint_files = list(output_path.glob("checkpoint-*"))
-    assert (
-        len(checkpoint_files) > 0
-    ), "No checkpoint files found - training may have failed"
+    assert len(checkpoint_files) > 0, (
+        "No checkpoint files found - training may have failed"
+    )

    tb_log_path = most_recent_subdir(temp_dir + "/runs")
    if tb_log_path:
@@ -42,9 +40,9 @@ def verify_fp8_training_success(temp_dir):
            train_loss_df = df[df.tag == "train/train_loss"]
            if len(train_loss_df) > 0:
                final_loss = train_loss_df.value.values[-1]
-                assert not torch.isnan(
-                    torch.tensor(final_loss)
-                ), f"Training loss is NaN: {final_loss}"
+                assert not torch.isnan(torch.tensor(final_loss)), (
+                    f"Training loss is NaN: {final_loss}"
+                )


 class TestFP8FSDP2:
--- a/tests/e2e/multigpu/test_fsdp1.py
+++ b/tests/e2e/multigpu/test_fsdp1.py
@@ -1,7 +1,5 @@
 """Test module for FSDP1 multi-GPU functionality."""

-# pylint: disable=duplicate-code
-
 import os
 from pathlib import Path

@@ -29,9 +27,9 @@ def verify_training_success(temp_dir):
    assert len(model_files) > 0, "No model files found - training may have failed"

    checkpoint_files = list(output_path.glob("checkpoint-*"))
-    assert (
-        len(checkpoint_files) > 0
-    ), "No checkpoint files found - training may have failed"
+    assert len(checkpoint_files) > 0, (
+        "No checkpoint files found - training may have failed"
+    )

    tb_log_path = most_recent_subdir(temp_dir + "/runs")
    if tb_log_path:
@@ -43,9 +41,9 @@ def verify_training_success(temp_dir):
            train_loss_df = df[df.tag == "train/train_loss"]
            if len(train_loss_df) > 0:
                final_loss = train_loss_df.value.values[-1]
-                assert not torch.isnan(
-                    torch.tensor(final_loss)
-                ), f"Training loss is NaN: {final_loss}"
+                assert not torch.isnan(torch.tensor(final_loss)), (
+                    f"Training loss is NaN: {final_loss}"
+                )


 class TestFSDP1:
--- a/tests/e2e/multigpu/test_fsdp2.py
+++ b/tests/e2e/multigpu/test_fsdp2.py
@@ -1,7 +1,5 @@
 """Test module for FSDP2 multi-GPU functionality."""

-# pylint: disable=duplicate-code
-
 import os
 from pathlib import Path

@@ -29,9 +27,9 @@ def verify_training_success(temp_dir):
    assert len(model_files) > 0, "No model files found - training may have failed"

    checkpoint_files = list(output_path.glob("checkpoint-*"))
-    assert (
-        len(checkpoint_files) > 0
-    ), "No checkpoint files found - training may have failed"
+    assert len(checkpoint_files) > 0, (
+        "No checkpoint files found - training may have failed"
+    )

    tb_log_path = most_recent_subdir(temp_dir + "/runs")
    if tb_log_path:
@@ -43,9 +41,9 @@ def verify_training_success(temp_dir):
            train_loss_df = df[df.tag == "train/train_loss"]
            if len(train_loss_df) > 0:
                final_loss = train_loss_df.value.values[-1]
-                assert not torch.isnan(
-                    torch.tensor(final_loss)
-                ), f"Training loss is NaN: {final_loss}"
+                assert not torch.isnan(torch.tensor(final_loss)), (
+                    f"Training loss is NaN: {final_loss}"
+                )


 class TestFSDP2:
--- a/tests/e2e/multigpu/test_gemma3.py
+++ b/tests/e2e/multigpu/test_gemma3.py
@@ -29,7 +29,6 @@ class TestMultiGPUGemma3:
    """

    def test_lora_ddp_packed(self, temp_dir):
-        # pylint: disable=duplicate-code
        cfg = DictDefault(
            {
                "base_model": "axolotl-mirrors/gemma-3-4b-pt",
--- a/tests/e2e/multigpu/test_llama.py
+++ b/tests/e2e/multigpu/test_llama.py
@@ -35,7 +35,6 @@ class TestMultiGPULlama:
    """

    def test_lora_ddp(self, temp_dir):
-        # pylint: disable=duplicate-code
        cfg = DictDefault(
            {
                "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -99,7 +98,6 @@ class TestMultiGPULlama:
        [1, 2],
    )
    def test_lora_ddp_packed(self, temp_dir, gradient_accumulation_steps):
-        # pylint: disable=duplicate-code
        cfg = DictDefault(
            {
                "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -162,7 +160,6 @@ class TestMultiGPULlama:
        )

    def test_dpo_lora_ddp(self, temp_dir):
-        # pylint: disable=duplicate-code
        cfg = DictDefault(
            {
                "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -242,7 +239,6 @@ class TestMultiGPULlama:
        )

    def test_dpo_qlora_ddp(self, temp_dir):
-        # pylint: disable=duplicate-code
        cfg = DictDefault(
            {
                "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -326,7 +322,6 @@ class TestMultiGPULlama:
        [1, 2],
    )
    def test_fsdp(self, temp_dir, gradient_accumulation_steps):
-        # pylint: disable=duplicate-code
        cfg = DictDefault(
            {
                "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -402,7 +397,6 @@ class TestMultiGPULlama:
        ],
    )
    def test_fsdp_packed(self, temp_dir, fsdp_state_dict_type):
-        # pylint: disable=duplicate-code
        cfg = DictDefault(
            {
                "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -484,7 +478,6 @@ class TestMultiGPULlama:
    def test_fsdp2_packed(
        self, temp_dir, attention_backend, fsdp_reshard_after_forward
    ):
-        # pylint: disable=duplicate-code
        cfg = DictDefault(
            {
                "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -556,7 +549,6 @@ class TestMultiGPULlama:
        )

    def test_fsdp_qlora_prequant_packed(self, temp_dir):
-        # pylint: disable=duplicate-code
        cfg = DictDefault(
            {
                "base_model": "axolotl-ai-co/SmolLM2-135M-bnb-nf4-bf16",
@@ -656,7 +648,6 @@ class TestMultiGPULlama:
    def test_ds_zero3_packed(
        self, temp_dir, gradient_accumulation_steps, deepspeed, qlora
    ):
-        # pylint: disable=duplicate-code
        if qlora:
            adapter = {
                "adapter": "qlora",
@@ -732,7 +723,6 @@ class TestMultiGPULlama:
        [True, False],
    )
    def test_ds_zero2_packed(self, temp_dir, gradient_accumulation_steps, qlora):
-        # pylint: disable=duplicate-code
        if qlora:
            adapter = {
                "adapter": "qlora",
@@ -809,7 +799,6 @@ class TestMultiGPULlama:
        [True, False],
    )
    def test_ds_zero1_packed(self, temp_dir, gradient_accumulation_steps, qlora):
-        # pylint: disable=duplicate-code
        if qlora:
            adapter = {
                "adapter": "qlora",
@@ -880,7 +869,6 @@ class TestMultiGPULlama:
        reason="fix untrained tokens brittle with lots of edge cases in latest transformers"
    )
    def test_fix_untrained_tokens(self, temp_dir):
-        # pylint: disable=duplicate-code
        cfg = DictDefault(
            {
                "base_model": "HuggingFaceTB/SmolLM2-135M",
--- a/tests/e2e/multigpu/test_ray.py
+++ b/tests/e2e/multigpu/test_ray.py
@@ -26,7 +26,6 @@ class TestMultiGPURay:

    @require_torch_lt_2_6_0
    def test_lora_ddp(self, temp_dir):
-        # pylint: disable=duplicate-code
        cfg = DictDefault(
            {
                "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -90,7 +89,6 @@ class TestMultiGPURay:
        [1, 2],
    )
    def test_ds_zero2_packed(self, temp_dir, gradient_accumulation_steps):
-        # pylint: disable=duplicate-code
        cfg = DictDefault(
            {
                "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -150,7 +148,6 @@ class TestMultiGPURay:
        [1, 2],
    )
    def test_sft_fsdp2_packed(self, temp_dir, gradient_accumulation_steps):
-        # pylint: disable=duplicate-code
        cfg = DictDefault(
            {
                "base_model": "HuggingFaceTB/SmolLM2-135M",
--- a/tests/e2e/multigpu/test_tp.py
+++ b/tests/e2e/multigpu/test_tp.py
@@ -19,7 +19,6 @@ class TestTensorParallel:
    )
    @require_torch_2_7_0
    def test_fft_sft(self, temp_dir):
-        # pylint: disable=duplicate-code
        cfg = DictDefault(
            {
                "base_model": "Qwen/Qwen2.5-0.5B",