diff --git a/.bandit b/.bandit
index 82e88e814..b81428751 100644
--- a/.bandit
+++ b/.bandit
@@ -1,3 +1,3 @@
 [bandit]
 exclude = tests
-skips = B101,B615
+skips = B101,B615,B102,B110
diff --git a/.flake8 b/.flake8
deleted file mode 100644
index fd69af775..000000000
--- a/.flake8
+++ /dev/null
@@ -1,5 +0,0 @@
-[flake8]
-max-line-length = 88
-
-select = C,E,F,W,B,B950
-extend-ignore = E203, E501, W503
diff --git a/.isort.cfg b/.isort.cfg
deleted file mode 100644
index bf9afe319..000000000
--- a/.isort.cfg
+++ /dev/null
@@ -1,4 +0,0 @@
-[settings]
-profile=black
-known_third_party=wandb,comet_ml
-known_local_folder=src,tests
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 4c9268529..4c2861346 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -10,22 +10,12 @@ repos:
     -   id: trailing-whitespace
     -   id: no-commit-to-branch
         args: ['--branch', 'main']
--   repo: https://github.com/psf/black
-    rev: 25.1.0
+-   repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.12.9
     hooks:
-    -   id: black
--   repo: https://github.com/pycqa/isort
-    rev: 6.0.1
-    hooks:
-      - id: isort
--   repo: https://github.com/PyCQA/flake8
-    rev: 7.3.0
-    hooks:
-    - id: flake8
--   repo: https://github.com/pylint-dev/pylint
-    rev: v3.3.8
-    hooks:
-    - id: pylint
+    -   id: ruff
+        args: [--fix]
+    -   id: ruff-format
 -   repo: https://github.com/pre-commit/mirrors-mypy
     rev: v1.17.1
     hooks:
diff --git a/.pylintrc b/.pylintrc
deleted file mode 100644
index 208dd32b6..000000000
--- a/.pylintrc
+++ /dev/null
@@ -1,15 +0,0 @@
-[MASTER]
-init-hook="from pylint.config import find_default_config_files; import sys; sys.path.append(next(find_default_config_files()).parent.as_posix())"
-
-[TYPECHECK]
-
-# List of members which are set dynamically and missed by Pylint inference
-# system, and so shouldn't trigger E1101 when accessed.
-generated-members=numpy.*, torch.*
-
-
-[pylint.messages_control]
-disable=missing-function-docstring, line-too-long, import-error,
-    too-many-arguments, too-many-locals, too-many-statements, too-many-branches, too-few-public-methods,
-    too-many-instance-attributes, fixme, import-outside-toplevel, logging-fstring-interpolation,
-    too-many-positional-arguments, possibly-used-before-assignment
diff --git a/cicd/multigpu.py b/cicd/multigpu.py
index 2c067f143..5bd8d3c04 100644
--- a/cicd/multigpu.py
+++ b/cicd/multigpu.py
@@ -2,8 +2,6 @@
 modal application to run axolotl gpu tests in Modal
 """
 
-# pylint: disable=duplicate-code
-
 import os
 import pathlib
 import tempfile
@@ -63,7 +61,7 @@ def run_cmd(cmd: str, run_folder: str):
 
     # Propagate errors from subprocess.
     if exit_code := subprocess.call(cmd.split(), cwd=run_folder):  # nosec
-        exit(exit_code)  # pylint: disable=consider-using-sys-exit
+        exit(exit_code)
 
 
 @app.function(
diff --git a/cicd/single_gpu.py b/cicd/single_gpu.py
index eb34e1748..0e2922e90 100644
--- a/cicd/single_gpu.py
+++ b/cicd/single_gpu.py
@@ -1,7 +1,5 @@
 """Modal app to run axolotl GPU tests"""
 
-# pylint: disable=duplicate-code
-
 import os
 import pathlib
 import tempfile
@@ -70,4 +68,4 @@ def run_cmd(cmd: str, run_folder: str):
 
     # Propagate errors from subprocess.
     if exit_code := subprocess.call(cmd.split(), cwd=run_folder, env=sp_env):  # nosec
-        exit(exit_code)  # pylint: disable=consider-using-sys-exit
+        exit(exit_code)
diff --git a/docs/scripts/generate_config_docs.py b/docs/scripts/generate_config_docs.py
index e22da7d05..6efa2038b 100644
--- a/docs/scripts/generate_config_docs.py
+++ b/docs/scripts/generate_config_docs.py
@@ -47,7 +47,6 @@ class QuartoGenerator:
         """Check if a type is a Pydantic BaseModel."""
         return inspect.isclass(type_obj) and issubclass(type_obj, BaseModel)
 
-    # pylint: disable=too-many-return-statements
     def _extract_nested_type(self, field_type) -> Any:
         """Extract the actual type from complex type annotations."""
         # Handle Annotated types (Python 3.9+)
@@ -124,7 +123,6 @@ class QuartoGenerator:
 
         return field_type
 
-    # pylint: disable=too-many-return-statements
     def _extract_all_pydantic_models_from_type(
         self, field_type
     ) -> list[type[BaseModel]]:
@@ -318,7 +316,6 @@ class QuartoGenerator:
 
         return all_groups
 
-    # pylint: disable=too-many-return-statements
     def _extract_field_groups_from_source(
         self, model_class: type[BaseModel]
     ) -> list[dict]:
@@ -503,7 +500,7 @@ class QuartoGenerator:
                     nested_schema = nested_model.model_json_schema()
                     nested_properties = nested_schema.get("properties", {})
                     nested_required = nested_schema.get("required", [])
-                except Exception:  # pylint: disable=broad-exception-caught
+                except Exception:
                     # Fallback: use model fields directly
                     nested_properties = {}
                     nested_required = []
@@ -607,7 +604,7 @@ class QuartoGenerator:
             schema = model_class.model_json_schema()
             properties = schema.get("properties", {})
             required = schema.get("required", [])
-        except Exception as e:  # pylint: disable=broad-exception-caught
+        except Exception as e:
             print(
                 f"Warning: Could not generate JSON schema ({e}). Using model fields instead."
             )
diff --git a/examples/colab-notebooks/colab-axolotl-example.ipynb b/examples/colab-notebooks/colab-axolotl-example.ipynb
index 69881997e..30ef1c3de 100644
--- a/examples/colab-notebooks/colab-axolotl-example.ipynb
+++ b/examples/colab-notebooks/colab-axolotl-example.ipynb
@@ -1,9934 +1,9944 @@
 {
-  "cells": [
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "OPLSwmgdrB7g"
-      },
-      "source": [
-        "# Fine-Tune Qwen3 14B with Axolotl\n",
-        "\n",
-        "[<img src=\"https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png\" alt=\"Built with Axolotl\" width=\"200\" height=\"32\"/>](https://github.com/axolotl-ai-cloud/axolotl)\n",
-        "\n",
-        "Axolotl is the most performant LLM post-training framework available, delivering faster training with efficient, consistent and stable performance. Train your workload and ship your product 30% faster; saving you both time and money.\n",
-        "\n",
-        "- ⭐ us on [GitHub](https://github.com/axolotl-ai-cloud/axolotl)\n",
-        "- 📜 Read the [Docs](http://docs.axolotl.ai/)\n",
-        "- 💬 Chat with us on [Discord](https://discord.gg/mnpEYgRUmD)\n",
-        "- 📰 Get updates on [X/Twitter](https://x.com/axolotl_ai)\n"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "rVjKD7CbxIP3"
-      },
-      "source": [
-        "# Installation\n",
-        "\n",
-        "Axolotl is easy to install from [pip](https://pypi.org/project/axolotl/), or use our [pre-built Docker images](http://docs.axolotl.ai/docs/docker.html) for a hassle free dependency experience. See our [docs](http://docs.axolotl.ai/docs/installation.html) for more information."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "msOCO4NRmRLa"
-      },
-      "outputs": [],
-      "source": [
-        "%%capture\n",
-        "# This step can take ~5-10 minutes to install dependencies\n",
-        "!pip install --no-build-isolation axolotl[flash-attn]>=0.9.1\n",
-        "!pip install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@0ee9ee8\""
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "N0OW0YeksDLr"
-      },
-      "source": [
-        "## Demo: Talk Like a Pirate\n",
-        "\n",
-        "In this demo, we are training the model ***to respond like a pirate***. This was chosen as a way to easily show how to train a model to respond in a certain style of your choosing (without being prompted) and is quite easy to validate within the scope of a Colab."
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "8Du2fANTsNCK"
-      },
-      "source": [
-        "### Upload your own dataset or use a Huggingface dataset\n",
-        "\n",
-        "You can choose to use your own JSONL file from your own [Google Drive](https://drive.google.com/drive/home); for example downloading the [Pirate-Ultrachat JSONL](https://huggingface.co/datasets/winglian/pirate-ultrachat-10k/blob/main/train.jsonl) to your Google Drive. JSONL datasets should be formatted similar to the [OpenAI dataset format](https://cookbook.openai.com/examples/chat_finetuning_data_prep).\n",
-        "\n",
-        "You can also simply use the [`winglian/pirate-ultrachat-10k`](https://huggingface.co/datasets/winglian/pirate-ultrachat-10k) dataset directly.\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "fGEEjyQ-r_IV"
-      },
-      "outputs": [],
-      "source": [
-        "# Default to HF dataset location\n",
-        "dataset_id = \"winglian/pirate-ultrachat-10k\"\n",
-        "uploaded = {}"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "c5MyYqk7vIsG"
-      },
-      "outputs": [],
-      "source": [
-        "import os\n",
-        "# Optionally, upload your own JSONL to your Google Drive\n",
-        "GOOGLE_DRIVE_PATH = \"\"  # ex: \"MyDrive/Colab\\ Notebooks/train.jsonl\"\n",
-        "\n",
-        "# \"Select All\" permissions, or you may get the error:\n",
-        "# \"MessageError: Error: credential propagation was unsuccessful\"\n",
-        "if GOOGLE_DRIVE_PATH:\n",
-        "    from google.colab import drive\n",
-        "    # Mount your Google Drive\n",
-        "    GOOGLE_DRIVE_MNT = \"/content/drive/\"\n",
-        "    drive.mount(GOOGLE_DRIVE_MNT, force_remount=True)\n",
-        "    tmp_path = os.path.join(GOOGLE_DRIVE_MNT, GOOGLE_DRIVE_PATH.lstrip(\"/\"))\n",
-        "    # make sure file exists\n",
-        "    if not os.path.isfile(tmp_path):\n",
-        "        raise ValueError(f\"File {tmp_path} does not exist\")\n",
-        "    dataset_id = tmp_path\n"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "U6pTk3A9xj1W"
-      },
-      "source": [
-        "# Configure for Supervised Fine-Tuning (SFT)"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 151,
-          "referenced_widgets": [
-            "388f618924274d21a066f098f4f1e744",
-            "7c95f85a2b1f47a1bd846d110c47bb3c",
-            "083f9cda8d754c168beee10d2f8955a2",
-            "62e1a65582f446a78612eaa804e08a7d",
-            "487a177d020f4605834878b2fdc7afa3",
-            "7fd44cf9ca6e4726bfd7ac21846d6a14",
-            "366a343b62fa47d8985a3bd464d99f9e",
-            "a0a11e929edd4189b79723d618522c33",
-            "e87ea87fcff247b5bbcc331ba79a8dc2",
-            "5e18768f7ad6434ba8b8b8a2e853e204",
-            "bb33aec33a6447078c31bfd728942994"
-          ]
-        },
-        "id": "fdRioqytmTtX",
-        "outputId": "f0acdcec-4b41-4a3f-ffed-c2d2d929158e"
-      },
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "[2025-05-08 13:40:27,488] [INFO] [root.register:348] [PID:174] Attempting to load plugin: axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin\n",
-            "[2025-05-08 13:40:27,493] [INFO] [root.register:351] [PID:174] Plugin loaded successfully: axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin\n",
-            "[2025-05-08 13:40:27,959] [INFO] [axolotl.utils.schemas.config.check_eval_packing:721] [PID:174] [RANK:0] explicitly setting `eval_sample_packing` to match `sample_packing`\u001b[39m\n",
-            "[2025-05-08 13:40:27,960] [INFO] [axolotl.utils.schemas.config.hint_sample_packing_padding:514] [PID:174] [RANK:0] Setting `pad_to_sequence_len: true` to prevent memory leaks when sample_packing\u001b[39m\n",
-            "[2025-05-08 13:40:27,961] [INFO] [axolotl.utils.schemas.config.check_bf16:1251] [PID:174] [RANK:0] bf16 support detected, but not enabled for this configuration.\u001b[39m\n"
-          ]
-        },
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "388f618924274d21a066f098f4f1e744",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "config.json:   0%|          | 0.00/728 [00:00<?, ?B/s]"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "[2025-05-08 13:40:28,590] [INFO] [axolotl.normalize_config:237] [PID:174] [RANK:0] cuda memory usage baseline: 0.000GB (+0.002GB cache, +0.359GB misc)\u001b[39m\n"
-          ]
-        }
-      ],
-      "source": [
-        "from axolotl.utils.dict import DictDefault\n",
-        "from axolotl.cli.config import load_cfg\n",
-        "\n",
-        "# Axolotl provides full control and transparency over model and training configuration\n",
-        "config = DictDefault(\n",
-        "    base_model = \"Qwen/Qwen3-14B\",  # Use the instruct tuned model, but we're aligning it to be a pirate\n",
-        "    load_in_4bit = True,  # set to True for qLoRA\n",
-        "    adapter = \"qlora\",\n",
-        "    lora_r = 32,\n",
-        "    lora_alpha = 64,\n",
-        "    lora_target_modules = [\n",
-        "        \"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",  # train self_attn linear modules\n",
-        "        \"gate_proj\", \"down_proj\", \"up_proj\",  # train MLP linear modules\n",
-        "    ],\n",
-        "    lora_qkv_kernel = True,  # optimized triton kernels for LoRA\n",
-        "    lora_o_kernel = True,\n",
-        "    lora_mlp_kernel = True,\n",
-        "    embeddings_skip_upcast = True,  # keep embeddings in fp16 so the model fits in 15GB VRAM\n",
-        "    xformers_attention = True,  # use xformers on Colab w/ T4 for memory efficient attention, flash_attention only on Ampere or above\n",
-        "    plugins = [\n",
-        "        # more efficient training using Apple's Cut Cross Entropy; https://github.com/apple/ml-cross-entropy\n",
-        "        \"axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin\",\n",
-        "    ],\n",
-        "    sample_packing = True,  # 2-6x increase in tokens per micro-batch\n",
-        "    # when using packing, use a slightly higher learning rate to account for fewer steps\n",
-        "    # alternatively, reduce the micro_batch_size + gradient_accumulation_steps to achieve closer to the same number of steps/epoch\n",
-        "    learning_rate = 0.00019,\n",
-        "    sequence_len = 4096,  # larger sequence length improves packing efficiency for more tokens/sec\n",
-        "    micro_batch_size = 1,\n",
-        "    gradient_accumulation_steps = 1,\n",
-        "    gradient_checkpointing = True,  # tradeoff reduced VRAM for increased time\n",
-        "    gradient_checkpointing_kwargs = {\n",
-        "        \"use_reentrant\": False,\n",
-        "    },\n",
-        "    optimizer = \"paged_adamw_8bit\",\n",
-        "    lr_scheduler = \"cosine\",\n",
-        "    warmup_steps = 5,\n",
-        "    fp16 = True,  # use float16 + automatic mixed precision, bfloat16 not supported on Colab w/ T4\n",
-        "    bf16 = False,\n",
-        "    max_grad_norm = 0.1,  # gradient clipping\n",
-        "    num_epochs = 1,\n",
-        "    saves_per_epoch = 2,  # how many checkpoints to save over one epoch\n",
-        "    logging_steps = 1,\n",
-        "    output_dir = \"./outputs/qwen-sft-pirate-rrr\",\n",
-        "    chat_template = \"qwen3\",\n",
-        "    datasets = [\n",
-        "        {\n",
-        "            \"path\": dataset_id,  # Huggingface Dataset id or path to train.jsonl\n",
-        "            \"type\": \"chat_template\",\n",
-        "            \"split\": \"train\",\n",
-        "            \"eot_tokens\": [\"<|im_end|>\"],\n",
-        "        }\n",
-        "    ],\n",
-        "    dataloader_prefetch_factor = 8,  # dataloader optimizations\n",
-        "    dataloader_num_workers = 2,\n",
-        "    dataloader_pin_memory = True,\n",
-        "  )\n",
-        "\n",
-        "# validates the configuration\n",
-        "cfg = load_cfg(config)"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "715UpvnSoBIS"
-      },
-      "outputs": [],
-      "source": [
-        "from axolotl.utils import patch_optimized_env\n",
-        "# speedup downloads from HF 🤗 and set \"PYTORCH_CUDA_ALLOC_CONF\" env to save memory\n",
-        "patch_optimized_env()"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "Vc6MC-hwyH-n"
-      },
-      "source": [
-        "# Datasets\n",
-        "\n",
-        "Axolotl has a robust suite of loaders and transforms to parse most open datasets of any format into the appropriate chat template for your model. Axolotl will mask input tokens from the user's prompt so that the train loss is only calculated against the model's response. For more information, [see our documentation](http://docs.axolotl.ai/docs/dataset-formats/conversation.html) on dataset preparation.\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 1000,
-          "referenced_widgets": [
-            "b82aa8c57f7c422a9a9c90f333ed2a99",
-            "c0991cf63ee6458b96e9a75e7a88b61a",
-            "71c8af139cd248b1b51101fd46a93f35",
-            "1d5117195d4b49eb8f1a73b18419f7ce",
-            "3c21e4a511b4441192c03b7f1d0976e9",
-            "ed28e2e0410d4e0b855467e798e53d66",
-            "d93f134f802b4b69b575bdaf07dbd27c",
-            "d0e9dce55cec4c1ca619a0ccf209d924",
-            "4c727d40ef0443449afc31724ee79f0c",
-            "0dea5caa27384f5689e3cab51f558727",
-            "a6f48410b9964fefba0c3009a77dc838",
-            "95caff42f08a4c2aa14c867b8f37f231",
-            "de7c37ee83e24f0c889e84d07279c2ec",
-            "9d4897eefb5f48259ffb2d23e332f752",
-            "253017b0d0534e54ab44e181f6d7c82d",
-            "27beaf06e41b472abdb544a43c720c5a",
-            "34cf3df51fbc41cabfdbba153c007f0e",
-            "ac764024cf1c4e08ba7749afd2cd20ac",
-            "30a81da86f8043eca301e86a8651201a",
-            "e8b7a81040904c1e89e58978223b1737",
-            "1c6f1f10667545aaab958016ba7e2c94",
-            "e6e969610738449887259063967f82b0",
-            "a138859f19b74fc0928dc236ab5359db",
-            "9b42e08b3c9548818488268768a118b1",
-            "12b56912736849fea2ad8124456fdc5c",
-            "879c8ab5873847a8833bd74123be90a4",
-            "20352e5f58d24bb8b1f3940efd14fe4a",
-            "d955dcaa0e944e719f3a06139dd54a03",
-            "d3de2662c7964f1ba96e58da382af720",
-            "97e36007e1304e1583fd81bfb13f0edd",
-            "c65dc74c7d6f4bab8f7dd28455161dd8",
-            "ef223e8504b64e3592589880326aaf41",
-            "598da69727bd4fb8b1caf465ac736d7a",
-            "5f86cd894de94c3280fadc1e2fd0ee13",
-            "a20927bf5f2c41f58c1e31ac858ab36c",
-            "0a46ad75c198463d843fb35e813642cb",
-            "09007681cf8d42aeb8c1d2f6a74e470a",
-            "ebc80d1a55fa47f4a5ea2756588569ec",
-            "1811cda0644e4190a9469d1774435d82",
-            "35c811d2ae8e43f3b5cecbdd3cfa857f",
-            "b8e39e4dddc3497fbc29ae45c66da759",
-            "63b4e563e85c4f03b1b72beda9577bcc",
-            "b195f160ca20442fadd8b5aed0ee41af",
-            "ca65e32eb52f48c09a84b33cb18f22cd",
-            "7cd0b85ebd204b7aba908417811ce4e0",
-            "7baeab52d6694c32b1efd1ea1a0a7782",
-            "519a7b154022443db6703f04a9142bae",
-            "d4183e9715f34d249942b8271cca3bdf",
-            "da2347ac94764a3fa2743343cf0d3cd2",
-            "93a44a11aa4846fa8efc6c1413ef1627",
-            "a55060adc3564407ac81ad7297d34aaa",
-            "d02274afd47b462291c745f261209d42",
-            "0f417447a7bd4a33acca96fa37aec877",
-            "63580b6fb30642479fe3000915bf551a",
-            "8f726dbfb45d4528afa33e36a6313267",
-            "03b093d592ba4386aa61f7b8483da660",
-            "b8766a88716948cf968f4563531a76d9",
-            "6f3a28b912714c6e931003549664bfa3",
-            "16d1283741404b7bb319094c992fce01",
-            "2a5bb0e818ab47be8cf6465988328503",
-            "2b3a2659b12244bd8548320320016dbf",
-            "0cd7efffbb3c4c4b972e63749f61ab97",
-            "5ca240f31e6b44e3882c5eb37cd5a309",
-            "5eb06edeb58e4930b1affef2a59eae81",
-            "a4e5789584564049b83df7c6c54a3e08",
-            "ff3a94b146a948b6907f5d80c7157f99",
-            "258b7c635c1045329d4669e48c46ccd5",
-            "6f68ed9889f54ad2ae8a3b95ac263a83",
-            "80366349d81e4dcc892db6cd56e384f3",
-            "c73055099c084dca996159e23e162d0b",
-            "977f799afaac4a55b2dc1cffa7d5b63b",
-            "41f3b32c2f6b4034ae7a3b9124e28bc7",
-            "a10d0a76010f4e508c65a9b69ebc5156",
-            "f8ef805b776145c3bfa9ba8d90972058",
-            "cc587493c33c4f118d1b1170f85be24c",
-            "e40d1c1ac9494b3bade9858324e7ffdf",
-            "d65b6b060d9845779299491ac5599c31",
-            "0f6907ebbc6242c8bde059cef1e1bd29",
-            "5bdfd87fc6cd4f9dabef7cfee29c8060",
-            "64f54d4a744a4627a07c3c0120276f3b",
-            "65b75b9b8bc143cf997796af68ff6668",
-            "d6fe74e4255444368f8f90a62157d869",
-            "4d468f96ec924681ad65eb671674b93e",
-            "ad7599de524549c48bf2d3124ad4b299",
-            "0546d04aae644dde846c58a4afb598a6",
-            "897b77a56c09479bb11d7f2a30997e55",
-            "81c3db71ac704280ad030072655f1537",
-            "042e091f75694c47aee761e760e76773",
-            "ef0a3c7a6f14460fb4da096928ae249e",
-            "07fb3a2c8315494e97b447e672dfae06",
-            "ec030fc3c346426f9abc3a89892258d3",
-            "e3fb3fc6afe04b3c9b7ac61809ce78fa",
-            "c3be9109d63c485d9c0ef4f9bc0f9218",
-            "12815f401eba44658caa7b2e490137a8",
-            "30e02aa2d0d241979369e598287f2639",
-            "dfd2a2649b8341ef913207526708aff1",
-            "4f1977d7e4824ef1a14b65f0f42bba10",
-            "c6164e05a1914ae48083db9ad7f4ef7c",
-            "813621384dc748b0ad06775e22761c0b",
-            "dc892a596f6942d7973c616c38f0eebb",
-            "c84cc07789be48aebb322c23d355289e",
-            "bed8726b8069434687c75452e21f19e5",
-            "16a188a0b06d45f980dcf3933509fe0a",
-            "60c1a0d765c14a1d888317e6a507e4ea",
-            "0077aedc3d174560bce924ee89e9c006",
-            "00321cce58884f6f9b3855a21fcd9187",
-            "fa864b41586f4a7aa56aeafd1d84eb75",
-            "3225603166b54e7aab766b9964a2f660",
-            "349eee9f56d64f0cba6fc24ff2c50c9b",
-            "7e5d3774060e4589aa65982da5ea4ef4",
-            "7c2485c6cdfe463da6fdb35982a1070d",
-            "ad1236893754446881e153adc9d5c962",
-            "daee63fd167e4441a32324b51b00ad2b",
-            "fe41858c6bd04c58840112b67c19a336",
-            "d262c82138024169b9f3aa034ca756fa",
-            "62e302ebdad64aada0ffe64ae1c873f3",
-            "bd1b0dfed6d34d16af33a4a58330f5ec",
-            "d07c8b97d3314f1c852e44bdd40f61ed",
-            "ebb69a2c3d0a4299a484698287b3087c",
-            "e5a82df528bb4e408797a3b6c2758f4a",
-            "f113ebd8c1c34806bea4dd7ed3035173"
-          ]
-        },
-        "id": "KQQhgK8FoDfF",
-        "outputId": "f69441d8-95f9-4885-c306-6c8709090ff6"
-      },
-      "outputs": [
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "b82aa8c57f7c422a9a9c90f333ed2a99",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "tokenizer_config.json:   0%|          | 0.00/9.68k [00:00<?, ?B/s]"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "95caff42f08a4c2aa14c867b8f37f231",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "a138859f19b74fc0928dc236ab5359db",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "5f86cd894de94c3280fadc1e2fd0ee13",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "[2025-05-08 13:41:00,844] [DEBUG] [axolotl.utils.models.load_tokenizer:441] [PID:174] [RANK:0] EOS: 151645 / <|im_end|>\u001b[39m\n",
-            "[2025-05-08 13:41:00,845] [DEBUG] [axolotl.utils.models.load_tokenizer:442] [PID:174] [RANK:0] BOS: None / None\u001b[39m\n",
-            "[2025-05-08 13:41:00,846] [DEBUG] [axolotl.utils.models.load_tokenizer:443] [PID:174] [RANK:0] PAD: 151643 / <|endoftext|>\u001b[39m\n",
-            "[2025-05-08 13:41:00,847] [DEBUG] [axolotl.utils.models.load_tokenizer:444] [PID:174] [RANK:0] UNK: None / None\u001b[39m\n",
-            "[2025-05-08 13:41:00,869] [INFO] [axolotl.utils.data.sft.load_tokenized_prepared_datasets:271] [PID:174] [RANK:0] Unable to find prepared dataset in last_run_prepared/97037817611d38b3a9c681753c3c4c95\u001b[39m\n",
-            "[2025-05-08 13:41:00,870] [INFO] [axolotl.utils.data.sft.load_tokenized_prepared_datasets:272] [PID:174] [RANK:0] Loading raw datasets...\u001b[39m\n",
-            "\u001b[33m[2025-05-08 13:41:00,870] [WARNING] [axolotl.utils.data.sft.load_tokenized_prepared_datasets:274] [PID:174] [RANK:0] Processing datasets during training can lead to VRAM instability. Please pre-process your dataset.\u001b[39m\n",
-            "[2025-05-08 13:41:00,871] [INFO] [axolotl.utils.data.sft.load_tokenized_prepared_datasets:281] [PID:174] [RANK:0] No seed provided, using default seed of 42\u001b[39m\n"
-          ]
-        },
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "7cd0b85ebd204b7aba908417811ce4e0",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "train.jsonl:   0%|          | 0.00/27.3M [00:00<?, ?B/s]"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "03b093d592ba4386aa61f7b8483da660",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "Generating train split: 0 examples [00:00, ? examples/s]"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "[2025-05-08 13:41:04,196] [INFO] [axolotl.utils.data.sft.get_dataset_wrapper:484] [PID:174] [RANK:0] Loading dataset with base_type: chat_template and prompt_style: None\u001b[39m\n",
-            "[2025-05-08 13:41:04,233] [INFO] [axolotl.__call__:761] [PID:174] [RANK:0] Using chat template:\n",
-            "---\n",
-            "{%- if tools %}\n",
-            "    {{- '<|im_start|>system\\n' }}\n",
-            "    {%- if messages[0].role == 'system' %}\n",
-            "        {{- messages[0].content + '\\n\\n' }}\n",
-            "    {%- endif %}\n",
-            "    {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n",
-            "    {%- for tool in tools %}\n",
-            "        {{- \"\\n\" }}\n",
-            "        {{- tool | tojson }}\n",
-            "    {%- endfor %}\n",
-            "    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n",
-            "{%- else %}\n",
-            "    {%- if messages[0].role == 'system' %}\n",
-            "        {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n",
-            "    {%- endif %}\n",
-            "{%- endif %}\n",
-            "{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n",
-            "{%- for message in messages[::-1] %}\n",
-            "    {%- set index = (messages|length - 1) - loop.index0 %}\n",
-            "    {%- if ns.multi_step_tool and message.role == \"user\" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n",
-            "        {%- set ns.multi_step_tool = false %}\n",
-            "        {%- set ns.last_query_index = index %}\n",
-            "    {%- endif %}\n",
-            "{%- endfor %}\n",
-            "{%- for message in messages %}\n",
-            "    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n",
-            "        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n",
-            "    {%- elif message.role == \"assistant\" %}\n",
-            "        {%- set content = message.content %}\n",
-            "        {%- set reasoning_content = '' %}\n",
-            "        {%- if message.reasoning_content is defined and message.reasoning_content is not none %}\n",
-            "            {%- set reasoning_content = message.reasoning_content %}\n",
-            "        {%- else %}\n",
-            "            {%- if '</think>' in message.content %}\n",
-            "                {%- set content = message.content.split('</think>')[-1].lstrip('\\n') %}\n",
-            "                {%- set reasoning_content = message.content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n",
-            "            {%- endif %}\n",
-            "        {%- endif %}\n",
-            "        {%- if loop.index0 > ns.last_query_index %}\n",
-            "            {%- if loop.last or (not loop.last and reasoning_content) %}\n",
-            "                {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n",
-            "            {%- else %}\n",
-            "                {{- '<|im_start|>' + message.role + '\\n' + content }}\n",
-            "            {%- endif %}\n",
-            "        {%- else %}\n",
-            "            {{- '<|im_start|>' + message.role + '\\n' + content }}\n",
-            "        {%- endif %}\n",
-            "        {%- if message.tool_calls %}\n",
-            "            {%- for tool_call in message.tool_calls %}\n",
-            "                {%- if (loop.first and content) or (not loop.first) %}\n",
-            "                    {{- '\\n' }}\n",
-            "                {%- endif %}\n",
-            "                {%- if tool_call.function %}\n",
-            "                    {%- set tool_call = tool_call.function %}\n",
-            "                {%- endif %}\n",
-            "                {{- '<tool_call>\\n{\"name\": \"' }}\n",
-            "                {{- tool_call.name }}\n",
-            "                {{- '\", \"arguments\": ' }}\n",
-            "                {%- if tool_call.arguments is string %}\n",
-            "                    {{- tool_call.arguments }}\n",
-            "                {%- else %}\n",
-            "                    {{- tool_call.arguments | tojson }}\n",
-            "                {%- endif %}\n",
-            "                {{- '}\\n</tool_call>' }}\n",
-            "            {%- endfor %}\n",
-            "        {%- endif %}\n",
-            "        {{- '<|im_end|>\\n' }}\n",
-            "    {%- elif message.role == \"tool\" %}\n",
-            "        {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n",
-            "            {{- '<|im_start|>user' }}\n",
-            "        {%- endif %}\n",
-            "        {{- '\\n<tool_response>\\n' }}\n",
-            "        {{- message.content }}\n",
-            "        {{- '\\n</tool_response>' }}\n",
-            "        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n",
-            "            {{- '<|im_end|>\\n' }}\n",
-            "        {%- endif %}\n",
-            "    {%- endif %}\n",
-            "{%- endfor %}\n",
-            "{%- if add_generation_prompt %}\n",
-            "    {{- '<|im_start|>assistant\\n' }}\n",
-            "    {%- if enable_thinking is defined and enable_thinking is false %}\n",
-            "        {{- '<think>\\n\\n</think>\\n\\n' }}\n",
-            "    {%- endif %}\n",
-            "{%- endif %}\n",
-            "---\u001b[39m\n"
-          ]
-        },
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "258b7c635c1045329d4669e48c46ccd5",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "Tokenizing Prompts (num_proc=2):   0%|          | 0/9985 [00:00<?, ? examples/s]"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "[2025-05-08 13:42:09,195] [INFO] [axolotl.utils.data.utils.drop_long_seq_in_dataset:177] [PID:174] [RANK:0] min_input_len: 23\u001b[39m\n",
-            "[2025-05-08 13:42:09,196] [INFO] [axolotl.utils.data.utils.drop_long_seq_in_dataset:179] [PID:174] [RANK:0] max_input_len: 3380\u001b[39m\n"
-          ]
-        },
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "0f6907ebbc6242c8bde059cef1e1bd29",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "Dropping Long Sequences (num_proc=2):   0%|          | 0/9985 [00:00<?, ? examples/s]"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "ef0a3c7a6f14460fb4da096928ae249e",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "Drop Samples with Zero Trainable Tokens (num_proc=2):   0%|          | 0/9985 [00:00<?, ? examples/s]"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "dc892a596f6942d7973c616c38f0eebb",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "Add position_id column (Sample Packing) (num_proc=2):   0%|          | 0/9985 [00:00<?, ? examples/s]"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "[2025-05-08 13:42:21,651] [INFO] [axolotl.utils.data.sft.load_tokenized_prepared_datasets:351] [PID:174] [RANK:0] Saving merged prepared dataset to disk... last_run_prepared/97037817611d38b3a9c681753c3c4c95\u001b[39m\n"
-          ]
-        },
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "7c2485c6cdfe463da6fdb35982a1070d",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "Saving the dataset (0/1 shards):   0%|          | 0/9985 [00:00<?, ? examples/s]"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "[2025-05-08 13:42:25,711] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:411] [PID:174] [RANK:0] gather_len_batches: [1540]\u001b[39m\n",
-            "[2025-05-08 13:42:25,714] [INFO] [axolotl.calc_sample_packing_eff_est:491] [PID:174] [RANK:0] sample_packing_eff_est across ranks: [0.9987832601968344]\u001b[39m\n"
-          ]
-        }
-      ],
-      "source": [
-        "from axolotl.common.datasets import load_datasets\n",
-        "\n",
-        "# Load, parse and tokenize the datasets to be formatted with qwen3 chat template\n",
-        "# Drop long samples from the dataset that overflow the max sequence length\n",
-        "dataset_meta = load_datasets(cfg=cfg)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "mrSNfHpk0EAe"
-      },
-      "source": [
-        "# Training\n",
-        "\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 1000,
-          "referenced_widgets": [
-            "004d9177a6a14118a5930dc3cc13147b",
-            "a80410b919e442c49aea15acc1ce1a72",
-            "c6e00f5224364822bc4239b176686919",
-            "ec11d1e5ae7b42c883d9b1f38a65356e",
-            "734185351eb543fa9a00a881dcbb9fe7",
-            "fa1282ccc7544e4f818e2f03ccffe4a5",
-            "bbbf575d2a4b4c6ea8389be79b2a6039",
-            "2a51b36be41745468e4c2d7a21b1c0d2",
-            "4fd114abe9f5494ab59858949f5055f1",
-            "936d04b5fe1b4c63bf0b080e423d051b",
-            "f1cef8e8dc2646fb9fd09f3b09081074",
-            "cdebbc55a1164c018546c2ac6f8c620c",
-            "a44f630e099e43899f20a77084ae60cd",
-            "c3725c7f79fe415fbd1ea336f0cc9cf1",
-            "0e50870ed0c643e0b6c18cc5d7ddae7f",
-            "c33ced495f70464aa4a3a91922090853",
-            "ed5ca967ad5342929e578ac6aa4dc4c0",
-            "af401d117d5047629d3a6e2361757b62",
-            "b191ac001a2e4962bc9a245fcdf26e6b",
-            "054c8dffadba48c6b895a6cc62448ecc",
-            "bfcdbba993b74972a9e3e575f86908ff",
-            "6ebb2ec171414e47a14765505f64bb3c",
-            "500e272208a246089613bf788a165271",
-            "200df5e79b9244849e589ecb0250a520",
-            "cc94432d08464affa3e58b560bdad194",
-            "3036608c71904ce9ae4bb2a9fa8802d9",
-            "adacfdcc1b0140efac56918e9ccf064e",
-            "f4a1795dc7514a718f478245f521f0ba",
-            "5e746eb25bbe416fb585fa24e79f5177",
-            "b5b65414154544aa8a71b1a39164aad7",
-            "f0a58fbd0fca4340890041f99fa2f8c8",
-            "5ca6be24acb548cea130bd58e9954c7c",
-            "5cfb02ee044b4011a378efa8b54a370f",
-            "4d05314858354e729d76094b3b0ce761",
-            "c42acf646f344a88b8c11f81e67f7206",
-            "7be6f04c284e4326bb4ff3d301e7b3c6",
-            "ffdbb12a2f2c4d14911685e7683e0ef0",
-            "bee3501b2a17427784a717e50a85e7fa",
-            "8bc9d8ba866c442b9118d9630009939c",
-            "9f56a2d9979c4bd8928c644c22c3ecdf",
-            "9503a45960984adc97b58e16c50662e0",
-            "da6e93f3e4984780b930fe7a706983ea",
-            "ab93eabd7cea4b94b4b7a387f101e8a1",
-            "704f2f5a9b1c49d5a75a0025a5dda11b",
-            "dd0e646fad3f4a89ba23b39d162bd8d9",
-            "d43c6df07ddb466587807d6dbe1ff614",
-            "e0e8b840b8ea4d0d9db09afe99fa287d",
-            "9327977822be4b1294f80e876552e305",
-            "77304d1a46b3468a98483e02ec0ac4a4",
-            "8c4d4fc5a30f4e7cb3be53fe2adda33d",
-            "e90658f4bcb642baa78426012f863152",
-            "f7434f3e03124a1c938a39af79d7fa59",
-            "c1314f241a434c41b45d84dc4d3b30f8",
-            "37de928300e34184881039378bd75e7f",
-            "0e936d9dbf9c4fdd86bbfe9730dedc47",
-            "e21e180307e5485cbbe908672fd6639a",
-            "2e2b0c1599c341a198f632f46a40c90e",
-            "bff139df987d4a62abec6456cb27f3d4",
-            "ebe1cc366d324ad59b264c8b3c431441",
-            "114dece49dba437c8572ef94b23c3b1e",
-            "be724f04b03942b2a033a7e8898bb4fd",
-            "fcbab4d8dced41a18dfccce81e3a45a0",
-            "c1f9c267ba3f40039cdb5eb3267e8043",
-            "33b3b1d0295646edaac7b4822761aeb0",
-            "fba7aa824b38467ab3061b226114cdec",
-            "f3075dccbd2747b4a7913b66f44f2596",
-            "fe18bba7f3fb4c31bf840541f36b3425",
-            "fd4f333f7ece4450b04e1a9af1f9d2f6",
-            "f60a2bdb6b6b4e0e8c3508580e247132",
-            "c0892a1881de4eb4bfabc6a68f87ae99",
-            "1bec6297c90242a88672d195bc09d429",
-            "d1f9b10c130542f094c8fd3d1e23b5e9",
-            "e575d87a7efe4ec7b1efde489839d4a6",
-            "edc99591b9c747b689b94d0052fec14c",
-            "35cc989ca3374e7dba0cb166febc4bde",
-            "158c8b85dbf34de6a94b4e35e2fc7d5a",
-            "0b4c9753a7cb4354b8e5f187e6e1ad7c",
-            "4471ff62258549fba9514bb67050f965",
-            "9cd5211b5d8b457aa0002f1d17b80028",
-            "19127c7bb1554ccbac877059f9a82db0",
-            "f4667818b9d34a09891cd727a429a610",
-            "9ed02dc43412471a9ab47f3620ccf3a5",
-            "6932489232ec4ab18a160b1e7fbcdfe1",
-            "4540927d98f54466b434ba4c0edf045d",
-            "e400cbf14bcc446a9d33b210cd93550b",
-            "71002199df6b40c9a1ac40df5fb27a1b",
-            "4b27c267393640f28f6eae0875bd2ed9",
-            "9858cb74a09748a39e8149baac96702c",
-            "eb1c9535e6a546098b760528b2ea387c",
-            "18357b321ce44d7b8bd9d1c886f69275",
-            "279937fe03bc4e4eb25b472d7e9df163",
-            "bca2c7185b6749fd899c06a2ba4c5e46",
-            "1f7d30f71bbd4547a9150d21da071055",
-            "e366ae3fceec4566b9ed303d6c5f90af",
-            "5dd7d150dbe04f08b165ce7f2c27cd11",
-            "b634bb73cfa743d09a5999101b840976",
-            "742b1030acfd414bbd9d5327b7e3826d",
-            "0f480e3a0b0a45d2a2d2dec3cad923f3",
-            "fcb30372e7404c5d8a1ad4df91e6c7b2",
-            "2860e3bb3baf4f7da058465850e800c5",
-            "3efd18ea8eaa41918894883da9541bfa",
-            "e09f1bcbb9d94c09be53e5e1303642c2",
-            "82177df57a494de8900c14c2f5185175",
-            "ccfcdc95baf646f8aeb3d516742383f2",
-            "8f5bd719974e41c3a8dd9a5b0d3d71e6",
-            "b87c84de30e84b3abf4871461fb9cbd3",
-            "e7d8e4fe58384e93a106de546068c65e",
-            "0aa8ab56b85f4171a79c3bc210594025",
-            "67da6c4260574869aa24c3cbc1bc1654",
-            "94b9088614464f60a203de39dbcae853",
-            "fea1b70fb46745feb5111b3929175b5d",
-            "f365820a3d3c42b2948abfe32065de14",
-            "823f1c78f15043e38bbd4dca3932a86a",
-            "a1959759c5424da9961fb2a308d4dee4",
-            "34c9c0137b504cd799c6bd6de69507c2",
-            "735d4f225b24414294fc1b213c61223c",
-            "5e5e15b0569b474c9620083b3ec6af55",
-            "03a3c744d716431488163b4358b80f92",
-            "a5434ee714f9498d83870544b67c0cb7",
-            "3aaecbf540f54a2db9ab0931e3b1fe57",
-            "9e333ed3b5014069ac1dd969255dd591"
-          ]
-        },
-        "id": "IwrpurmloGOy",
-        "outputId": "84fa167f-ba27-4255-d508-dc9df56ad39b"
-      },
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "\n",
-            "     #@@ #@@      @@# @@#\n",
-            "    @@  @@          @@  @@           =@@#                               @@                 #@    =@@#.\n",
-            "    @@    #@@@@@@@@@    @@           #@#@=                              @@                 #@     .=@@\n",
-            "      #@@@@@@@@@@@@@@@@@            =@# @#     ##=     ##    =####=+    @@      =#####+  =#@@###.   @@\n",
-            "    @@@@@@@@@@/  +@@/  +@@          #@  =@=     #@=   @@   =@#+  +#@#   @@    =@#+  +#@#   #@.      @@\n",
-            "    @@@@@@@@@@  ##@@  ##@@         =@#   @#      =@# @#    @@      @@   @@    @@      #@   #@       @@\n",
-            "     @@@@@@@@@@@@@@@@@@@@          #@=+++#@=      =@@#     @@      @@   @@    @@      #@   #@       @@\n",
-            "                                  =@#=====@@     =@# @#    @@      @@   @@    @@      #@   #@       @@\n",
-            "    @@@@@@@@@@@@@@@@  @@@@        #@      #@=   #@=  +@@   #@#    =@#   @@.   =@#    =@#   #@.      @@\n",
-            "                                 =@#       @#  #@=     #@   =#@@@@#=    +#@@=  +#@@@@#=    .##@@+   @@\n",
-            "    @@@@  @@@@@@@@@@@@@@@@\n",
-            "\n",
-            "[2025-05-07 22:08:14,344] [INFO] [axolotl.monkeypatch.peft.utils.patch_peft_prep_code:76] [PID:1336] [RANK:0] patching prepare_model_for_kbit_training to allow for overrides\u001b[39m\n",
-            "[2025-05-07 22:08:14,549] [INFO] [axolotl.integrations.cut_cross_entropy.pre_model_load:80] [PID:1336] [RANK:0] Applying Cut Cross Entropy to model type: qwen3\u001b[39m\n"
-          ]
-        },
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "004d9177a6a14118a5930dc3cc13147b",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "model.safetensors.index.json:   0%|          | 0.00/36.5k [00:00<?, ?B/s]"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "cdebbc55a1164c018546c2ac6f8c620c",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "model-00001-of-00008.safetensors:   0%|          | 0.00/3.84G [00:00<?, ?B/s]"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "500e272208a246089613bf788a165271",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "model-00002-of-00008.safetensors:   0%|          | 0.00/3.96G [00:00<?, ?B/s]"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "4d05314858354e729d76094b3b0ce761",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "model-00003-of-00008.safetensors:   0%|          | 0.00/3.96G [00:00<?, ?B/s]"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "dd0e646fad3f4a89ba23b39d162bd8d9",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "model-00004-of-00008.safetensors:   0%|          | 0.00/3.96G [00:00<?, ?B/s]"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "e21e180307e5485cbbe908672fd6639a",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "model-00005-of-00008.safetensors:   0%|          | 0.00/3.96G [00:00<?, ?B/s]"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "fe18bba7f3fb4c31bf840541f36b3425",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "model-00006-of-00008.safetensors:   0%|          | 0.00/3.96G [00:00<?, ?B/s]"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "4471ff62258549fba9514bb67050f965",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "model-00007-of-00008.safetensors:   0%|          | 0.00/3.96G [00:00<?, ?B/s]"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "eb1c9535e6a546098b760528b2ea387c",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "model-00008-of-00008.safetensors:   0%|          | 0.00/1.91G [00:00<?, ?B/s]"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "[2025-05-07 22:09:49,798] [INFO] [accelerate.utils.modeling.get_balanced_memory:990] [PID:1336] We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).\n"
-          ]
-        },
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "2860e3bb3baf4f7da058465850e800c5",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "fea1b70fb46745feb5111b3929175b5d",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "generation_config.json:   0%|          | 0.00/239 [00:00<?, ?B/s]"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "[2025-05-07 22:11:37,521] [INFO] [axolotl.utils.models.load_model:1302] [PID:1336] [RANK:0] cuda memory usage after model load: 9.264GB (+1.721GB cache, +0.375GB misc)\u001b[39m\n",
-            "[2025-05-07 22:11:37,532] [INFO] [axolotl.utils.models.prepare_model:1205] [PID:1336] [RANK:0] converting PEFT model w/ prepare_model_for_kbit_training\u001b[39m\n",
-            "[2025-05-07 22:11:37,537] [INFO] [axolotl.utils.models.load_model:1341] [PID:1336] [RANK:0] Converting modules to torch.float16\u001b[39m\n",
-            "trainable params: 128,450,560 || all params: 14,896,757,760 || trainable%: 0.8623\n",
-            "[2025-05-07 22:11:40,170] [INFO] [axolotl.utils.models.load_model:1402] [PID:1336] [RANK:0] cuda memory usage after adapters: 9.743GB (+1.476GB cache, +0.375GB misc)\u001b[39m\n"
-          ]
-        },
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "/usr/local/lib/python3.11/dist-packages/axolotl/core/trainers/base.py:64: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `AxolotlTrainer.__init__`. Use `processing_class` instead.\n",
-            "  super().__init__(*_args, **kwargs)\n",
-            "No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.\n"
-          ]
-        },
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "[2025-05-07 22:11:41,755] [INFO] [axolotl.train.save_initial_configs:359] [PID:1336] [RANK:0] Pre-saving adapter config to ./outputs/qwen-sft-pirate-rrr...\u001b[39m\n",
-            "[2025-05-07 22:11:41,756] [INFO] [axolotl.train.save_initial_configs:363] [PID:1336] [RANK:0] Pre-saving tokenizer to ./outputs/qwen-sft-pirate-rrr...\u001b[39m\n",
-            "[2025-05-07 22:11:41,974] [INFO] [axolotl.train.save_initial_configs:366] [PID:1336] [RANK:0] Pre-saving model config to ./outputs/qwen-sft-pirate-rrr...\u001b[39m\n",
-            "[2025-05-07 22:11:41,982] [INFO] [axolotl.train.execute_training:211] [PID:1336] [RANK:0] Starting trainer...\u001b[39m\n",
-            "[2025-05-07 22:11:45,047] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:411] [PID:1336] [RANK:0] gather_len_batches: [1540]\u001b[39m\n"
-          ]
-        },
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "You're using a Qwen2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n",
-            "You're using a Qwen2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
-          ]
-        },
-        {
-          "data": {
-            "text/html": [
-              "\n",
-              "    <div>\n",
-              "      \n",
-              "      <progress value='25' max='25' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
-              "      [25/25 09:25, Epoch 0/1]\n",
-              "    </div>\n",
-              "    <table border=\"1\" class=\"dataframe\">\n",
-              "  <thead>\n",
-              " <tr style=\"text-align: left;\">\n",
-              "      <th>Step</th>\n",
-              "      <th>Training Loss</th>\n",
-              "    </tr>\n",
-              "  </thead>\n",
-              "  <tbody>\n",
-              "    <tr>\n",
-              "      <td>1</td>\n",
-              "      <td>1.092300</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>2</td>\n",
-              "      <td>1.554200</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>3</td>\n",
-              "      <td>1.041400</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>4</td>\n",
-              "      <td>1.733800</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>5</td>\n",
-              "      <td>1.430000</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>6</td>\n",
-              "      <td>1.258500</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>7</td>\n",
-              "      <td>1.343600</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>8</td>\n",
-              "      <td>1.101700</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>9</td>\n",
-              "      <td>1.086500</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>10</td>\n",
-              "      <td>0.813200</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>11</td>\n",
-              "      <td>0.689600</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>12</td>\n",
-              "      <td>0.826700</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>13</td>\n",
-              "      <td>1.541800</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>14</td>\n",
-              "      <td>0.948000</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>15</td>\n",
-              "      <td>1.357000</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>16</td>\n",
-              "      <td>1.085800</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>17</td>\n",
-              "      <td>1.516800</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>18</td>\n",
-              "      <td>1.146800</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>19</td>\n",
-              "      <td>0.834800</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>20</td>\n",
-              "      <td>0.968000</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>21</td>\n",
-              "      <td>1.388800</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>22</td>\n",
-              "      <td>1.511500</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>23</td>\n",
-              "      <td>1.338500</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>24</td>\n",
-              "      <td>1.206600</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>25</td>\n",
-              "      <td>1.504600</td>\n",
-              "    </tr>\n",
-              "  </tbody>\n",
-              "</table><p>"
-            ],
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "[2025-05-07 22:12:42,746] [INFO] [axolotl.callbacks.on_step_end:128] [PID:1336] [RANK:0] cuda memory usage while training: 9.768GB (+3.287GB cache, +0.646GB misc)\u001b[39m\n",
-            "[2025-05-07 22:21:46,859] [INFO] [axolotl.train.save_trained_model:231] [PID:1336] [RANK:0] Training completed! Saving pre-trained model to ./outputs/qwen-sft-pirate-rrr.\u001b[39m\n"
-          ]
-        }
-      ],
-      "source": [
-        "from axolotl.train import train\n",
-        "\n",
-        "# just train the first 25 steps for demo.\n",
-        "# This is sufficient to align the model as we've used packing to maximize the trainable samples per step.\n",
-        "cfg.max_steps = 25\n",
-        "model, tokenizer, trainer = train(cfg=cfg, dataset_meta=dataset_meta)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "j1b9ypF78eCb"
-      },
-      "source": [
-        "# Inferencing the trained model"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "r3_vHhif8YEs",
-        "outputId": "e5050605-f6c9-421c-98f9-bde56a281eae"
-      },
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Ahoy there, matey! Shiver me timbers, ye be lookin' for the Pythagorean theorem, eh? Well, hold yer horses and listen up, for I'll be tellin' ye all about it in me own special way.\n",
-            "\n",
-            "The Pythagorean theorem be a real gem of a mathematical trick that helps ye find the length of a side of a right triangle. Now, a right triangle be a triangle with a right angle, which be that little corner that looks like a square. \n",
-            "\n",
-            "The theorem be named after a clever fellow named Pythagoras, who be a mathematician from ancient Greece. He discovered that if ye have a right triangle, the square of the length of the hypotenuse (that be the side opposite the right angle) be equal to the sum of the squares of the other two sides. \n",
-            "\n",
-            "In other words, if ye have a triangle with sides of length a, b, and c (\n"
-          ]
-        }
-      ],
-      "source": [
-        "import torch\n",
-        "from transformers import TextStreamer\n",
-        "\n",
-        "messages = [\n",
-        "    {\n",
-        "        \"role\": \"user\",\n",
-        "        \"content\": \"Explain the Pythagorean theorem to me.\",\n",
-        "    },\n",
-        "]\n",
-        "\n",
-        "prompt = tokenizer.apply_chat_template(\n",
-        "    messages,\n",
-        "    add_generation_prompt=True,\n",
-        "    tokenize=False,\n",
-        "    enable_thinking = False,\n",
-        ")\n",
-        "\n",
-        "outputs = model.generate(\n",
-        "    **tokenizer(prompt, return_tensors = \"pt\").to(\"cuda\"),\n",
-        "    max_new_tokens = 192,\n",
-        "    temperature = 1.0, top_p = 0.8, top_k = 32,\n",
-        "    streamer = TextStreamer(tokenizer, skip_prompt = True),\n",
-        ")\n"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "HoGwT2JRSIjA"
-      },
-      "source": [
-        "# Saving your trained model\n",
-        "\n",
-        "Axolotl automatically saves checkpoints to the `output_dir` path.\n",
-        "\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "5BmSbiy6NaaS",
-        "outputId": "f5e1d913-7d55-42d2-8340-f9f1b0bc2b38"
-      },
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "total 506M\n",
-            "-rw-r--r-- 1 root root  845 May  7 22:21 adapter_config.json\n",
-            "-rw-r--r-- 1 root root 491M May  7 22:21 adapter_model.safetensors\n",
-            "-rw-r--r-- 1 root root  707 May  7 22:11 added_tokens.json\n",
-            "drwxr-xr-x 2 root root 4.0K May  7 22:17 checkpoint-13\n",
-            "drwxr-xr-x 2 root root 4.0K May  7 22:21 checkpoint-25\n",
-            "-rw-r--r-- 1 root root 1.2K May  7 22:11 config.json\n",
-            "-rw-r--r-- 1 root root 1.6M May  7 22:11 merges.txt\n",
-            "-rw-r--r-- 1 root root 2.6K May  7 22:21 README.md\n",
-            "-rw-r--r-- 1 root root  613 May  7 22:11 special_tokens_map.json\n",
-            "-rw-r--r-- 1 root root 9.5K May  7 22:11 tokenizer_config.json\n",
-            "-rw-r--r-- 1 root root  11M May  7 22:11 tokenizer.json\n",
-            "-rw-r--r-- 1 root root 2.7M May  7 22:11 vocab.json\n"
-          ]
-        }
-      ],
-      "source": [
-        "# Show the saved checkpoints in the output_dir\n",
-        "!ls -lh \"./outputs/qwen-sft-pirate-rrr\""
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "_PCIFWxuOZd6"
-      },
-      "source": [
-        "Setting `hub_model_id: ` in the original config would have automatically uploaded the model to HuggingFace Hub (e.g. `hub_model_id: username/model_id`)\n",
-        "\n",
-        "If you prefer to manually upload the training artifacts, we can still upload the entire final checkpoint to HuggingFace from the CLI."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 955,
-          "referenced_widgets": [
-            "c12ea43372ac4d57bb9605f1a429b397",
-            "86816687746246b4a6105e8010384e25",
-            "6f05e9bebf7b40c9835808e77de6c236",
-            "c7433acd3c4841e6958ae8f7e87b1808",
-            "19c1e38389fa46c7b7e2152a56e1df34",
-            "0e067d8db8ed48308a718d5f57683fd1",
-            "131065f118274a1586ac38e39ed84ef0",
-            "8640ac440fbc4644b9a3af7ba3ae7183",
-            "5cea7996f02040b187ece0bb2d6a8d1f",
-            "2e257c8be2da40b4bb67a9e4ab6811f3",
-            "56e3768bef5a4b9db4168c5c17f509c2",
-            "62c028fdef904dedb9cdeca2b3bda725",
-            "a7cf477e80fc43e0ad82c7997b076dce",
-            "835bcc28a5564fb9b3d651bc8e32dc46",
-            "9f1c9a0695384bdaa6f8b847ef89bee8",
-            "b1bea589efa14258a9982071b87938bf",
-            "590eef89881545aa8bbef9a8bbe7fb00",
-            "4b1f04ff63d14a118fdd15814dff50e4",
-            "39789237703c4a418134243055c9cbf5",
-            "a3a945817f684328b34651fe052393ec"
-          ]
-        },
-        "id": "2yw8pLvlSMl8",
-        "outputId": "6e489ab2-4abe-4e28-84ca-959f912433a4"
-      },
-      "outputs": [
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "c12ea43372ac4d57bb9605f1a429b397",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.\n",
-            "Start hashing 40 files.\n",
-            "Finished hashing 40 files.\n",
-            "Uploading files using Xet Storage..\n",
-            "Uploading...:  87% 1.82G/2.10G [00:23<00:04, 67.3MB/s]Cancellation requested; stopping current tasks.\n",
-            "Traceback (most recent call last):\n",
-            "  File \"/usr/local/lib/python3.11/dist-packages/huggingface_hub/_commit_api.py\", line 598, in _upload_xet_files\n",
-            "    upload_files(\n",
-            "RuntimeError: Xet Runtime Error: Task cancelled; possible runtime shutdown in progress (task 9 was cancelled).\n",
-            "\n",
-            "During handling of the above exception, another exception occurred:\n",
-            "\n",
-            "Traceback (most recent call last):\n",
-            "  File \"/usr/local/bin/huggingface-cli\", line 8, in <module>\n",
-            "    sys.exit(main())\n",
-            "             ^^^^^^\n",
-            "  File \"/usr/local/lib/python3.11/dist-packages/huggingface_hub/commands/huggingface_cli.py\", line 57, in main\n",
-            "    service.run()\n",
-            "  File \"/usr/local/lib/python3.11/dist-packages/huggingface_hub/commands/upload.py\", line 207, in run\n",
-            "    print(self._upload())\n",
-            "          ^^^^^^^^^^^^^^\n",
-            "  File \"/usr/local/lib/python3.11/dist-packages/huggingface_hub/commands/upload.py\", line 302, in _upload\n",
-            "    return self.api.upload_folder(\n",
-            "           ^^^^^^^^^^^^^^^^^^^^^^^\n",
-            "  File \"/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_validators.py\", line 114, in _inner_fn\n",
-            "    return fn(*args, **kwargs)\n",
-            "           ^^^^^^^^^^^^^^^^^^^\n",
-            "  File \"/usr/local/lib/python3.11/dist-packages/huggingface_hub/hf_api.py\", line 1633, in _inner\n",
-            "    return fn(self, *args, **kwargs)\n",
-            "           ^^^^^^^^^^^^^^^^^^^^^^^^^\n",
-            "  File \"/usr/local/lib/python3.11/dist-packages/huggingface_hub/hf_api.py\", line 4942, in upload_folder\n",
-            "    commit_info = self.create_commit(\n",
-            "                  ^^^^^^^^^^^^^^^^^^^\n",
-            "  File \"/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_validators.py\", line 114, in _inner_fn\n",
-            "    return fn(*args, **kwargs)\n",
-            "           ^^^^^^^^^^^^^^^^^^^\n",
-            "  File \"/usr/local/lib/python3.11/dist-packages/huggingface_hub/hf_api.py\", line 1633, in _inner\n",
-            "    return fn(self, *args, **kwargs)\n",
-            "           ^^^^^^^^^^^^^^^^^^^^^^^^^\n",
-            "  File \"/usr/local/lib/python3.11/dist-packages/huggingface_hub/hf_api.py\", line 4202, in create_commit\n",
-            "    self.preupload_lfs_files(\n",
-            "  File \"/usr/local/lib/python3.11/dist-packages/huggingface_hub/hf_api.py\", line 4483, in preupload_lfs_files\n",
-            "    _upload_xet_files(**upload_kwargs, create_pr=create_pr)  # type: ignore [arg-type]\n",
-            "    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
-            "  File \"/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_validators.py\", line 114, in _inner_fn\n",
-            "    return fn(*args, **kwargs)\n",
-            "           ^^^^^^^^^^^^^^^^^^^\n",
-            "  File \"/usr/local/lib/python3.11/dist-packages/huggingface_hub/_commit_api.py\", line 592, in _upload_xet_files\n",
-            "    with progress_cm as progress:\n",
-            "  File \"/usr/local/lib/python3.11/dist-packages/tqdm/std.py\", line 1138, in __exit__\n",
-            "    def __exit__(self, exc_type, exc_value, traceback):\n",
-            "\n",
-            "KeyboardInterrupt\n",
-            "^C\n"
-          ]
-        }
-      ],
-      "source": [
-        "from huggingface_hub import notebook_login\n",
-        "# remove the partial epoch checkpoints\n",
-        "!rm -rf \"./outputs/qwen-sft-pirate-rrr/checkpoint-*\"\n",
-        "\n",
-        "# HF Notebook login widget\n",
-        "notebook_login()\n",
-        "\n",
-        "# upload the LoRA adapter for your model to HF, remember to update the username/model-name below\n",
-        "!huggingface-cli upload --repo-type=model winglian/pirate-qwen-14B \"./outputs/qwen-sft-pirate-rrr\""
-      ]
-    }
-  ],
-  "metadata": {
-    "accelerator": "GPU",
-    "colab": {
-      "gpuType": "T4",
-      "provenance": []
-    },
-    "kernelspec": {
-      "display_name": "Python 3",
-      "name": "python3"
-    },
-    "language_info": {
-      "name": "python"
-    },
-    "widgets": {
-      "application/vnd.jupyter.widget-state+json": {
-        "00321cce58884f6f9b3855a21fcd9187": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "004d9177a6a14118a5930dc3cc13147b": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_a80410b919e442c49aea15acc1ce1a72",
-              "IPY_MODEL_c6e00f5224364822bc4239b176686919",
-              "IPY_MODEL_ec11d1e5ae7b42c883d9b1f38a65356e"
-            ],
-            "layout": "IPY_MODEL_734185351eb543fa9a00a881dcbb9fe7"
-          }
-        },
-        "0077aedc3d174560bce924ee89e9c006": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "03a3c744d716431488163b4358b80f92": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "03b093d592ba4386aa61f7b8483da660": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_b8766a88716948cf968f4563531a76d9",
-              "IPY_MODEL_6f3a28b912714c6e931003549664bfa3",
-              "IPY_MODEL_16d1283741404b7bb319094c992fce01"
-            ],
-            "layout": "IPY_MODEL_2a5bb0e818ab47be8cf6465988328503"
-          }
-        },
-        "042e091f75694c47aee761e760e76773": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "0546d04aae644dde846c58a4afb598a6": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "054c8dffadba48c6b895a6cc62448ecc": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "07fb3a2c8315494e97b447e672dfae06": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_12815f401eba44658caa7b2e490137a8",
-            "placeholder": "​",
-            "style": "IPY_MODEL_30e02aa2d0d241979369e598287f2639",
-            "value": "Drop Samples with Zero Trainable Tokens (num_proc=2): 100%"
-          }
-        },
-        "083f9cda8d754c168beee10d2f8955a2": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_a0a11e929edd4189b79723d618522c33",
-            "max": 728,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_e87ea87fcff247b5bbcc331ba79a8dc2",
-            "value": 728
-          }
-        },
-        "09007681cf8d42aeb8c1d2f6a74e470a": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_b195f160ca20442fadd8b5aed0ee41af",
-            "placeholder": "​",
-            "style": "IPY_MODEL_ca65e32eb52f48c09a84b33cb18f22cd",
-            "value": " 11.4M/11.4M [00:00&lt;00:00, 21.8MB/s]"
-          }
-        },
-        "0a46ad75c198463d843fb35e813642cb": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_b8e39e4dddc3497fbc29ae45c66da759",
-            "max": 11422654,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_63b4e563e85c4f03b1b72beda9577bcc",
-            "value": 11422654
-          }
-        },
-        "0aa8ab56b85f4171a79c3bc210594025": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "0b4c9753a7cb4354b8e5f187e6e1ad7c": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "0cd7efffbb3c4c4b972e63749f61ab97": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "0dea5caa27384f5689e3cab51f558727": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "0e067d8db8ed48308a718d5f57683fd1": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_b1bea589efa14258a9982071b87938bf",
-            "placeholder": "​",
-            "style": "IPY_MODEL_590eef89881545aa8bbef9a8bbe7fb00",
-            "value": "\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. </center>"
-          }
-        },
-        "0e50870ed0c643e0b6c18cc5d7ddae7f": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_bfcdbba993b74972a9e3e575f86908ff",
-            "placeholder": "​",
-            "style": "IPY_MODEL_6ebb2ec171414e47a14765505f64bb3c",
-            "value": " 3.84G/3.84G [00:09&lt;00:00, 664MB/s]"
-          }
-        },
-        "0e936d9dbf9c4fdd86bbfe9730dedc47": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "0f417447a7bd4a33acca96fa37aec877": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "0f480e3a0b0a45d2a2d2dec3cad923f3": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "0f6907ebbc6242c8bde059cef1e1bd29": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_5bdfd87fc6cd4f9dabef7cfee29c8060",
-              "IPY_MODEL_64f54d4a744a4627a07c3c0120276f3b",
-              "IPY_MODEL_65b75b9b8bc143cf997796af68ff6668"
-            ],
-            "layout": "IPY_MODEL_d6fe74e4255444368f8f90a62157d869"
-          }
-        },
-        "114dece49dba437c8572ef94b23c3b1e": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "12815f401eba44658caa7b2e490137a8": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "12b56912736849fea2ad8124456fdc5c": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_97e36007e1304e1583fd81bfb13f0edd",
-            "max": 1671853,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_c65dc74c7d6f4bab8f7dd28455161dd8",
-            "value": 1671853
-          }
-        },
-        "131065f118274a1586ac38e39ed84ef0": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": "center",
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": "flex",
-            "flex": null,
-            "flex_flow": "column",
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": "50%"
-          }
-        },
-        "158c8b85dbf34de6a94b4e35e2fc7d5a": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "16a188a0b06d45f980dcf3933509fe0a": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_349eee9f56d64f0cba6fc24ff2c50c9b",
-            "placeholder": "​",
-            "style": "IPY_MODEL_7e5d3774060e4589aa65982da5ea4ef4",
-            "value": " 9985/9985 [00:04&lt;00:00, 2604.11 examples/s]"
-          }
-        },
-        "16d1283741404b7bb319094c992fce01": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_a4e5789584564049b83df7c6c54a3e08",
-            "placeholder": "​",
-            "style": "IPY_MODEL_ff3a94b146a948b6907f5d80c7157f99",
-            "value": " 9985/0 [00:00&lt;00:00, 50763.46 examples/s]"
-          }
-        },
-        "1811cda0644e4190a9469d1774435d82": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "18357b321ce44d7b8bd9d1c886f69275": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_e366ae3fceec4566b9ed303d6c5f90af",
-            "placeholder": "​",
-            "style": "IPY_MODEL_5dd7d150dbe04f08b165ce7f2c27cd11",
-            "value": "model-00008-of-00008.safetensors: 100%"
-          }
-        },
-        "19127c7bb1554ccbac877059f9a82db0": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "danger",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_e400cbf14bcc446a9d33b210cd93550b",
-            "max": 3963750880,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_71002199df6b40c9a1ac40df5fb27a1b",
-            "value": 3963750502
-          }
-        },
-        "19c1e38389fa46c7b7e2152a56e1df34": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ButtonModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ButtonModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ButtonView",
-            "button_style": "",
-            "description": "Login",
-            "disabled": false,
-            "icon": "",
-            "layout": "IPY_MODEL_835bcc28a5564fb9b3d651bc8e32dc46",
-            "style": "IPY_MODEL_9f1c9a0695384bdaa6f8b847ef89bee8",
-            "tooltip": ""
-          }
-        },
-        "1bec6297c90242a88672d195bc09d429": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "1c6f1f10667545aaab958016ba7e2c94": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "1d5117195d4b49eb8f1a73b18419f7ce": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_0dea5caa27384f5689e3cab51f558727",
-            "placeholder": "​",
-            "style": "IPY_MODEL_a6f48410b9964fefba0c3009a77dc838",
-            "value": " 9.68k/9.68k [00:00&lt;00:00, 812kB/s]"
-          }
-        },
-        "1f7d30f71bbd4547a9150d21da071055": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "200df5e79b9244849e589ecb0250a520": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_f4a1795dc7514a718f478245f521f0ba",
-            "placeholder": "​",
-            "style": "IPY_MODEL_5e746eb25bbe416fb585fa24e79f5177",
-            "value": "model-00002-of-00008.safetensors: 100%"
-          }
-        },
-        "20352e5f58d24bb8b1f3940efd14fe4a": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "253017b0d0534e54ab44e181f6d7c82d": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_1c6f1f10667545aaab958016ba7e2c94",
-            "placeholder": "​",
-            "style": "IPY_MODEL_e6e969610738449887259063967f82b0",
-            "value": " 2.78M/2.78M [00:00&lt;00:00, 17.8MB/s]"
-          }
-        },
-        "258b7c635c1045329d4669e48c46ccd5": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_6f68ed9889f54ad2ae8a3b95ac263a83",
-              "IPY_MODEL_80366349d81e4dcc892db6cd56e384f3",
-              "IPY_MODEL_c73055099c084dca996159e23e162d0b"
-            ],
-            "layout": "IPY_MODEL_977f799afaac4a55b2dc1cffa7d5b63b"
-          }
-        },
-        "279937fe03bc4e4eb25b472d7e9df163": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "danger",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_b634bb73cfa743d09a5999101b840976",
-            "max": 1912371880,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_742b1030acfd414bbd9d5327b7e3826d",
-            "value": 1912371698
-          }
-        },
-        "27beaf06e41b472abdb544a43c720c5a": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "2860e3bb3baf4f7da058465850e800c5": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_3efd18ea8eaa41918894883da9541bfa",
-              "IPY_MODEL_e09f1bcbb9d94c09be53e5e1303642c2",
-              "IPY_MODEL_82177df57a494de8900c14c2f5185175"
-            ],
-            "layout": "IPY_MODEL_ccfcdc95baf646f8aeb3d516742383f2"
-          }
-        },
-        "2a51b36be41745468e4c2d7a21b1c0d2": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "2a5bb0e818ab47be8cf6465988328503": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "2b3a2659b12244bd8548320320016dbf": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "2e257c8be2da40b4bb67a9e4ab6811f3": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "2e2b0c1599c341a198f632f46a40c90e": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_be724f04b03942b2a033a7e8898bb4fd",
-            "placeholder": "​",
-            "style": "IPY_MODEL_fcbab4d8dced41a18dfccce81e3a45a0",
-            "value": "model-00005-of-00008.safetensors: 100%"
-          }
-        },
-        "3036608c71904ce9ae4bb2a9fa8802d9": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_5ca6be24acb548cea130bd58e9954c7c",
-            "placeholder": "​",
-            "style": "IPY_MODEL_5cfb02ee044b4011a378efa8b54a370f",
-            "value": " 3.96G/3.96G [00:10&lt;00:00, 531MB/s]"
-          }
-        },
-        "30a81da86f8043eca301e86a8651201a": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "30e02aa2d0d241979369e598287f2639": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "3225603166b54e7aab766b9964a2f660": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "33b3b1d0295646edaac7b4822761aeb0": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "349eee9f56d64f0cba6fc24ff2c50c9b": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "34c9c0137b504cd799c6bd6de69507c2": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "34cf3df51fbc41cabfdbba153c007f0e": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "35c811d2ae8e43f3b5cecbdd3cfa857f": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "35cc989ca3374e7dba0cb166febc4bde": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "366a343b62fa47d8985a3bd464d99f9e": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "37de928300e34184881039378bd75e7f": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "388f618924274d21a066f098f4f1e744": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_7c95f85a2b1f47a1bd846d110c47bb3c",
-              "IPY_MODEL_083f9cda8d754c168beee10d2f8955a2",
-              "IPY_MODEL_62e1a65582f446a78612eaa804e08a7d"
-            ],
-            "layout": "IPY_MODEL_487a177d020f4605834878b2fdc7afa3"
-          }
-        },
-        "39789237703c4a418134243055c9cbf5": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "3aaecbf540f54a2db9ab0931e3b1fe57": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "3c21e4a511b4441192c03b7f1d0976e9": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "3efd18ea8eaa41918894883da9541bfa": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_8f5bd719974e41c3a8dd9a5b0d3d71e6",
-            "placeholder": "​",
-            "style": "IPY_MODEL_b87c84de30e84b3abf4871461fb9cbd3",
-            "value": "Loading checkpoint shards: 100%"
-          }
-        },
-        "41f3b32c2f6b4034ae7a3b9124e28bc7": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "4471ff62258549fba9514bb67050f965": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_9cd5211b5d8b457aa0002f1d17b80028",
-              "IPY_MODEL_19127c7bb1554ccbac877059f9a82db0",
-              "IPY_MODEL_f4667818b9d34a09891cd727a429a610"
-            ],
-            "layout": "IPY_MODEL_9ed02dc43412471a9ab47f3620ccf3a5"
-          }
-        },
-        "4540927d98f54466b434ba4c0edf045d": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "487a177d020f4605834878b2fdc7afa3": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "4b1f04ff63d14a118fdd15814dff50e4": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "LabelModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "LabelModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "LabelView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_39789237703c4a418134243055c9cbf5",
-            "placeholder": "​",
-            "style": "IPY_MODEL_a3a945817f684328b34651fe052393ec",
-            "value": "Connecting..."
-          }
-        },
-        "4b27c267393640f28f6eae0875bd2ed9": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "4c727d40ef0443449afc31724ee79f0c": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "4d05314858354e729d76094b3b0ce761": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_c42acf646f344a88b8c11f81e67f7206",
-              "IPY_MODEL_7be6f04c284e4326bb4ff3d301e7b3c6",
-              "IPY_MODEL_ffdbb12a2f2c4d14911685e7683e0ef0"
-            ],
-            "layout": "IPY_MODEL_bee3501b2a17427784a717e50a85e7fa"
-          }
-        },
-        "4d468f96ec924681ad65eb671674b93e": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "4f1977d7e4824ef1a14b65f0f42bba10": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "4fd114abe9f5494ab59858949f5055f1": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "500e272208a246089613bf788a165271": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_200df5e79b9244849e589ecb0250a520",
-              "IPY_MODEL_cc94432d08464affa3e58b560bdad194",
-              "IPY_MODEL_3036608c71904ce9ae4bb2a9fa8802d9"
-            ],
-            "layout": "IPY_MODEL_adacfdcc1b0140efac56918e9ccf064e"
-          }
-        },
-        "519a7b154022443db6703f04a9142bae": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_d02274afd47b462291c745f261209d42",
-            "max": 27341251,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_0f417447a7bd4a33acca96fa37aec877",
-            "value": 27341251
-          }
-        },
-        "56e3768bef5a4b9db4168c5c17f509c2": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "590eef89881545aa8bbef9a8bbe7fb00": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "598da69727bd4fb8b1caf465ac736d7a": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "5bdfd87fc6cd4f9dabef7cfee29c8060": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_4d468f96ec924681ad65eb671674b93e",
-            "placeholder": "​",
-            "style": "IPY_MODEL_ad7599de524549c48bf2d3124ad4b299",
-            "value": "Dropping Long Sequences (num_proc=2): 100%"
-          }
-        },
-        "5ca240f31e6b44e3882c5eb37cd5a309": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": "20px"
-          }
-        },
-        "5ca6be24acb548cea130bd58e9954c7c": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "5cea7996f02040b187ece0bb2d6a8d1f": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "5cfb02ee044b4011a378efa8b54a370f": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "5dd7d150dbe04f08b165ce7f2c27cd11": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "5e18768f7ad6434ba8b8b8a2e853e204": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "5e5e15b0569b474c9620083b3ec6af55": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "5e746eb25bbe416fb585fa24e79f5177": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "5eb06edeb58e4930b1affef2a59eae81": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "5f86cd894de94c3280fadc1e2fd0ee13": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_a20927bf5f2c41f58c1e31ac858ab36c",
-              "IPY_MODEL_0a46ad75c198463d843fb35e813642cb",
-              "IPY_MODEL_09007681cf8d42aeb8c1d2f6a74e470a"
-            ],
-            "layout": "IPY_MODEL_ebc80d1a55fa47f4a5ea2756588569ec"
-          }
-        },
-        "60c1a0d765c14a1d888317e6a507e4ea": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "62c028fdef904dedb9cdeca2b3bda725": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "62e1a65582f446a78612eaa804e08a7d": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_5e18768f7ad6434ba8b8b8a2e853e204",
-            "placeholder": "​",
-            "style": "IPY_MODEL_bb33aec33a6447078c31bfd728942994",
-            "value": " 728/728 [00:00&lt;00:00, 20.3kB/s]"
-          }
-        },
-        "62e302ebdad64aada0ffe64ae1c873f3": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "63580b6fb30642479fe3000915bf551a": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "63b4e563e85c4f03b1b72beda9577bcc": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "64f54d4a744a4627a07c3c0120276f3b": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_0546d04aae644dde846c58a4afb598a6",
-            "max": 9985,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_897b77a56c09479bb11d7f2a30997e55",
-            "value": 9985
-          }
-        },
-        "65b75b9b8bc143cf997796af68ff6668": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_81c3db71ac704280ad030072655f1537",
-            "placeholder": "​",
-            "style": "IPY_MODEL_042e091f75694c47aee761e760e76773",
-            "value": " 9985/9985 [00:02&lt;00:00, 3977.47 examples/s]"
-          }
-        },
-        "67da6c4260574869aa24c3cbc1bc1654": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "6932489232ec4ab18a160b1e7fbcdfe1": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "6ebb2ec171414e47a14765505f64bb3c": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "6f05e9bebf7b40c9835808e77de6c236": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "PasswordModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "PasswordModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "PasswordView",
-            "continuous_update": true,
-            "description": "Token:",
-            "description_tooltip": null,
-            "disabled": false,
-            "layout": "IPY_MODEL_2e257c8be2da40b4bb67a9e4ab6811f3",
-            "placeholder": "​",
-            "style": "IPY_MODEL_56e3768bef5a4b9db4168c5c17f509c2",
-            "value": ""
-          }
-        },
-        "6f3a28b912714c6e931003549664bfa3": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_5ca240f31e6b44e3882c5eb37cd5a309",
-            "max": 1,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_5eb06edeb58e4930b1affef2a59eae81",
-            "value": 1
-          }
-        },
-        "6f68ed9889f54ad2ae8a3b95ac263a83": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_41f3b32c2f6b4034ae7a3b9124e28bc7",
-            "placeholder": "​",
-            "style": "IPY_MODEL_a10d0a76010f4e508c65a9b69ebc5156",
-            "value": "Tokenizing Prompts (num_proc=2): 100%"
-          }
-        },
-        "704f2f5a9b1c49d5a75a0025a5dda11b": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "71002199df6b40c9a1ac40df5fb27a1b": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "71c8af139cd248b1b51101fd46a93f35": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_d0e9dce55cec4c1ca619a0ccf209d924",
-            "max": 9675,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_4c727d40ef0443449afc31724ee79f0c",
-            "value": 9675
-          }
-        },
-        "734185351eb543fa9a00a881dcbb9fe7": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "735d4f225b24414294fc1b213c61223c": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "742b1030acfd414bbd9d5327b7e3826d": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "77304d1a46b3468a98483e02ec0ac4a4": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "7baeab52d6694c32b1efd1ea1a0a7782": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_93a44a11aa4846fa8efc6c1413ef1627",
-            "placeholder": "​",
-            "style": "IPY_MODEL_a55060adc3564407ac81ad7297d34aaa",
-            "value": "train.jsonl: 100%"
-          }
-        },
-        "7be6f04c284e4326bb4ff3d301e7b3c6": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "danger",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_9503a45960984adc97b58e16c50662e0",
-            "max": 3963750880,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_da6e93f3e4984780b930fe7a706983ea",
-            "value": 3963750502
-          }
-        },
-        "7c2485c6cdfe463da6fdb35982a1070d": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_ad1236893754446881e153adc9d5c962",
-              "IPY_MODEL_daee63fd167e4441a32324b51b00ad2b",
-              "IPY_MODEL_fe41858c6bd04c58840112b67c19a336"
-            ],
-            "layout": "IPY_MODEL_d262c82138024169b9f3aa034ca756fa"
-          }
-        },
-        "7c95f85a2b1f47a1bd846d110c47bb3c": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_7fd44cf9ca6e4726bfd7ac21846d6a14",
-            "placeholder": "​",
-            "style": "IPY_MODEL_366a343b62fa47d8985a3bd464d99f9e",
-            "value": "config.json: 100%"
-          }
-        },
-        "7cd0b85ebd204b7aba908417811ce4e0": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_7baeab52d6694c32b1efd1ea1a0a7782",
-              "IPY_MODEL_519a7b154022443db6703f04a9142bae",
-              "IPY_MODEL_d4183e9715f34d249942b8271cca3bdf"
-            ],
-            "layout": "IPY_MODEL_da2347ac94764a3fa2743343cf0d3cd2"
-          }
-        },
-        "7e5d3774060e4589aa65982da5ea4ef4": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "7fd44cf9ca6e4726bfd7ac21846d6a14": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "80366349d81e4dcc892db6cd56e384f3": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_f8ef805b776145c3bfa9ba8d90972058",
-            "max": 9985,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_cc587493c33c4f118d1b1170f85be24c",
-            "value": 9985
-          }
-        },
-        "813621384dc748b0ad06775e22761c0b": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "81c3db71ac704280ad030072655f1537": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "82177df57a494de8900c14c2f5185175": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_67da6c4260574869aa24c3cbc1bc1654",
-            "placeholder": "​",
-            "style": "IPY_MODEL_94b9088614464f60a203de39dbcae853",
-            "value": " 8/8 [01:47&lt;00:00, 11.64s/it]"
-          }
-        },
-        "823f1c78f15043e38bbd4dca3932a86a": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_03a3c744d716431488163b4358b80f92",
-            "max": 239,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_a5434ee714f9498d83870544b67c0cb7",
-            "value": 239
-          }
-        },
-        "835bcc28a5564fb9b3d651bc8e32dc46": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "8640ac440fbc4644b9a3af7ba3ae7183": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "86816687746246b4a6105e8010384e25": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_8640ac440fbc4644b9a3af7ba3ae7183",
-            "placeholder": "​",
-            "style": "IPY_MODEL_5cea7996f02040b187ece0bb2d6a8d1f",
-            "value": "<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. </center>"
-          }
-        },
-        "879c8ab5873847a8833bd74123be90a4": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_ef223e8504b64e3592589880326aaf41",
-            "placeholder": "​",
-            "style": "IPY_MODEL_598da69727bd4fb8b1caf465ac736d7a",
-            "value": " 1.67M/1.67M [00:00&lt;00:00, 19.0MB/s]"
-          }
-        },
-        "897b77a56c09479bb11d7f2a30997e55": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "8bc9d8ba866c442b9118d9630009939c": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "8c4d4fc5a30f4e7cb3be53fe2adda33d": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "8f5bd719974e41c3a8dd9a5b0d3d71e6": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "8f726dbfb45d4528afa33e36a6313267": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "9327977822be4b1294f80e876552e305": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_37de928300e34184881039378bd75e7f",
-            "placeholder": "​",
-            "style": "IPY_MODEL_0e936d9dbf9c4fdd86bbfe9730dedc47",
-            "value": " 3.96G/3.96G [00:13&lt;00:00, 273MB/s]"
-          }
-        },
-        "936d04b5fe1b4c63bf0b080e423d051b": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "93a44a11aa4846fa8efc6c1413ef1627": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "94b9088614464f60a203de39dbcae853": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "9503a45960984adc97b58e16c50662e0": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "95caff42f08a4c2aa14c867b8f37f231": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_de7c37ee83e24f0c889e84d07279c2ec",
-              "IPY_MODEL_9d4897eefb5f48259ffb2d23e332f752",
-              "IPY_MODEL_253017b0d0534e54ab44e181f6d7c82d"
-            ],
-            "layout": "IPY_MODEL_27beaf06e41b472abdb544a43c720c5a"
-          }
-        },
-        "977f799afaac4a55b2dc1cffa7d5b63b": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "97e36007e1304e1583fd81bfb13f0edd": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "9858cb74a09748a39e8149baac96702c": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "9b42e08b3c9548818488268768a118b1": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_d955dcaa0e944e719f3a06139dd54a03",
-            "placeholder": "​",
-            "style": "IPY_MODEL_d3de2662c7964f1ba96e58da382af720",
-            "value": "merges.txt: 100%"
-          }
-        },
-        "9cd5211b5d8b457aa0002f1d17b80028": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_6932489232ec4ab18a160b1e7fbcdfe1",
-            "placeholder": "​",
-            "style": "IPY_MODEL_4540927d98f54466b434ba4c0edf045d",
-            "value": "model-00007-of-00008.safetensors: 100%"
-          }
-        },
-        "9d4897eefb5f48259ffb2d23e332f752": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_30a81da86f8043eca301e86a8651201a",
-            "max": 2776833,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_e8b7a81040904c1e89e58978223b1737",
-            "value": 2776833
-          }
-        },
-        "9e333ed3b5014069ac1dd969255dd591": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "9ed02dc43412471a9ab47f3620ccf3a5": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "9f1c9a0695384bdaa6f8b847ef89bee8": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ButtonStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ButtonStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "button_color": null,
-            "font_weight": ""
-          }
-        },
-        "9f56a2d9979c4bd8928c644c22c3ecdf": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "a0a11e929edd4189b79723d618522c33": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "a10d0a76010f4e508c65a9b69ebc5156": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "a138859f19b74fc0928dc236ab5359db": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_9b42e08b3c9548818488268768a118b1",
-              "IPY_MODEL_12b56912736849fea2ad8124456fdc5c",
-              "IPY_MODEL_879c8ab5873847a8833bd74123be90a4"
-            ],
-            "layout": "IPY_MODEL_20352e5f58d24bb8b1f3940efd14fe4a"
-          }
-        },
-        "a1959759c5424da9961fb2a308d4dee4": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_3aaecbf540f54a2db9ab0931e3b1fe57",
-            "placeholder": "​",
-            "style": "IPY_MODEL_9e333ed3b5014069ac1dd969255dd591",
-            "value": " 239/239 [00:00&lt;00:00, 30.9kB/s]"
-          }
-        },
-        "a20927bf5f2c41f58c1e31ac858ab36c": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_1811cda0644e4190a9469d1774435d82",
-            "placeholder": "​",
-            "style": "IPY_MODEL_35c811d2ae8e43f3b5cecbdd3cfa857f",
-            "value": "tokenizer.json: 100%"
-          }
-        },
-        "a3a945817f684328b34651fe052393ec": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "a44f630e099e43899f20a77084ae60cd": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_ed5ca967ad5342929e578ac6aa4dc4c0",
-            "placeholder": "​",
-            "style": "IPY_MODEL_af401d117d5047629d3a6e2361757b62",
-            "value": "model-00001-of-00008.safetensors: 100%"
-          }
-        },
-        "a4e5789584564049b83df7c6c54a3e08": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "a5434ee714f9498d83870544b67c0cb7": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "a55060adc3564407ac81ad7297d34aaa": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "a6f48410b9964fefba0c3009a77dc838": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "a7cf477e80fc43e0ad82c7997b076dce": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "a80410b919e442c49aea15acc1ce1a72": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_fa1282ccc7544e4f818e2f03ccffe4a5",
-            "placeholder": "​",
-            "style": "IPY_MODEL_bbbf575d2a4b4c6ea8389be79b2a6039",
-            "value": "model.safetensors.index.json: 100%"
-          }
-        },
-        "ab93eabd7cea4b94b4b7a387f101e8a1": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "ac764024cf1c4e08ba7749afd2cd20ac": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "ad1236893754446881e153adc9d5c962": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_62e302ebdad64aada0ffe64ae1c873f3",
-            "placeholder": "​",
-            "style": "IPY_MODEL_bd1b0dfed6d34d16af33a4a58330f5ec",
-            "value": "Saving the dataset (1/1 shards): 100%"
-          }
-        },
-        "ad7599de524549c48bf2d3124ad4b299": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "adacfdcc1b0140efac56918e9ccf064e": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "af401d117d5047629d3a6e2361757b62": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "b191ac001a2e4962bc9a245fcdf26e6b": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "b195f160ca20442fadd8b5aed0ee41af": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "b1bea589efa14258a9982071b87938bf": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "b5b65414154544aa8a71b1a39164aad7": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "b634bb73cfa743d09a5999101b840976": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "b82aa8c57f7c422a9a9c90f333ed2a99": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_c0991cf63ee6458b96e9a75e7a88b61a",
-              "IPY_MODEL_71c8af139cd248b1b51101fd46a93f35",
-              "IPY_MODEL_1d5117195d4b49eb8f1a73b18419f7ce"
-            ],
-            "layout": "IPY_MODEL_3c21e4a511b4441192c03b7f1d0976e9"
-          }
-        },
-        "b8766a88716948cf968f4563531a76d9": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_2b3a2659b12244bd8548320320016dbf",
-            "placeholder": "​",
-            "style": "IPY_MODEL_0cd7efffbb3c4c4b972e63749f61ab97",
-            "value": "Generating train split: "
-          }
-        },
-        "b87c84de30e84b3abf4871461fb9cbd3": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "b8e39e4dddc3497fbc29ae45c66da759": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "bb33aec33a6447078c31bfd728942994": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "bbbf575d2a4b4c6ea8389be79b2a6039": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "bca2c7185b6749fd899c06a2ba4c5e46": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_0f480e3a0b0a45d2a2d2dec3cad923f3",
-            "placeholder": "​",
-            "style": "IPY_MODEL_fcb30372e7404c5d8a1ad4df91e6c7b2",
-            "value": " 1.91G/1.91G [00:05&lt;00:00, 444MB/s]"
-          }
-        },
-        "bd1b0dfed6d34d16af33a4a58330f5ec": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "be724f04b03942b2a033a7e8898bb4fd": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "bed8726b8069434687c75452e21f19e5": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_fa864b41586f4a7aa56aeafd1d84eb75",
-            "max": 9985,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_3225603166b54e7aab766b9964a2f660",
-            "value": 9985
-          }
-        },
-        "bee3501b2a17427784a717e50a85e7fa": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "bfcdbba993b74972a9e3e575f86908ff": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "bff139df987d4a62abec6456cb27f3d4": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "danger",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_c1f9c267ba3f40039cdb5eb3267e8043",
-            "max": 3963750880,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_33b3b1d0295646edaac7b4822761aeb0",
-            "value": 3963750502
-          }
-        },
-        "c0892a1881de4eb4bfabc6a68f87ae99": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_158c8b85dbf34de6a94b4e35e2fc7d5a",
-            "placeholder": "​",
-            "style": "IPY_MODEL_0b4c9753a7cb4354b8e5f187e6e1ad7c",
-            "value": " 3.96G/3.96G [00:15&lt;00:00, 564MB/s]"
-          }
-        },
-        "c0991cf63ee6458b96e9a75e7a88b61a": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_ed28e2e0410d4e0b855467e798e53d66",
-            "placeholder": "​",
-            "style": "IPY_MODEL_d93f134f802b4b69b575bdaf07dbd27c",
-            "value": "tokenizer_config.json: 100%"
-          }
-        },
-        "c12ea43372ac4d57bb9605f1a429b397": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "VBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "VBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "VBoxView",
-            "box_style": "",
-            "children": [],
-            "layout": "IPY_MODEL_131065f118274a1586ac38e39ed84ef0"
-          }
-        },
-        "c1314f241a434c41b45d84dc4d3b30f8": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "c1f9c267ba3f40039cdb5eb3267e8043": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "c33ced495f70464aa4a3a91922090853": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "c3725c7f79fe415fbd1ea336f0cc9cf1": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "danger",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_b191ac001a2e4962bc9a245fcdf26e6b",
-            "max": 3841788544,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_054c8dffadba48c6b895a6cc62448ecc",
-            "value": 3841788178
-          }
-        },
-        "c3be9109d63c485d9c0ef4f9bc0f9218": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "c42acf646f344a88b8c11f81e67f7206": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_8bc9d8ba866c442b9118d9630009939c",
-            "placeholder": "​",
-            "style": "IPY_MODEL_9f56a2d9979c4bd8928c644c22c3ecdf",
-            "value": "model-00003-of-00008.safetensors: 100%"
-          }
-        },
-        "c6164e05a1914ae48083db9ad7f4ef7c": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "c65dc74c7d6f4bab8f7dd28455161dd8": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "c6e00f5224364822bc4239b176686919": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_2a51b36be41745468e4c2d7a21b1c0d2",
-            "max": 36514,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_4fd114abe9f5494ab59858949f5055f1",
-            "value": 36514
-          }
-        },
-        "c73055099c084dca996159e23e162d0b": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_e40d1c1ac9494b3bade9858324e7ffdf",
-            "placeholder": "​",
-            "style": "IPY_MODEL_d65b6b060d9845779299491ac5599c31",
-            "value": " 9985/9985 [01:04&lt;00:00, 189.08 examples/s]"
-          }
-        },
-        "c7433acd3c4841e6958ae8f7e87b1808": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "CheckboxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "CheckboxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "CheckboxView",
-            "description": "Add token as git credential?",
-            "description_tooltip": null,
-            "disabled": false,
-            "indent": true,
-            "layout": "IPY_MODEL_62c028fdef904dedb9cdeca2b3bda725",
-            "style": "IPY_MODEL_a7cf477e80fc43e0ad82c7997b076dce",
-            "value": false
-          }
-        },
-        "c84cc07789be48aebb322c23d355289e": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_0077aedc3d174560bce924ee89e9c006",
-            "placeholder": "​",
-            "style": "IPY_MODEL_00321cce58884f6f9b3855a21fcd9187",
-            "value": "Add position_id column (Sample Packing) (num_proc=2): 100%"
-          }
-        },
-        "ca65e32eb52f48c09a84b33cb18f22cd": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "cc587493c33c4f118d1b1170f85be24c": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "cc94432d08464affa3e58b560bdad194": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "danger",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_b5b65414154544aa8a71b1a39164aad7",
-            "max": 3963750816,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_f0a58fbd0fca4340890041f99fa2f8c8",
-            "value": 3963750438
-          }
-        },
-        "ccfcdc95baf646f8aeb3d516742383f2": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "cdebbc55a1164c018546c2ac6f8c620c": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_a44f630e099e43899f20a77084ae60cd",
-              "IPY_MODEL_c3725c7f79fe415fbd1ea336f0cc9cf1",
-              "IPY_MODEL_0e50870ed0c643e0b6c18cc5d7ddae7f"
-            ],
-            "layout": "IPY_MODEL_c33ced495f70464aa4a3a91922090853"
-          }
-        },
-        "d02274afd47b462291c745f261209d42": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "d07c8b97d3314f1c852e44bdd40f61ed": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "d0e9dce55cec4c1ca619a0ccf209d924": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "d1f9b10c130542f094c8fd3d1e23b5e9": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "d262c82138024169b9f3aa034ca756fa": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "d3de2662c7964f1ba96e58da382af720": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "d4183e9715f34d249942b8271cca3bdf": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_63580b6fb30642479fe3000915bf551a",
-            "placeholder": "​",
-            "style": "IPY_MODEL_8f726dbfb45d4528afa33e36a6313267",
-            "value": " 27.3M/27.3M [00:00&lt;00:00, 31.0MB/s]"
-          }
-        },
-        "d43c6df07ddb466587807d6dbe1ff614": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_8c4d4fc5a30f4e7cb3be53fe2adda33d",
-            "placeholder": "​",
-            "style": "IPY_MODEL_e90658f4bcb642baa78426012f863152",
-            "value": "model-00004-of-00008.safetensors: 100%"
-          }
-        },
-        "d65b6b060d9845779299491ac5599c31": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "d6fe74e4255444368f8f90a62157d869": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "d93f134f802b4b69b575bdaf07dbd27c": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "d955dcaa0e944e719f3a06139dd54a03": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "da2347ac94764a3fa2743343cf0d3cd2": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "da6e93f3e4984780b930fe7a706983ea": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "daee63fd167e4441a32324b51b00ad2b": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_d07c8b97d3314f1c852e44bdd40f61ed",
-            "max": 9985,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_ebb69a2c3d0a4299a484698287b3087c",
-            "value": 9985
-          }
-        },
-        "dc892a596f6942d7973c616c38f0eebb": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_c84cc07789be48aebb322c23d355289e",
-              "IPY_MODEL_bed8726b8069434687c75452e21f19e5",
-              "IPY_MODEL_16a188a0b06d45f980dcf3933509fe0a"
-            ],
-            "layout": "IPY_MODEL_60c1a0d765c14a1d888317e6a507e4ea"
-          }
-        },
-        "dd0e646fad3f4a89ba23b39d162bd8d9": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_d43c6df07ddb466587807d6dbe1ff614",
-              "IPY_MODEL_e0e8b840b8ea4d0d9db09afe99fa287d",
-              "IPY_MODEL_9327977822be4b1294f80e876552e305"
-            ],
-            "layout": "IPY_MODEL_77304d1a46b3468a98483e02ec0ac4a4"
-          }
-        },
-        "de7c37ee83e24f0c889e84d07279c2ec": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_34cf3df51fbc41cabfdbba153c007f0e",
-            "placeholder": "​",
-            "style": "IPY_MODEL_ac764024cf1c4e08ba7749afd2cd20ac",
-            "value": "vocab.json: 100%"
-          }
-        },
-        "dfd2a2649b8341ef913207526708aff1": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "e09f1bcbb9d94c09be53e5e1303642c2": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_e7d8e4fe58384e93a106de546068c65e",
-            "max": 8,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_0aa8ab56b85f4171a79c3bc210594025",
-            "value": 8
-          }
-        },
-        "e0e8b840b8ea4d0d9db09afe99fa287d": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "danger",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_f7434f3e03124a1c938a39af79d7fa59",
-            "max": 3963750880,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_c1314f241a434c41b45d84dc4d3b30f8",
-            "value": 3963750502
-          }
-        },
-        "e21e180307e5485cbbe908672fd6639a": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_2e2b0c1599c341a198f632f46a40c90e",
-              "IPY_MODEL_bff139df987d4a62abec6456cb27f3d4",
-              "IPY_MODEL_ebe1cc366d324ad59b264c8b3c431441"
-            ],
-            "layout": "IPY_MODEL_114dece49dba437c8572ef94b23c3b1e"
-          }
-        },
-        "e366ae3fceec4566b9ed303d6c5f90af": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "e3fb3fc6afe04b3c9b7ac61809ce78fa": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_c6164e05a1914ae48083db9ad7f4ef7c",
-            "placeholder": "​",
-            "style": "IPY_MODEL_813621384dc748b0ad06775e22761c0b",
-            "value": " 9985/9985 [00:03&lt;00:00, 3622.89 examples/s]"
-          }
-        },
-        "e400cbf14bcc446a9d33b210cd93550b": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "e40d1c1ac9494b3bade9858324e7ffdf": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "e575d87a7efe4ec7b1efde489839d4a6": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "e5a82df528bb4e408797a3b6c2758f4a": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "e6e969610738449887259063967f82b0": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "e7d8e4fe58384e93a106de546068c65e": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "e87ea87fcff247b5bbcc331ba79a8dc2": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "e8b7a81040904c1e89e58978223b1737": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "e90658f4bcb642baa78426012f863152": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "eb1c9535e6a546098b760528b2ea387c": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_18357b321ce44d7b8bd9d1c886f69275",
-              "IPY_MODEL_279937fe03bc4e4eb25b472d7e9df163",
-              "IPY_MODEL_bca2c7185b6749fd899c06a2ba4c5e46"
-            ],
-            "layout": "IPY_MODEL_1f7d30f71bbd4547a9150d21da071055"
-          }
-        },
-        "ebb69a2c3d0a4299a484698287b3087c": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "ebc80d1a55fa47f4a5ea2756588569ec": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "ebe1cc366d324ad59b264c8b3c431441": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_fba7aa824b38467ab3061b226114cdec",
-            "placeholder": "​",
-            "style": "IPY_MODEL_f3075dccbd2747b4a7913b66f44f2596",
-            "value": " 3.96G/3.96G [00:13&lt;00:00, 398MB/s]"
-          }
-        },
-        "ec030fc3c346426f9abc3a89892258d3": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_dfd2a2649b8341ef913207526708aff1",
-            "max": 9985,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_4f1977d7e4824ef1a14b65f0f42bba10",
-            "value": 9985
-          }
-        },
-        "ec11d1e5ae7b42c883d9b1f38a65356e": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_936d04b5fe1b4c63bf0b080e423d051b",
-            "placeholder": "​",
-            "style": "IPY_MODEL_f1cef8e8dc2646fb9fd09f3b09081074",
-            "value": " 36.5k/36.5k [00:00&lt;00:00, 4.32MB/s]"
-          }
-        },
-        "ed28e2e0410d4e0b855467e798e53d66": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "ed5ca967ad5342929e578ac6aa4dc4c0": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "edc99591b9c747b689b94d0052fec14c": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "ef0a3c7a6f14460fb4da096928ae249e": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_07fb3a2c8315494e97b447e672dfae06",
-              "IPY_MODEL_ec030fc3c346426f9abc3a89892258d3",
-              "IPY_MODEL_e3fb3fc6afe04b3c9b7ac61809ce78fa"
-            ],
-            "layout": "IPY_MODEL_c3be9109d63c485d9c0ef4f9bc0f9218"
-          }
-        },
-        "ef223e8504b64e3592589880326aaf41": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "f0a58fbd0fca4340890041f99fa2f8c8": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "f113ebd8c1c34806bea4dd7ed3035173": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "f1cef8e8dc2646fb9fd09f3b09081074": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "f3075dccbd2747b4a7913b66f44f2596": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "f365820a3d3c42b2948abfe32065de14": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_735d4f225b24414294fc1b213c61223c",
-            "placeholder": "​",
-            "style": "IPY_MODEL_5e5e15b0569b474c9620083b3ec6af55",
-            "value": "generation_config.json: 100%"
-          }
-        },
-        "f4667818b9d34a09891cd727a429a610": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_4b27c267393640f28f6eae0875bd2ed9",
-            "placeholder": "​",
-            "style": "IPY_MODEL_9858cb74a09748a39e8149baac96702c",
-            "value": " 3.96G/3.96G [00:11&lt;00:00, 457MB/s]"
-          }
-        },
-        "f4a1795dc7514a718f478245f521f0ba": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "f60a2bdb6b6b4e0e8c3508580e247132": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "danger",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_edc99591b9c747b689b94d0052fec14c",
-            "max": 3963750880,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_35cc989ca3374e7dba0cb166febc4bde",
-            "value": 3963750502
-          }
-        },
-        "f7434f3e03124a1c938a39af79d7fa59": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "f8ef805b776145c3bfa9ba8d90972058": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "fa1282ccc7544e4f818e2f03ccffe4a5": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "fa864b41586f4a7aa56aeafd1d84eb75": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "fba7aa824b38467ab3061b226114cdec": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "fcb30372e7404c5d8a1ad4df91e6c7b2": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "fcbab4d8dced41a18dfccce81e3a45a0": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "fd4f333f7ece4450b04e1a9af1f9d2f6": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_d1f9b10c130542f094c8fd3d1e23b5e9",
-            "placeholder": "​",
-            "style": "IPY_MODEL_e575d87a7efe4ec7b1efde489839d4a6",
-            "value": "model-00006-of-00008.safetensors: 100%"
-          }
-        },
-        "fe18bba7f3fb4c31bf840541f36b3425": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_fd4f333f7ece4450b04e1a9af1f9d2f6",
-              "IPY_MODEL_f60a2bdb6b6b4e0e8c3508580e247132",
-              "IPY_MODEL_c0892a1881de4eb4bfabc6a68f87ae99"
-            ],
-            "layout": "IPY_MODEL_1bec6297c90242a88672d195bc09d429"
-          }
-        },
-        "fe41858c6bd04c58840112b67c19a336": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_e5a82df528bb4e408797a3b6c2758f4a",
-            "placeholder": "​",
-            "style": "IPY_MODEL_f113ebd8c1c34806bea4dd7ed3035173",
-            "value": " 9985/9985 [00:00&lt;00:00, 44264.88 examples/s]"
-          }
-        },
-        "fea1b70fb46745feb5111b3929175b5d": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_f365820a3d3c42b2948abfe32065de14",
-              "IPY_MODEL_823f1c78f15043e38bbd4dca3932a86a",
-              "IPY_MODEL_a1959759c5424da9961fb2a308d4dee4"
-            ],
-            "layout": "IPY_MODEL_34c9c0137b504cd799c6bd6de69507c2"
-          }
-        },
-        "ff3a94b146a948b6907f5d80c7157f99": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "ffdbb12a2f2c4d14911685e7683e0ef0": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_ab93eabd7cea4b94b4b7a387f101e8a1",
-            "placeholder": "​",
-            "style": "IPY_MODEL_704f2f5a9b1c49d5a75a0025a5dda11b",
-            "value": " 3.96G/3.96G [00:12&lt;00:00, 656MB/s]"
-          }
-        }
-      }
-    }
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "OPLSwmgdrB7g"
+   },
+   "source": [
+    "# Fine-Tune Qwen3 14B with Axolotl\n",
+    "\n",
+    "[<img src=\"https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png\" alt=\"Built with Axolotl\" width=\"200\" height=\"32\"/>](https://github.com/axolotl-ai-cloud/axolotl)\n",
+    "\n",
+    "Axolotl is the most performant LLM post-training framework available, delivering faster training with efficient, consistent and stable performance. Train your workload and ship your product 30% faster, saving you both time and money.\n",
+    "\n",
+    "- ⭐ us on [GitHub](https://github.com/axolotl-ai-cloud/axolotl)\n",
+    "- 📜 Read the [Docs](http://docs.axolotl.ai/)\n",
+    "- 💬 Chat with us on [Discord](https://discord.gg/mnpEYgRUmD)\n",
+    "- 📰 Get updates on [X/Twitter](https://x.com/axolotl_ai)\n"
+   ]
   },
-  "nbformat": 4,
-  "nbformat_minor": 0
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "rVjKD7CbxIP3"
+   },
+   "source": [
+    "# Installation\n",
+    "\n",
+    "Axolotl is easy to install from [pip](https://pypi.org/project/axolotl/), or use our [pre-built Docker images](http://docs.axolotl.ai/docs/docker.html) for a hassle-free dependency experience. See our [docs](http://docs.axolotl.ai/docs/installation.html) for more information."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "msOCO4NRmRLa"
+   },
+   "outputs": [],
+   "source": [
+    "%%capture\n",
+    "# This step can take ~5-10 minutes to install dependencies\n",
+    "!pip install --no-build-isolation \"axolotl[flash-attn]>=0.9.1\"\n",
+    "!pip install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@0ee9ee8\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "N0OW0YeksDLr"
+   },
+   "source": [
+    "## Demo: Talk Like a Pirate\n",
+    "\n",
+    "In this demo, we are training the model ***to respond like a pirate***. This was chosen as a way to easily show how to train a model to respond in a certain style of your choosing (without being prompted) and is quite easy to validate within the scope of a Colab."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "8Du2fANTsNCK"
+   },
+   "source": [
+    "### Upload your own dataset or use a Hugging Face dataset\n",
+    "\n",
+    "You can choose to use your own JSONL file from your own [Google Drive](https://drive.google.com/drive/home), for example by downloading the [Pirate-Ultrachat JSONL](https://huggingface.co/datasets/winglian/pirate-ultrachat-10k/blob/main/train.jsonl) to your Google Drive. JSONL datasets should be formatted similarly to the [OpenAI dataset format](https://cookbook.openai.com/examples/chat_finetuning_data_prep).\n",
+    "\n",
+    "You can also simply use the [`winglian/pirate-ultrachat-10k`](https://huggingface.co/datasets/winglian/pirate-ultrachat-10k) dataset directly.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "fGEEjyQ-r_IV"
+   },
+   "outputs": [],
+   "source": [
+    "# Default to HF dataset location\n",
+    "dataset_id = \"winglian/pirate-ultrachat-10k\"\n",
+    "uploaded = {}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "c5MyYqk7vIsG"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "# Optionally, upload your own JSONL to your Google Drive\n",
+    "GOOGLE_DRIVE_PATH = \"\"  # ex: \"MyDrive/Colab Notebooks/train.jsonl\"\n",
+    "\n",
+    "# \"Select All\" permissions, or you may get the error:\n",
+    "# \"MessageError: Error: credential propagation was unsuccessful\"\n",
+    "if GOOGLE_DRIVE_PATH:\n",
+    "    from google.colab import drive\n",
+    "\n",
+    "    # Mount your Google Drive\n",
+    "    GOOGLE_DRIVE_MNT = \"/content/drive/\"\n",
+    "    drive.mount(GOOGLE_DRIVE_MNT, force_remount=True)\n",
+    "    tmp_path = os.path.join(GOOGLE_DRIVE_MNT, GOOGLE_DRIVE_PATH.lstrip(\"/\"))\n",
+    "    # make sure file exists\n",
+    "    if not os.path.isfile(tmp_path):\n",
+    "        raise ValueError(f\"File {tmp_path} does not exist\")\n",
+    "    dataset_id = tmp_path"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "U6pTk3A9xj1W"
+   },
+   "source": [
+    "# Configure for Supervised Fine-Tuning (SFT)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 151,
+     "referenced_widgets": [
+      "388f618924274d21a066f098f4f1e744",
+      "7c95f85a2b1f47a1bd846d110c47bb3c",
+      "083f9cda8d754c168beee10d2f8955a2",
+      "62e1a65582f446a78612eaa804e08a7d",
+      "487a177d020f4605834878b2fdc7afa3",
+      "7fd44cf9ca6e4726bfd7ac21846d6a14",
+      "366a343b62fa47d8985a3bd464d99f9e",
+      "a0a11e929edd4189b79723d618522c33",
+      "e87ea87fcff247b5bbcc331ba79a8dc2",
+      "5e18768f7ad6434ba8b8b8a2e853e204",
+      "bb33aec33a6447078c31bfd728942994"
+     ]
+    },
+    "id": "fdRioqytmTtX",
+    "outputId": "f0acdcec-4b41-4a3f-ffed-c2d2d929158e"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2025-05-08 13:40:27,488] [INFO] [root.register:348] [PID:174] Attempting to load plugin: axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin\n",
+      "[2025-05-08 13:40:27,493] [INFO] [root.register:351] [PID:174] Plugin loaded successfully: axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin\n",
+      "[2025-05-08 13:40:27,959] [INFO] [axolotl.utils.schemas.config.check_eval_packing:721] [PID:174] [RANK:0] explicitly setting `eval_sample_packing` to match `sample_packing`\u001b[39m\n",
+      "[2025-05-08 13:40:27,960] [INFO] [axolotl.utils.schemas.config.hint_sample_packing_padding:514] [PID:174] [RANK:0] Setting `pad_to_sequence_len: true` to prevent memory leaks when sample_packing\u001b[39m\n",
+      "[2025-05-08 13:40:27,961] [INFO] [axolotl.utils.schemas.config.check_bf16:1251] [PID:174] [RANK:0] bf16 support detected, but not enabled for this configuration.\u001b[39m\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "388f618924274d21a066f098f4f1e744",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "config.json:   0%|          | 0.00/728 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2025-05-08 13:40:28,590] [INFO] [axolotl.normalize_config:237] [PID:174] [RANK:0] cuda memory usage baseline: 0.000GB (+0.002GB cache, +0.359GB misc)\u001b[39m\n"
+     ]
+    }
+   ],
+   "source": [
+    "from axolotl.utils.dict import DictDefault\n",
+    "from axolotl.cli.config import load_cfg\n",
+    "\n",
+    "# Axolotl provides full control and transparency over model and training configuration\n",
+    "config = DictDefault(\n",
+    "    base_model=\"Qwen/Qwen3-14B\",  # Use the instruct tuned model, but we're aligning it to be a pirate\n",
+    "    load_in_4bit=True,  # set to True for qLoRA\n",
+    "    adapter=\"qlora\",\n",
+    "    lora_r=32,\n",
+    "    lora_alpha=64,\n",
+    "    lora_target_modules=[\n",
+    "        \"q_proj\",\n",
+    "        \"k_proj\",\n",
+    "        \"v_proj\",\n",
+    "        \"o_proj\",  # train self_attn linear modules\n",
+    "        \"gate_proj\",\n",
+    "        \"down_proj\",\n",
+    "        \"up_proj\",  # train MLP linear modules\n",
+    "    ],\n",
+    "    lora_qkv_kernel=True,  # optimized triton kernels for LoRA\n",
+    "    lora_o_kernel=True,\n",
+    "    lora_mlp_kernel=True,\n",
+    "    embeddings_skip_upcast=True,  # keep embeddings in fp16 so the model fits in 15GB VRAM\n",
+    "    xformers_attention=True,  # use xformers on Colab w/ T4 for memory efficient attention, flash_attention only on Ampere or above\n",
+    "    plugins=[\n",
+    "        # more efficient training using Apple's Cut Cross Entropy; https://github.com/apple/ml-cross-entropy\n",
+    "        \"axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin\",\n",
+    "    ],\n",
+    "    sample_packing=True,  # 2-6x increase in tokens per micro-batch\n",
+    "    # when using packing, use a slightly higher learning rate to account for fewer steps\n",
+    "    # alternatively, reduce the micro_batch_size + gradient_accumulation_steps to achieve closer to the same number of steps/epoch\n",
+    "    learning_rate=0.00019,\n",
+    "    sequence_len=4096,  # larger sequence length improves packing efficiency for more tokens/sec\n",
+    "    micro_batch_size=1,\n",
+    "    gradient_accumulation_steps=1,\n",
+    "    gradient_checkpointing=True,  # trade increased training time for reduced VRAM usage\n",
+    "    gradient_checkpointing_kwargs={\n",
+    "        \"use_reentrant\": False,\n",
+    "    },\n",
+    "    optimizer=\"paged_adamw_8bit\",\n",
+    "    lr_scheduler=\"cosine\",\n",
+    "    warmup_steps=5,\n",
+    "    fp16=True,  # use float16 + automatic mixed precision, bfloat16 not supported on Colab w/ T4\n",
+    "    bf16=False,\n",
+    "    max_grad_norm=0.1,  # gradient clipping\n",
+    "    num_epochs=1,\n",
+    "    saves_per_epoch=2,  # how many checkpoints to save over one epoch\n",
+    "    logging_steps=1,\n",
+    "    output_dir=\"./outputs/qwen-sft-pirate-rrr\",\n",
+    "    chat_template=\"qwen3\",\n",
+    "    datasets=[\n",
+    "        {\n",
+    "            \"path\": dataset_id,  # Huggingface Dataset id or path to train.jsonl\n",
+    "            \"type\": \"chat_template\",\n",
+    "            \"split\": \"train\",\n",
+    "            \"eot_tokens\": [\"<|im_end|>\"],\n",
+    "        }\n",
+    "    ],\n",
+    "    dataloader_prefetch_factor=8,  # dataloader optimizations\n",
+    "    dataloader_num_workers=2,\n",
+    "    dataloader_pin_memory=True,\n",
+    ")\n",
+    "\n",
+    "# validates the configuration\n",
+    "cfg = load_cfg(config)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "715UpvnSoBIS"
+   },
+   "outputs": [],
+   "source": [
+    "from axolotl.utils import patch_optimized_env\n",
+    "\n",
+    "# speedup downloads from HF 🤗 and set \"PYTORCH_CUDA_ALLOC_CONF\" env to save memory\n",
+    "patch_optimized_env()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "Vc6MC-hwyH-n"
+   },
+   "source": [
+    "# Datasets\n",
+    "\n",
+    "Axolotl has a robust suite of loaders and transforms to parse most open datasets of any format into the appropriate chat template for your model. Axolotl will mask input tokens from the user's prompt so that the train loss is only calculated against the model's response. For more information, [see our documentation](http://docs.axolotl.ai/docs/dataset-formats/conversation.html) on dataset preparation.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 1000,
+     "referenced_widgets": [
+      "b82aa8c57f7c422a9a9c90f333ed2a99",
+      "c0991cf63ee6458b96e9a75e7a88b61a",
+      "71c8af139cd248b1b51101fd46a93f35",
+      "1d5117195d4b49eb8f1a73b18419f7ce",
+      "3c21e4a511b4441192c03b7f1d0976e9",
+      "ed28e2e0410d4e0b855467e798e53d66",
+      "d93f134f802b4b69b575bdaf07dbd27c",
+      "d0e9dce55cec4c1ca619a0ccf209d924",
+      "4c727d40ef0443449afc31724ee79f0c",
+      "0dea5caa27384f5689e3cab51f558727",
+      "a6f48410b9964fefba0c3009a77dc838",
+      "95caff42f08a4c2aa14c867b8f37f231",
+      "de7c37ee83e24f0c889e84d07279c2ec",
+      "9d4897eefb5f48259ffb2d23e332f752",
+      "253017b0d0534e54ab44e181f6d7c82d",
+      "27beaf06e41b472abdb544a43c720c5a",
+      "34cf3df51fbc41cabfdbba153c007f0e",
+      "ac764024cf1c4e08ba7749afd2cd20ac",
+      "30a81da86f8043eca301e86a8651201a",
+      "e8b7a81040904c1e89e58978223b1737",
+      "1c6f1f10667545aaab958016ba7e2c94",
+      "e6e969610738449887259063967f82b0",
+      "a138859f19b74fc0928dc236ab5359db",
+      "9b42e08b3c9548818488268768a118b1",
+      "12b56912736849fea2ad8124456fdc5c",
+      "879c8ab5873847a8833bd74123be90a4",
+      "20352e5f58d24bb8b1f3940efd14fe4a",
+      "d955dcaa0e944e719f3a06139dd54a03",
+      "d3de2662c7964f1ba96e58da382af720",
+      "97e36007e1304e1583fd81bfb13f0edd",
+      "c65dc74c7d6f4bab8f7dd28455161dd8",
+      "ef223e8504b64e3592589880326aaf41",
+      "598da69727bd4fb8b1caf465ac736d7a",
+      "5f86cd894de94c3280fadc1e2fd0ee13",
+      "a20927bf5f2c41f58c1e31ac858ab36c",
+      "0a46ad75c198463d843fb35e813642cb",
+      "09007681cf8d42aeb8c1d2f6a74e470a",
+      "ebc80d1a55fa47f4a5ea2756588569ec",
+      "1811cda0644e4190a9469d1774435d82",
+      "35c811d2ae8e43f3b5cecbdd3cfa857f",
+      "b8e39e4dddc3497fbc29ae45c66da759",
+      "63b4e563e85c4f03b1b72beda9577bcc",
+      "b195f160ca20442fadd8b5aed0ee41af",
+      "ca65e32eb52f48c09a84b33cb18f22cd",
+      "7cd0b85ebd204b7aba908417811ce4e0",
+      "7baeab52d6694c32b1efd1ea1a0a7782",
+      "519a7b154022443db6703f04a9142bae",
+      "d4183e9715f34d249942b8271cca3bdf",
+      "da2347ac94764a3fa2743343cf0d3cd2",
+      "93a44a11aa4846fa8efc6c1413ef1627",
+      "a55060adc3564407ac81ad7297d34aaa",
+      "d02274afd47b462291c745f261209d42",
+      "0f417447a7bd4a33acca96fa37aec877",
+      "63580b6fb30642479fe3000915bf551a",
+      "8f726dbfb45d4528afa33e36a6313267",
+      "03b093d592ba4386aa61f7b8483da660",
+      "b8766a88716948cf968f4563531a76d9",
+      "6f3a28b912714c6e931003549664bfa3",
+      "16d1283741404b7bb319094c992fce01",
+      "2a5bb0e818ab47be8cf6465988328503",
+      "2b3a2659b12244bd8548320320016dbf",
+      "0cd7efffbb3c4c4b972e63749f61ab97",
+      "5ca240f31e6b44e3882c5eb37cd5a309",
+      "5eb06edeb58e4930b1affef2a59eae81",
+      "a4e5789584564049b83df7c6c54a3e08",
+      "ff3a94b146a948b6907f5d80c7157f99",
+      "258b7c635c1045329d4669e48c46ccd5",
+      "6f68ed9889f54ad2ae8a3b95ac263a83",
+      "80366349d81e4dcc892db6cd56e384f3",
+      "c73055099c084dca996159e23e162d0b",
+      "977f799afaac4a55b2dc1cffa7d5b63b",
+      "41f3b32c2f6b4034ae7a3b9124e28bc7",
+      "a10d0a76010f4e508c65a9b69ebc5156",
+      "f8ef805b776145c3bfa9ba8d90972058",
+      "cc587493c33c4f118d1b1170f85be24c",
+      "e40d1c1ac9494b3bade9858324e7ffdf",
+      "d65b6b060d9845779299491ac5599c31",
+      "0f6907ebbc6242c8bde059cef1e1bd29",
+      "5bdfd87fc6cd4f9dabef7cfee29c8060",
+      "64f54d4a744a4627a07c3c0120276f3b",
+      "65b75b9b8bc143cf997796af68ff6668",
+      "d6fe74e4255444368f8f90a62157d869",
+      "4d468f96ec924681ad65eb671674b93e",
+      "ad7599de524549c48bf2d3124ad4b299",
+      "0546d04aae644dde846c58a4afb598a6",
+      "897b77a56c09479bb11d7f2a30997e55",
+      "81c3db71ac704280ad030072655f1537",
+      "042e091f75694c47aee761e760e76773",
+      "ef0a3c7a6f14460fb4da096928ae249e",
+      "07fb3a2c8315494e97b447e672dfae06",
+      "ec030fc3c346426f9abc3a89892258d3",
+      "e3fb3fc6afe04b3c9b7ac61809ce78fa",
+      "c3be9109d63c485d9c0ef4f9bc0f9218",
+      "12815f401eba44658caa7b2e490137a8",
+      "30e02aa2d0d241979369e598287f2639",
+      "dfd2a2649b8341ef913207526708aff1",
+      "4f1977d7e4824ef1a14b65f0f42bba10",
+      "c6164e05a1914ae48083db9ad7f4ef7c",
+      "813621384dc748b0ad06775e22761c0b",
+      "dc892a596f6942d7973c616c38f0eebb",
+      "c84cc07789be48aebb322c23d355289e",
+      "bed8726b8069434687c75452e21f19e5",
+      "16a188a0b06d45f980dcf3933509fe0a",
+      "60c1a0d765c14a1d888317e6a507e4ea",
+      "0077aedc3d174560bce924ee89e9c006",
+      "00321cce58884f6f9b3855a21fcd9187",
+      "fa864b41586f4a7aa56aeafd1d84eb75",
+      "3225603166b54e7aab766b9964a2f660",
+      "349eee9f56d64f0cba6fc24ff2c50c9b",
+      "7e5d3774060e4589aa65982da5ea4ef4",
+      "7c2485c6cdfe463da6fdb35982a1070d",
+      "ad1236893754446881e153adc9d5c962",
+      "daee63fd167e4441a32324b51b00ad2b",
+      "fe41858c6bd04c58840112b67c19a336",
+      "d262c82138024169b9f3aa034ca756fa",
+      "62e302ebdad64aada0ffe64ae1c873f3",
+      "bd1b0dfed6d34d16af33a4a58330f5ec",
+      "d07c8b97d3314f1c852e44bdd40f61ed",
+      "ebb69a2c3d0a4299a484698287b3087c",
+      "e5a82df528bb4e408797a3b6c2758f4a",
+      "f113ebd8c1c34806bea4dd7ed3035173"
+     ]
+    },
+    "id": "KQQhgK8FoDfF",
+    "outputId": "f69441d8-95f9-4885-c306-6c8709090ff6"
+   },
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "b82aa8c57f7c422a9a9c90f333ed2a99",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "tokenizer_config.json:   0%|          | 0.00/9.68k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "95caff42f08a4c2aa14c867b8f37f231",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "a138859f19b74fc0928dc236ab5359db",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "5f86cd894de94c3280fadc1e2fd0ee13",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2025-05-08 13:41:00,844] [DEBUG] [axolotl.utils.models.load_tokenizer:441] [PID:174] [RANK:0] EOS: 151645 / <|im_end|>\u001b[39m\n",
+      "[2025-05-08 13:41:00,845] [DEBUG] [axolotl.utils.models.load_tokenizer:442] [PID:174] [RANK:0] BOS: None / None\u001b[39m\n",
+      "[2025-05-08 13:41:00,846] [DEBUG] [axolotl.utils.models.load_tokenizer:443] [PID:174] [RANK:0] PAD: 151643 / <|endoftext|>\u001b[39m\n",
+      "[2025-05-08 13:41:00,847] [DEBUG] [axolotl.utils.models.load_tokenizer:444] [PID:174] [RANK:0] UNK: None / None\u001b[39m\n",
+      "[2025-05-08 13:41:00,869] [INFO] [axolotl.utils.data.sft.load_tokenized_prepared_datasets:271] [PID:174] [RANK:0] Unable to find prepared dataset in last_run_prepared/97037817611d38b3a9c681753c3c4c95\u001b[39m\n",
+      "[2025-05-08 13:41:00,870] [INFO] [axolotl.utils.data.sft.load_tokenized_prepared_datasets:272] [PID:174] [RANK:0] Loading raw datasets...\u001b[39m\n",
+      "\u001b[33m[2025-05-08 13:41:00,870] [WARNING] [axolotl.utils.data.sft.load_tokenized_prepared_datasets:274] [PID:174] [RANK:0] Processing datasets during training can lead to VRAM instability. Please pre-process your dataset.\u001b[39m\n",
+      "[2025-05-08 13:41:00,871] [INFO] [axolotl.utils.data.sft.load_tokenized_prepared_datasets:281] [PID:174] [RANK:0] No seed provided, using default seed of 42\u001b[39m\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "7cd0b85ebd204b7aba908417811ce4e0",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "train.jsonl:   0%|          | 0.00/27.3M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "03b093d592ba4386aa61f7b8483da660",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Generating train split: 0 examples [00:00, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2025-05-08 13:41:04,196] [INFO] [axolotl.utils.data.sft.get_dataset_wrapper:484] [PID:174] [RANK:0] Loading dataset with base_type: chat_template and prompt_style: None\u001b[39m\n",
+      "[2025-05-08 13:41:04,233] [INFO] [axolotl.__call__:761] [PID:174] [RANK:0] Using chat template:\n",
+      "---\n",
+      "{%- if tools %}\n",
+      "    {{- '<|im_start|>system\\n' }}\n",
+      "    {%- if messages[0].role == 'system' %}\n",
+      "        {{- messages[0].content + '\\n\\n' }}\n",
+      "    {%- endif %}\n",
+      "    {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n",
+      "    {%- for tool in tools %}\n",
+      "        {{- \"\\n\" }}\n",
+      "        {{- tool | tojson }}\n",
+      "    {%- endfor %}\n",
+      "    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n",
+      "{%- else %}\n",
+      "    {%- if messages[0].role == 'system' %}\n",
+      "        {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n",
+      "    {%- endif %}\n",
+      "{%- endif %}\n",
+      "{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n",
+      "{%- for message in messages[::-1] %}\n",
+      "    {%- set index = (messages|length - 1) - loop.index0 %}\n",
+      "    {%- if ns.multi_step_tool and message.role == \"user\" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n",
+      "        {%- set ns.multi_step_tool = false %}\n",
+      "        {%- set ns.last_query_index = index %}\n",
+      "    {%- endif %}\n",
+      "{%- endfor %}\n",
+      "{%- for message in messages %}\n",
+      "    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n",
+      "        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n",
+      "    {%- elif message.role == \"assistant\" %}\n",
+      "        {%- set content = message.content %}\n",
+      "        {%- set reasoning_content = '' %}\n",
+      "        {%- if message.reasoning_content is defined and message.reasoning_content is not none %}\n",
+      "            {%- set reasoning_content = message.reasoning_content %}\n",
+      "        {%- else %}\n",
+      "            {%- if '</think>' in message.content %}\n",
+      "                {%- set content = message.content.split('</think>')[-1].lstrip('\\n') %}\n",
+      "                {%- set reasoning_content = message.content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n",
+      "            {%- endif %}\n",
+      "        {%- endif %}\n",
+      "        {%- if loop.index0 > ns.last_query_index %}\n",
+      "            {%- if loop.last or (not loop.last and reasoning_content) %}\n",
+      "                {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n",
+      "            {%- else %}\n",
+      "                {{- '<|im_start|>' + message.role + '\\n' + content }}\n",
+      "            {%- endif %}\n",
+      "        {%- else %}\n",
+      "            {{- '<|im_start|>' + message.role + '\\n' + content }}\n",
+      "        {%- endif %}\n",
+      "        {%- if message.tool_calls %}\n",
+      "            {%- for tool_call in message.tool_calls %}\n",
+      "                {%- if (loop.first and content) or (not loop.first) %}\n",
+      "                    {{- '\\n' }}\n",
+      "                {%- endif %}\n",
+      "                {%- if tool_call.function %}\n",
+      "                    {%- set tool_call = tool_call.function %}\n",
+      "                {%- endif %}\n",
+      "                {{- '<tool_call>\\n{\"name\": \"' }}\n",
+      "                {{- tool_call.name }}\n",
+      "                {{- '\", \"arguments\": ' }}\n",
+      "                {%- if tool_call.arguments is string %}\n",
+      "                    {{- tool_call.arguments }}\n",
+      "                {%- else %}\n",
+      "                    {{- tool_call.arguments | tojson }}\n",
+      "                {%- endif %}\n",
+      "                {{- '}\\n</tool_call>' }}\n",
+      "            {%- endfor %}\n",
+      "        {%- endif %}\n",
+      "        {{- '<|im_end|>\\n' }}\n",
+      "    {%- elif message.role == \"tool\" %}\n",
+      "        {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n",
+      "            {{- '<|im_start|>user' }}\n",
+      "        {%- endif %}\n",
+      "        {{- '\\n<tool_response>\\n' }}\n",
+      "        {{- message.content }}\n",
+      "        {{- '\\n</tool_response>' }}\n",
+      "        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n",
+      "            {{- '<|im_end|>\\n' }}\n",
+      "        {%- endif %}\n",
+      "    {%- endif %}\n",
+      "{%- endfor %}\n",
+      "{%- if add_generation_prompt %}\n",
+      "    {{- '<|im_start|>assistant\\n' }}\n",
+      "    {%- if enable_thinking is defined and enable_thinking is false %}\n",
+      "        {{- '<think>\\n\\n</think>\\n\\n' }}\n",
+      "    {%- endif %}\n",
+      "{%- endif %}\n",
+      "---\u001b[39m\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "258b7c635c1045329d4669e48c46ccd5",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Tokenizing Prompts (num_proc=2):   0%|          | 0/9985 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2025-05-08 13:42:09,195] [INFO] [axolotl.utils.data.utils.drop_long_seq_in_dataset:177] [PID:174] [RANK:0] min_input_len: 23\u001b[39m\n",
+      "[2025-05-08 13:42:09,196] [INFO] [axolotl.utils.data.utils.drop_long_seq_in_dataset:179] [PID:174] [RANK:0] max_input_len: 3380\u001b[39m\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "0f6907ebbc6242c8bde059cef1e1bd29",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Dropping Long Sequences (num_proc=2):   0%|          | 0/9985 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "ef0a3c7a6f14460fb4da096928ae249e",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Drop Samples with Zero Trainable Tokens (num_proc=2):   0%|          | 0/9985 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "dc892a596f6942d7973c616c38f0eebb",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Add position_id column (Sample Packing) (num_proc=2):   0%|          | 0/9985 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2025-05-08 13:42:21,651] [INFO] [axolotl.utils.data.sft.load_tokenized_prepared_datasets:351] [PID:174] [RANK:0] Saving merged prepared dataset to disk... last_run_prepared/97037817611d38b3a9c681753c3c4c95\u001b[39m\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "7c2485c6cdfe463da6fdb35982a1070d",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Saving the dataset (0/1 shards):   0%|          | 0/9985 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2025-05-08 13:42:25,711] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:411] [PID:174] [RANK:0] gather_len_batches: [1540]\u001b[39m\n",
+      "[2025-05-08 13:42:25,714] [INFO] [axolotl.calc_sample_packing_eff_est:491] [PID:174] [RANK:0] sample_packing_eff_est across ranks: [0.9987832601968344]\u001b[39m\n"
+     ]
+    }
+   ],
+   "source": [
+    "from axolotl.common.datasets import load_datasets\n",
+    "\n",
+    "# Load, parse and tokenize the datasets to be formatted with qwen3 chat template\n",
+    "# Drop long samples from the dataset that overflow the max sequence length\n",
+    "dataset_meta = load_datasets(cfg=cfg)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "mrSNfHpk0EAe"
+   },
+   "source": [
+    "# Training\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 1000,
+     "referenced_widgets": [
+      "004d9177a6a14118a5930dc3cc13147b",
+      "a80410b919e442c49aea15acc1ce1a72",
+      "c6e00f5224364822bc4239b176686919",
+      "ec11d1e5ae7b42c883d9b1f38a65356e",
+      "734185351eb543fa9a00a881dcbb9fe7",
+      "fa1282ccc7544e4f818e2f03ccffe4a5",
+      "bbbf575d2a4b4c6ea8389be79b2a6039",
+      "2a51b36be41745468e4c2d7a21b1c0d2",
+      "4fd114abe9f5494ab59858949f5055f1",
+      "936d04b5fe1b4c63bf0b080e423d051b",
+      "f1cef8e8dc2646fb9fd09f3b09081074",
+      "cdebbc55a1164c018546c2ac6f8c620c",
+      "a44f630e099e43899f20a77084ae60cd",
+      "c3725c7f79fe415fbd1ea336f0cc9cf1",
+      "0e50870ed0c643e0b6c18cc5d7ddae7f",
+      "c33ced495f70464aa4a3a91922090853",
+      "ed5ca967ad5342929e578ac6aa4dc4c0",
+      "af401d117d5047629d3a6e2361757b62",
+      "b191ac001a2e4962bc9a245fcdf26e6b",
+      "054c8dffadba48c6b895a6cc62448ecc",
+      "bfcdbba993b74972a9e3e575f86908ff",
+      "6ebb2ec171414e47a14765505f64bb3c",
+      "500e272208a246089613bf788a165271",
+      "200df5e79b9244849e589ecb0250a520",
+      "cc94432d08464affa3e58b560bdad194",
+      "3036608c71904ce9ae4bb2a9fa8802d9",
+      "adacfdcc1b0140efac56918e9ccf064e",
+      "f4a1795dc7514a718f478245f521f0ba",
+      "5e746eb25bbe416fb585fa24e79f5177",
+      "b5b65414154544aa8a71b1a39164aad7",
+      "f0a58fbd0fca4340890041f99fa2f8c8",
+      "5ca6be24acb548cea130bd58e9954c7c",
+      "5cfb02ee044b4011a378efa8b54a370f",
+      "4d05314858354e729d76094b3b0ce761",
+      "c42acf646f344a88b8c11f81e67f7206",
+      "7be6f04c284e4326bb4ff3d301e7b3c6",
+      "ffdbb12a2f2c4d14911685e7683e0ef0",
+      "bee3501b2a17427784a717e50a85e7fa",
+      "8bc9d8ba866c442b9118d9630009939c",
+      "9f56a2d9979c4bd8928c644c22c3ecdf",
+      "9503a45960984adc97b58e16c50662e0",
+      "da6e93f3e4984780b930fe7a706983ea",
+      "ab93eabd7cea4b94b4b7a387f101e8a1",
+      "704f2f5a9b1c49d5a75a0025a5dda11b",
+      "dd0e646fad3f4a89ba23b39d162bd8d9",
+      "d43c6df07ddb466587807d6dbe1ff614",
+      "e0e8b840b8ea4d0d9db09afe99fa287d",
+      "9327977822be4b1294f80e876552e305",
+      "77304d1a46b3468a98483e02ec0ac4a4",
+      "8c4d4fc5a30f4e7cb3be53fe2adda33d",
+      "e90658f4bcb642baa78426012f863152",
+      "f7434f3e03124a1c938a39af79d7fa59",
+      "c1314f241a434c41b45d84dc4d3b30f8",
+      "37de928300e34184881039378bd75e7f",
+      "0e936d9dbf9c4fdd86bbfe9730dedc47",
+      "e21e180307e5485cbbe908672fd6639a",
+      "2e2b0c1599c341a198f632f46a40c90e",
+      "bff139df987d4a62abec6456cb27f3d4",
+      "ebe1cc366d324ad59b264c8b3c431441",
+      "114dece49dba437c8572ef94b23c3b1e",
+      "be724f04b03942b2a033a7e8898bb4fd",
+      "fcbab4d8dced41a18dfccce81e3a45a0",
+      "c1f9c267ba3f40039cdb5eb3267e8043",
+      "33b3b1d0295646edaac7b4822761aeb0",
+      "fba7aa824b38467ab3061b226114cdec",
+      "f3075dccbd2747b4a7913b66f44f2596",
+      "fe18bba7f3fb4c31bf840541f36b3425",
+      "fd4f333f7ece4450b04e1a9af1f9d2f6",
+      "f60a2bdb6b6b4e0e8c3508580e247132",
+      "c0892a1881de4eb4bfabc6a68f87ae99",
+      "1bec6297c90242a88672d195bc09d429",
+      "d1f9b10c130542f094c8fd3d1e23b5e9",
+      "e575d87a7efe4ec7b1efde489839d4a6",
+      "edc99591b9c747b689b94d0052fec14c",
+      "35cc989ca3374e7dba0cb166febc4bde",
+      "158c8b85dbf34de6a94b4e35e2fc7d5a",
+      "0b4c9753a7cb4354b8e5f187e6e1ad7c",
+      "4471ff62258549fba9514bb67050f965",
+      "9cd5211b5d8b457aa0002f1d17b80028",
+      "19127c7bb1554ccbac877059f9a82db0",
+      "f4667818b9d34a09891cd727a429a610",
+      "9ed02dc43412471a9ab47f3620ccf3a5",
+      "6932489232ec4ab18a160b1e7fbcdfe1",
+      "4540927d98f54466b434ba4c0edf045d",
+      "e400cbf14bcc446a9d33b210cd93550b",
+      "71002199df6b40c9a1ac40df5fb27a1b",
+      "4b27c267393640f28f6eae0875bd2ed9",
+      "9858cb74a09748a39e8149baac96702c",
+      "eb1c9535e6a546098b760528b2ea387c",
+      "18357b321ce44d7b8bd9d1c886f69275",
+      "279937fe03bc4e4eb25b472d7e9df163",
+      "bca2c7185b6749fd899c06a2ba4c5e46",
+      "1f7d30f71bbd4547a9150d21da071055",
+      "e366ae3fceec4566b9ed303d6c5f90af",
+      "5dd7d150dbe04f08b165ce7f2c27cd11",
+      "b634bb73cfa743d09a5999101b840976",
+      "742b1030acfd414bbd9d5327b7e3826d",
+      "0f480e3a0b0a45d2a2d2dec3cad923f3",
+      "fcb30372e7404c5d8a1ad4df91e6c7b2",
+      "2860e3bb3baf4f7da058465850e800c5",
+      "3efd18ea8eaa41918894883da9541bfa",
+      "e09f1bcbb9d94c09be53e5e1303642c2",
+      "82177df57a494de8900c14c2f5185175",
+      "ccfcdc95baf646f8aeb3d516742383f2",
+      "8f5bd719974e41c3a8dd9a5b0d3d71e6",
+      "b87c84de30e84b3abf4871461fb9cbd3",
+      "e7d8e4fe58384e93a106de546068c65e",
+      "0aa8ab56b85f4171a79c3bc210594025",
+      "67da6c4260574869aa24c3cbc1bc1654",
+      "94b9088614464f60a203de39dbcae853",
+      "fea1b70fb46745feb5111b3929175b5d",
+      "f365820a3d3c42b2948abfe32065de14",
+      "823f1c78f15043e38bbd4dca3932a86a",
+      "a1959759c5424da9961fb2a308d4dee4",
+      "34c9c0137b504cd799c6bd6de69507c2",
+      "735d4f225b24414294fc1b213c61223c",
+      "5e5e15b0569b474c9620083b3ec6af55",
+      "03a3c744d716431488163b4358b80f92",
+      "a5434ee714f9498d83870544b67c0cb7",
+      "3aaecbf540f54a2db9ab0931e3b1fe57",
+      "9e333ed3b5014069ac1dd969255dd591"
+     ]
+    },
+    "id": "IwrpurmloGOy",
+    "outputId": "84fa167f-ba27-4255-d508-dc9df56ad39b"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "     #@@ #@@      @@# @@#\n",
+      "    @@  @@          @@  @@           =@@#                               @@                 #@    =@@#.\n",
+      "    @@    #@@@@@@@@@    @@           #@#@=                              @@                 #@     .=@@\n",
+      "      #@@@@@@@@@@@@@@@@@            =@# @#     ##=     ##    =####=+    @@      =#####+  =#@@###.   @@\n",
+      "    @@@@@@@@@@/  +@@/  +@@          #@  =@=     #@=   @@   =@#+  +#@#   @@    =@#+  +#@#   #@.      @@\n",
+      "    @@@@@@@@@@  ##@@  ##@@         =@#   @#      =@# @#    @@      @@   @@    @@      #@   #@       @@\n",
+      "     @@@@@@@@@@@@@@@@@@@@          #@=+++#@=      =@@#     @@      @@   @@    @@      #@   #@       @@\n",
+      "                                  =@#=====@@     =@# @#    @@      @@   @@    @@      #@   #@       @@\n",
+      "    @@@@@@@@@@@@@@@@  @@@@        #@      #@=   #@=  +@@   #@#    =@#   @@.   =@#    =@#   #@.      @@\n",
+      "                                 =@#       @#  #@=     #@   =#@@@@#=    +#@@=  +#@@@@#=    .##@@+   @@\n",
+      "    @@@@  @@@@@@@@@@@@@@@@\n",
+      "\n",
+      "[2025-05-07 22:08:14,344] [INFO] [axolotl.monkeypatch.peft.utils.patch_peft_prep_code:76] [PID:1336] [RANK:0] patching prepare_model_for_kbit_training to allow for overrides\u001b[39m\n",
+      "[2025-05-07 22:08:14,549] [INFO] [axolotl.integrations.cut_cross_entropy.pre_model_load:80] [PID:1336] [RANK:0] Applying Cut Cross Entropy to model type: qwen3\u001b[39m\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "004d9177a6a14118a5930dc3cc13147b",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "model.safetensors.index.json:   0%|          | 0.00/36.5k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "cdebbc55a1164c018546c2ac6f8c620c",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "model-00001-of-00008.safetensors:   0%|          | 0.00/3.84G [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "500e272208a246089613bf788a165271",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "model-00002-of-00008.safetensors:   0%|          | 0.00/3.96G [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "4d05314858354e729d76094b3b0ce761",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "model-00003-of-00008.safetensors:   0%|          | 0.00/3.96G [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "dd0e646fad3f4a89ba23b39d162bd8d9",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "model-00004-of-00008.safetensors:   0%|          | 0.00/3.96G [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "e21e180307e5485cbbe908672fd6639a",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "model-00005-of-00008.safetensors:   0%|          | 0.00/3.96G [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "fe18bba7f3fb4c31bf840541f36b3425",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "model-00006-of-00008.safetensors:   0%|          | 0.00/3.96G [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "4471ff62258549fba9514bb67050f965",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "model-00007-of-00008.safetensors:   0%|          | 0.00/3.96G [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "eb1c9535e6a546098b760528b2ea387c",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "model-00008-of-00008.safetensors:   0%|          | 0.00/1.91G [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2025-05-07 22:09:49,798] [INFO] [accelerate.utils.modeling.get_balanced_memory:990] [PID:1336] We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "2860e3bb3baf4f7da058465850e800c5",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "fea1b70fb46745feb5111b3929175b5d",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "generation_config.json:   0%|          | 0.00/239 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2025-05-07 22:11:37,521] [INFO] [axolotl.utils.models.load_model:1302] [PID:1336] [RANK:0] cuda memory usage after model load: 9.264GB (+1.721GB cache, +0.375GB misc)\u001b[39m\n",
+      "[2025-05-07 22:11:37,532] [INFO] [axolotl.utils.models.prepare_model:1205] [PID:1336] [RANK:0] converting PEFT model w/ prepare_model_for_kbit_training\u001b[39m\n",
+      "[2025-05-07 22:11:37,537] [INFO] [axolotl.utils.models.load_model:1341] [PID:1336] [RANK:0] Converting modules to torch.float16\u001b[39m\n",
+      "trainable params: 128,450,560 || all params: 14,896,757,760 || trainable%: 0.8623\n",
+      "[2025-05-07 22:11:40,170] [INFO] [axolotl.utils.models.load_model:1402] [PID:1336] [RANK:0] cuda memory usage after adapters: 9.743GB (+1.476GB cache, +0.375GB misc)\u001b[39m\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.11/dist-packages/axolotl/core/trainers/base.py:64: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `AxolotlTrainer.__init__`. Use `processing_class` instead.\n",
+      "  super().__init__(*_args, **kwargs)\n",
+      "No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2025-05-07 22:11:41,755] [INFO] [axolotl.train.save_initial_configs:359] [PID:1336] [RANK:0] Pre-saving adapter config to ./outputs/qwen-sft-pirate-rrr...\u001b[39m\n",
+      "[2025-05-07 22:11:41,756] [INFO] [axolotl.train.save_initial_configs:363] [PID:1336] [RANK:0] Pre-saving tokenizer to ./outputs/qwen-sft-pirate-rrr...\u001b[39m\n",
+      "[2025-05-07 22:11:41,974] [INFO] [axolotl.train.save_initial_configs:366] [PID:1336] [RANK:0] Pre-saving model config to ./outputs/qwen-sft-pirate-rrr...\u001b[39m\n",
+      "[2025-05-07 22:11:41,982] [INFO] [axolotl.train.execute_training:211] [PID:1336] [RANK:0] Starting trainer...\u001b[39m\n",
+      "[2025-05-07 22:11:45,047] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:411] [PID:1336] [RANK:0] gather_len_batches: [1540]\u001b[39m\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "You're using a Qwen2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n",
+      "You're using a Qwen2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div>\n",
+       "      \n",
+       "      <progress value='25' max='25' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [25/25 09:25, Epoch 0/1]\n",
+       "    </div>\n",
+       "    <table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       " <tr style=\"text-align: left;\">\n",
+       "      <th>Step</th>\n",
+       "      <th>Training Loss</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <td>1</td>\n",
+       "      <td>1.092300</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>2</td>\n",
+       "      <td>1.554200</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>3</td>\n",
+       "      <td>1.041400</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>4</td>\n",
+       "      <td>1.733800</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>5</td>\n",
+       "      <td>1.430000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>6</td>\n",
+       "      <td>1.258500</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>7</td>\n",
+       "      <td>1.343600</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>8</td>\n",
+       "      <td>1.101700</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>9</td>\n",
+       "      <td>1.086500</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>10</td>\n",
+       "      <td>0.813200</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>11</td>\n",
+       "      <td>0.689600</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>12</td>\n",
+       "      <td>0.826700</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>13</td>\n",
+       "      <td>1.541800</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>14</td>\n",
+       "      <td>0.948000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>15</td>\n",
+       "      <td>1.357000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>16</td>\n",
+       "      <td>1.085800</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>17</td>\n",
+       "      <td>1.516800</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>18</td>\n",
+       "      <td>1.146800</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>19</td>\n",
+       "      <td>0.834800</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>20</td>\n",
+       "      <td>0.968000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>21</td>\n",
+       "      <td>1.388800</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>22</td>\n",
+       "      <td>1.511500</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>23</td>\n",
+       "      <td>1.338500</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>24</td>\n",
+       "      <td>1.206600</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>25</td>\n",
+       "      <td>1.504600</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table><p>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2025-05-07 22:12:42,746] [INFO] [axolotl.callbacks.on_step_end:128] [PID:1336] [RANK:0] cuda memory usage while training: 9.768GB (+3.287GB cache, +0.646GB misc)\u001b[39m\n",
+      "[2025-05-07 22:21:46,859] [INFO] [axolotl.train.save_trained_model:231] [PID:1336] [RANK:0] Training completed! Saving pre-trained model to ./outputs/qwen-sft-pirate-rrr.\u001b[39m\n"
+     ]
+    }
+   ],
+   "source": [
+    "from axolotl.train import train\n",
+    "\n",
+    "# Train for only the first 25 steps for this demo.\n",
+    "# This is sufficient to align the model because sample packing maximizes the trainable samples per step.\n",
+    "cfg.max_steps = 25\n",
+    "model, tokenizer, trainer = train(cfg=cfg, dataset_meta=dataset_meta)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "j1b9ypF78eCb"
+   },
+   "source": [
+    "# Inferencing the trained model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "r3_vHhif8YEs",
+    "outputId": "e5050605-f6c9-421c-98f9-bde56a281eae"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Ahoy there, matey! Shiver me timbers, ye be lookin' for the Pythagorean theorem, eh? Well, hold yer horses and listen up, for I'll be tellin' ye all about it in me own special way.\n",
+      "\n",
+      "The Pythagorean theorem be a real gem of a mathematical trick that helps ye find the length of a side of a right triangle. Now, a right triangle be a triangle with a right angle, which be that little corner that looks like a square. \n",
+      "\n",
+      "The theorem be named after a clever fellow named Pythagoras, who be a mathematician from ancient Greece. He discovered that if ye have a right triangle, the square of the length of the hypotenuse (that be the side opposite the right angle) be equal to the sum of the squares of the other two sides. \n",
+      "\n",
+      "In other words, if ye have a triangle with sides of length a, b, and c (\n"
+     ]
+    }
+   ],
+   "source": [
+    "from transformers import TextStreamer\n",
+    "\n",
+    "messages = [\n",
+    "    {\n",
+    "        \"role\": \"user\",\n",
+    "        \"content\": \"Explain the Pythagorean theorem to me.\",\n",
+    "    },\n",
+    "]\n",
+    "\n",
+    "prompt = tokenizer.apply_chat_template(\n",
+    "    messages,\n",
+    "    add_generation_prompt=True,\n",
+    "    tokenize=False,\n",
+    "    enable_thinking=False,\n",
+    ")\n",
+    "\n",
+    "outputs = model.generate(\n",
+    "    **tokenizer(prompt, return_tensors=\"pt\").to(\"cuda\"),\n",
+    "    max_new_tokens=192,\n",
+    "    temperature=1.0,\n",
+    "    top_p=0.8,\n",
+    "    top_k=32,\n",
+    "    streamer=TextStreamer(tokenizer, skip_prompt=True),\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "HoGwT2JRSIjA"
+   },
+   "source": [
+    "# Saving your trained model\n",
+    "\n",
+    "Axolotl automatically saves checkpoints to the `output_dir` path.\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "5BmSbiy6NaaS",
+    "outputId": "f5e1d913-7d55-42d2-8340-f9f1b0bc2b38"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "total 506M\n",
+      "-rw-r--r-- 1 root root  845 May  7 22:21 adapter_config.json\n",
+      "-rw-r--r-- 1 root root 491M May  7 22:21 adapter_model.safetensors\n",
+      "-rw-r--r-- 1 root root  707 May  7 22:11 added_tokens.json\n",
+      "drwxr-xr-x 2 root root 4.0K May  7 22:17 checkpoint-13\n",
+      "drwxr-xr-x 2 root root 4.0K May  7 22:21 checkpoint-25\n",
+      "-rw-r--r-- 1 root root 1.2K May  7 22:11 config.json\n",
+      "-rw-r--r-- 1 root root 1.6M May  7 22:11 merges.txt\n",
+      "-rw-r--r-- 1 root root 2.6K May  7 22:21 README.md\n",
+      "-rw-r--r-- 1 root root  613 May  7 22:11 special_tokens_map.json\n",
+      "-rw-r--r-- 1 root root 9.5K May  7 22:11 tokenizer_config.json\n",
+      "-rw-r--r-- 1 root root  11M May  7 22:11 tokenizer.json\n",
+      "-rw-r--r-- 1 root root 2.7M May  7 22:11 vocab.json\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Show the saved checkpoints in the output_dir\n",
+    "!ls -lh \"./outputs/qwen-sft-pirate-rrr\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "_PCIFWxuOZd6"
+   },
+   "source": [
+    "Setting `hub_model_id` in the original config (e.g. `hub_model_id: username/model_id`) would have automatically uploaded the model to the Hugging Face Hub.\n",
+    "\n",
+    "If you prefer to upload the training artifacts manually, you can still upload the entire final checkpoint to the Hugging Face Hub from the CLI."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 955,
+     "referenced_widgets": [
+      "c12ea43372ac4d57bb9605f1a429b397",
+      "86816687746246b4a6105e8010384e25",
+      "6f05e9bebf7b40c9835808e77de6c236",
+      "c7433acd3c4841e6958ae8f7e87b1808",
+      "19c1e38389fa46c7b7e2152a56e1df34",
+      "0e067d8db8ed48308a718d5f57683fd1",
+      "131065f118274a1586ac38e39ed84ef0",
+      "8640ac440fbc4644b9a3af7ba3ae7183",
+      "5cea7996f02040b187ece0bb2d6a8d1f",
+      "2e257c8be2da40b4bb67a9e4ab6811f3",
+      "56e3768bef5a4b9db4168c5c17f509c2",
+      "62c028fdef904dedb9cdeca2b3bda725",
+      "a7cf477e80fc43e0ad82c7997b076dce",
+      "835bcc28a5564fb9b3d651bc8e32dc46",
+      "9f1c9a0695384bdaa6f8b847ef89bee8",
+      "b1bea589efa14258a9982071b87938bf",
+      "590eef89881545aa8bbef9a8bbe7fb00",
+      "4b1f04ff63d14a118fdd15814dff50e4",
+      "39789237703c4a418134243055c9cbf5",
+      "a3a945817f684328b34651fe052393ec"
+     ]
+    },
+    "id": "2yw8pLvlSMl8",
+    "outputId": "6e489ab2-4abe-4e28-84ca-959f912433a4"
+   },
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "c12ea43372ac4d57bb9605f1a429b397",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.\n",
+      "Start hashing 40 files.\n",
+      "Finished hashing 40 files.\n",
+      "Uploading files using Xet Storage..\n",
+      "Uploading...:  87% 1.82G/2.10G [00:23<00:04, 67.3MB/s]Cancellation requested; stopping current tasks.\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/huggingface_hub/_commit_api.py\", line 598, in _upload_xet_files\n",
+      "    upload_files(\n",
+      "RuntimeError: Xet Runtime Error: Task cancelled; possible runtime shutdown in progress (task 9 was cancelled).\n",
+      "\n",
+      "During handling of the above exception, another exception occurred:\n",
+      "\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/usr/local/bin/huggingface-cli\", line 8, in <module>\n",
+      "    sys.exit(main())\n",
+      "             ^^^^^^\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/huggingface_hub/commands/huggingface_cli.py\", line 57, in main\n",
+      "    service.run()\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/huggingface_hub/commands/upload.py\", line 207, in run\n",
+      "    print(self._upload())\n",
+      "          ^^^^^^^^^^^^^^\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/huggingface_hub/commands/upload.py\", line 302, in _upload\n",
+      "    return self.api.upload_folder(\n",
+      "           ^^^^^^^^^^^^^^^^^^^^^^^\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_validators.py\", line 114, in _inner_fn\n",
+      "    return fn(*args, **kwargs)\n",
+      "           ^^^^^^^^^^^^^^^^^^^\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/huggingface_hub/hf_api.py\", line 1633, in _inner\n",
+      "    return fn(self, *args, **kwargs)\n",
+      "           ^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/huggingface_hub/hf_api.py\", line 4942, in upload_folder\n",
+      "    commit_info = self.create_commit(\n",
+      "                  ^^^^^^^^^^^^^^^^^^^\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_validators.py\", line 114, in _inner_fn\n",
+      "    return fn(*args, **kwargs)\n",
+      "           ^^^^^^^^^^^^^^^^^^^\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/huggingface_hub/hf_api.py\", line 1633, in _inner\n",
+      "    return fn(self, *args, **kwargs)\n",
+      "           ^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/huggingface_hub/hf_api.py\", line 4202, in create_commit\n",
+      "    self.preupload_lfs_files(\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/huggingface_hub/hf_api.py\", line 4483, in preupload_lfs_files\n",
+      "    _upload_xet_files(**upload_kwargs, create_pr=create_pr)  # type: ignore [arg-type]\n",
+      "    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_validators.py\", line 114, in _inner_fn\n",
+      "    return fn(*args, **kwargs)\n",
+      "           ^^^^^^^^^^^^^^^^^^^\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/huggingface_hub/_commit_api.py\", line 592, in _upload_xet_files\n",
+      "    with progress_cm as progress:\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/tqdm/std.py\", line 1138, in __exit__\n",
+      "    def __exit__(self, exc_type, exc_value, traceback):\n",
+      "\n",
+      "KeyboardInterrupt\n",
+      "^C\n"
+     ]
+    }
+   ],
+   "source": [
+    "from huggingface_hub import notebook_login\n",
+    "\n",
+    "# Remove the intermediate checkpoints so only the final adapter is uploaded.\n",
+    "# The glob must stay outside the quotes, otherwise the shell passes a literal\n",
+    "# `checkpoint-*` to rm and nothing is deleted.\n",
+    "!rm -rf \"./outputs/qwen-sft-pirate-rrr\"/checkpoint-*\n",
+    "\n",
+    "# HF Notebook login widget\n",
+    "notebook_login()\n",
+    "\n",
+    "# upload the LoRA adapter for your model to HF, remember to update the username/model-name below\n",
+    "!huggingface-cli upload --repo-type=model winglian/pirate-qwen-14B \"./outputs/qwen-sft-pirate-rrr\""
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "gpuType": "T4",
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python"
+  },
+  "widgets": {
+   "application/vnd.jupyter.widget-state+json": {
+    "00321cce58884f6f9b3855a21fcd9187": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "004d9177a6a14118a5930dc3cc13147b": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_a80410b919e442c49aea15acc1ce1a72",
+       "IPY_MODEL_c6e00f5224364822bc4239b176686919",
+       "IPY_MODEL_ec11d1e5ae7b42c883d9b1f38a65356e"
+      ],
+      "layout": "IPY_MODEL_734185351eb543fa9a00a881dcbb9fe7"
+     }
+    },
+    "0077aedc3d174560bce924ee89e9c006": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "03a3c744d716431488163b4358b80f92": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "03b093d592ba4386aa61f7b8483da660": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_b8766a88716948cf968f4563531a76d9",
+       "IPY_MODEL_6f3a28b912714c6e931003549664bfa3",
+       "IPY_MODEL_16d1283741404b7bb319094c992fce01"
+      ],
+      "layout": "IPY_MODEL_2a5bb0e818ab47be8cf6465988328503"
+     }
+    },
+    "042e091f75694c47aee761e760e76773": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "0546d04aae644dde846c58a4afb598a6": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "054c8dffadba48c6b895a6cc62448ecc": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "07fb3a2c8315494e97b447e672dfae06": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_12815f401eba44658caa7b2e490137a8",
+      "placeholder": "​",
+      "style": "IPY_MODEL_30e02aa2d0d241979369e598287f2639",
+      "value": "Drop Samples with Zero Trainable Tokens (num_proc=2): 100%"
+     }
+    },
+    "083f9cda8d754c168beee10d2f8955a2": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "success",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_a0a11e929edd4189b79723d618522c33",
+      "max": 728,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_e87ea87fcff247b5bbcc331ba79a8dc2",
+      "value": 728
+     }
+    },
+    "09007681cf8d42aeb8c1d2f6a74e470a": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_b195f160ca20442fadd8b5aed0ee41af",
+      "placeholder": "​",
+      "style": "IPY_MODEL_ca65e32eb52f48c09a84b33cb18f22cd",
+      "value": " 11.4M/11.4M [00:00&lt;00:00, 21.8MB/s]"
+     }
+    },
+    "0a46ad75c198463d843fb35e813642cb": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "success",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_b8e39e4dddc3497fbc29ae45c66da759",
+      "max": 11422654,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_63b4e563e85c4f03b1b72beda9577bcc",
+      "value": 11422654
+     }
+    },
+    "0aa8ab56b85f4171a79c3bc210594025": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "0b4c9753a7cb4354b8e5f187e6e1ad7c": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "0cd7efffbb3c4c4b972e63749f61ab97": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "0dea5caa27384f5689e3cab51f558727": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "0e067d8db8ed48308a718d5f57683fd1": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_b1bea589efa14258a9982071b87938bf",
+      "placeholder": "​",
+      "style": "IPY_MODEL_590eef89881545aa8bbef9a8bbe7fb00",
+      "value": "\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. </center>"
+     }
+    },
+    "0e50870ed0c643e0b6c18cc5d7ddae7f": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_bfcdbba993b74972a9e3e575f86908ff",
+      "placeholder": "​",
+      "style": "IPY_MODEL_6ebb2ec171414e47a14765505f64bb3c",
+      "value": " 3.84G/3.84G [00:09&lt;00:00, 664MB/s]"
+     }
+    },
+    "0e936d9dbf9c4fdd86bbfe9730dedc47": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "0f417447a7bd4a33acca96fa37aec877": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "0f480e3a0b0a45d2a2d2dec3cad923f3": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "0f6907ebbc6242c8bde059cef1e1bd29": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_5bdfd87fc6cd4f9dabef7cfee29c8060",
+       "IPY_MODEL_64f54d4a744a4627a07c3c0120276f3b",
+       "IPY_MODEL_65b75b9b8bc143cf997796af68ff6668"
+      ],
+      "layout": "IPY_MODEL_d6fe74e4255444368f8f90a62157d869"
+     }
+    },
+    "114dece49dba437c8572ef94b23c3b1e": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "12815f401eba44658caa7b2e490137a8": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "12b56912736849fea2ad8124456fdc5c": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "success",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_97e36007e1304e1583fd81bfb13f0edd",
+      "max": 1671853,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_c65dc74c7d6f4bab8f7dd28455161dd8",
+      "value": 1671853
+     }
+    },
+    "131065f118274a1586ac38e39ed84ef0": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": "center",
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": "flex",
+      "flex": null,
+      "flex_flow": "column",
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": "50%"
+     }
+    },
+    "158c8b85dbf34de6a94b4e35e2fc7d5a": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "16a188a0b06d45f980dcf3933509fe0a": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_349eee9f56d64f0cba6fc24ff2c50c9b",
+      "placeholder": "​",
+      "style": "IPY_MODEL_7e5d3774060e4589aa65982da5ea4ef4",
+      "value": " 9985/9985 [00:04&lt;00:00, 2604.11 examples/s]"
+     }
+    },
+    "16d1283741404b7bb319094c992fce01": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_a4e5789584564049b83df7c6c54a3e08",
+      "placeholder": "​",
+      "style": "IPY_MODEL_ff3a94b146a948b6907f5d80c7157f99",
+      "value": " 9985/0 [00:00&lt;00:00, 50763.46 examples/s]"
+     }
+    },
+    "1811cda0644e4190a9469d1774435d82": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "18357b321ce44d7b8bd9d1c886f69275": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_e366ae3fceec4566b9ed303d6c5f90af",
+      "placeholder": "​",
+      "style": "IPY_MODEL_5dd7d150dbe04f08b165ce7f2c27cd11",
+      "value": "model-00008-of-00008.safetensors: 100%"
+     }
+    },
+    "19127c7bb1554ccbac877059f9a82db0": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "danger",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_e400cbf14bcc446a9d33b210cd93550b",
+      "max": 3963750880,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_71002199df6b40c9a1ac40df5fb27a1b",
+      "value": 3963750502
+     }
+    },
+    "19c1e38389fa46c7b7e2152a56e1df34": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ButtonModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ButtonModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ButtonView",
+      "button_style": "",
+      "description": "Login",
+      "disabled": false,
+      "icon": "",
+      "layout": "IPY_MODEL_835bcc28a5564fb9b3d651bc8e32dc46",
+      "style": "IPY_MODEL_9f1c9a0695384bdaa6f8b847ef89bee8",
+      "tooltip": ""
+     }
+    },
+    "1bec6297c90242a88672d195bc09d429": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "1c6f1f10667545aaab958016ba7e2c94": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "1d5117195d4b49eb8f1a73b18419f7ce": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_0dea5caa27384f5689e3cab51f558727",
+      "placeholder": "​",
+      "style": "IPY_MODEL_a6f48410b9964fefba0c3009a77dc838",
+      "value": " 9.68k/9.68k [00:00&lt;00:00, 812kB/s]"
+     }
+    },
+    "1f7d30f71bbd4547a9150d21da071055": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "200df5e79b9244849e589ecb0250a520": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_f4a1795dc7514a718f478245f521f0ba",
+      "placeholder": "​",
+      "style": "IPY_MODEL_5e746eb25bbe416fb585fa24e79f5177",
+      "value": "model-00002-of-00008.safetensors: 100%"
+     }
+    },
+    "20352e5f58d24bb8b1f3940efd14fe4a": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "253017b0d0534e54ab44e181f6d7c82d": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_1c6f1f10667545aaab958016ba7e2c94",
+      "placeholder": "​",
+      "style": "IPY_MODEL_e6e969610738449887259063967f82b0",
+      "value": " 2.78M/2.78M [00:00&lt;00:00, 17.8MB/s]"
+     }
+    },
+    "258b7c635c1045329d4669e48c46ccd5": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_6f68ed9889f54ad2ae8a3b95ac263a83",
+       "IPY_MODEL_80366349d81e4dcc892db6cd56e384f3",
+       "IPY_MODEL_c73055099c084dca996159e23e162d0b"
+      ],
+      "layout": "IPY_MODEL_977f799afaac4a55b2dc1cffa7d5b63b"
+     }
+    },
+    "279937fe03bc4e4eb25b472d7e9df163": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "danger",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_b634bb73cfa743d09a5999101b840976",
+      "max": 1912371880,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_742b1030acfd414bbd9d5327b7e3826d",
+      "value": 1912371698
+     }
+    },
+    "27beaf06e41b472abdb544a43c720c5a": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "2860e3bb3baf4f7da058465850e800c5": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_3efd18ea8eaa41918894883da9541bfa",
+       "IPY_MODEL_e09f1bcbb9d94c09be53e5e1303642c2",
+       "IPY_MODEL_82177df57a494de8900c14c2f5185175"
+      ],
+      "layout": "IPY_MODEL_ccfcdc95baf646f8aeb3d516742383f2"
+     }
+    },
+    "2a51b36be41745468e4c2d7a21b1c0d2": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "2a5bb0e818ab47be8cf6465988328503": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "2b3a2659b12244bd8548320320016dbf": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "2e257c8be2da40b4bb67a9e4ab6811f3": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "2e2b0c1599c341a198f632f46a40c90e": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_be724f04b03942b2a033a7e8898bb4fd",
+      "placeholder": "​",
+      "style": "IPY_MODEL_fcbab4d8dced41a18dfccce81e3a45a0",
+      "value": "model-00005-of-00008.safetensors: 100%"
+     }
+    },
+    "3036608c71904ce9ae4bb2a9fa8802d9": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_5ca6be24acb548cea130bd58e9954c7c",
+      "placeholder": "​",
+      "style": "IPY_MODEL_5cfb02ee044b4011a378efa8b54a370f",
+      "value": " 3.96G/3.96G [00:10&lt;00:00, 531MB/s]"
+     }
+    },
+    "30a81da86f8043eca301e86a8651201a": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "30e02aa2d0d241979369e598287f2639": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "3225603166b54e7aab766b9964a2f660": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "33b3b1d0295646edaac7b4822761aeb0": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "349eee9f56d64f0cba6fc24ff2c50c9b": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "34c9c0137b504cd799c6bd6de69507c2": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "34cf3df51fbc41cabfdbba153c007f0e": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "35c811d2ae8e43f3b5cecbdd3cfa857f": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "35cc989ca3374e7dba0cb166febc4bde": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "366a343b62fa47d8985a3bd464d99f9e": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "37de928300e34184881039378bd75e7f": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "388f618924274d21a066f098f4f1e744": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_7c95f85a2b1f47a1bd846d110c47bb3c",
+       "IPY_MODEL_083f9cda8d754c168beee10d2f8955a2",
+       "IPY_MODEL_62e1a65582f446a78612eaa804e08a7d"
+      ],
+      "layout": "IPY_MODEL_487a177d020f4605834878b2fdc7afa3"
+     }
+    },
+    "39789237703c4a418134243055c9cbf5": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "3aaecbf540f54a2db9ab0931e3b1fe57": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "3c21e4a511b4441192c03b7f1d0976e9": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "3efd18ea8eaa41918894883da9541bfa": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_8f5bd719974e41c3a8dd9a5b0d3d71e6",
+      "placeholder": "​",
+      "style": "IPY_MODEL_b87c84de30e84b3abf4871461fb9cbd3",
+      "value": "Loading checkpoint shards: 100%"
+     }
+    },
+    "41f3b32c2f6b4034ae7a3b9124e28bc7": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "4471ff62258549fba9514bb67050f965": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_9cd5211b5d8b457aa0002f1d17b80028",
+       "IPY_MODEL_19127c7bb1554ccbac877059f9a82db0",
+       "IPY_MODEL_f4667818b9d34a09891cd727a429a610"
+      ],
+      "layout": "IPY_MODEL_9ed02dc43412471a9ab47f3620ccf3a5"
+     }
+    },
+    "4540927d98f54466b434ba4c0edf045d": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "487a177d020f4605834878b2fdc7afa3": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "4b1f04ff63d14a118fdd15814dff50e4": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "LabelModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "LabelModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "LabelView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_39789237703c4a418134243055c9cbf5",
+      "placeholder": "​",
+      "style": "IPY_MODEL_a3a945817f684328b34651fe052393ec",
+      "value": "Connecting..."
+     }
+    },
+    "4b27c267393640f28f6eae0875bd2ed9": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "4c727d40ef0443449afc31724ee79f0c": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "4d05314858354e729d76094b3b0ce761": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_c42acf646f344a88b8c11f81e67f7206",
+       "IPY_MODEL_7be6f04c284e4326bb4ff3d301e7b3c6",
+       "IPY_MODEL_ffdbb12a2f2c4d14911685e7683e0ef0"
+      ],
+      "layout": "IPY_MODEL_bee3501b2a17427784a717e50a85e7fa"
+     }
+    },
+    "4d468f96ec924681ad65eb671674b93e": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "4f1977d7e4824ef1a14b65f0f42bba10": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "4fd114abe9f5494ab59858949f5055f1": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "500e272208a246089613bf788a165271": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_200df5e79b9244849e589ecb0250a520",
+       "IPY_MODEL_cc94432d08464affa3e58b560bdad194",
+       "IPY_MODEL_3036608c71904ce9ae4bb2a9fa8802d9"
+      ],
+      "layout": "IPY_MODEL_adacfdcc1b0140efac56918e9ccf064e"
+     }
+    },
+    "519a7b154022443db6703f04a9142bae": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "success",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_d02274afd47b462291c745f261209d42",
+      "max": 27341251,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_0f417447a7bd4a33acca96fa37aec877",
+      "value": 27341251
+     }
+    },
+    "56e3768bef5a4b9db4168c5c17f509c2": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "590eef89881545aa8bbef9a8bbe7fb00": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "598da69727bd4fb8b1caf465ac736d7a": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "5bdfd87fc6cd4f9dabef7cfee29c8060": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_4d468f96ec924681ad65eb671674b93e",
+      "placeholder": "​",
+      "style": "IPY_MODEL_ad7599de524549c48bf2d3124ad4b299",
+      "value": "Dropping Long Sequences (num_proc=2): 100%"
+     }
+    },
+    "5ca240f31e6b44e3882c5eb37cd5a309": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": "20px"
+     }
+    },
+    "5ca6be24acb548cea130bd58e9954c7c": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "5cea7996f02040b187ece0bb2d6a8d1f": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "5cfb02ee044b4011a378efa8b54a370f": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "5dd7d150dbe04f08b165ce7f2c27cd11": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "5e18768f7ad6434ba8b8b8a2e853e204": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "5e5e15b0569b474c9620083b3ec6af55": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "5e746eb25bbe416fb585fa24e79f5177": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "5eb06edeb58e4930b1affef2a59eae81": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "5f86cd894de94c3280fadc1e2fd0ee13": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_a20927bf5f2c41f58c1e31ac858ab36c",
+       "IPY_MODEL_0a46ad75c198463d843fb35e813642cb",
+       "IPY_MODEL_09007681cf8d42aeb8c1d2f6a74e470a"
+      ],
+      "layout": "IPY_MODEL_ebc80d1a55fa47f4a5ea2756588569ec"
+     }
+    },
+    "60c1a0d765c14a1d888317e6a507e4ea": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "62c028fdef904dedb9cdeca2b3bda725": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "62e1a65582f446a78612eaa804e08a7d": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_5e18768f7ad6434ba8b8b8a2e853e204",
+      "placeholder": "​",
+      "style": "IPY_MODEL_bb33aec33a6447078c31bfd728942994",
+      "value": " 728/728 [00:00&lt;00:00, 20.3kB/s]"
+     }
+    },
+    "62e302ebdad64aada0ffe64ae1c873f3": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "63580b6fb30642479fe3000915bf551a": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "63b4e563e85c4f03b1b72beda9577bcc": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "64f54d4a744a4627a07c3c0120276f3b": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "success",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_0546d04aae644dde846c58a4afb598a6",
+      "max": 9985,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_897b77a56c09479bb11d7f2a30997e55",
+      "value": 9985
+     }
+    },
+    "65b75b9b8bc143cf997796af68ff6668": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_81c3db71ac704280ad030072655f1537",
+      "placeholder": "​",
+      "style": "IPY_MODEL_042e091f75694c47aee761e760e76773",
+      "value": " 9985/9985 [00:02&lt;00:00, 3977.47 examples/s]"
+     }
+    },
+    "67da6c4260574869aa24c3cbc1bc1654": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "6932489232ec4ab18a160b1e7fbcdfe1": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "6ebb2ec171414e47a14765505f64bb3c": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "6f05e9bebf7b40c9835808e77de6c236": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "PasswordModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "PasswordModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "PasswordView",
+      "continuous_update": true,
+      "description": "Token:",
+      "description_tooltip": null,
+      "disabled": false,
+      "layout": "IPY_MODEL_2e257c8be2da40b4bb67a9e4ab6811f3",
+      "placeholder": "​",
+      "style": "IPY_MODEL_56e3768bef5a4b9db4168c5c17f509c2",
+      "value": ""
+     }
+    },
+    "6f3a28b912714c6e931003549664bfa3": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "success",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_5ca240f31e6b44e3882c5eb37cd5a309",
+      "max": 1,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_5eb06edeb58e4930b1affef2a59eae81",
+      "value": 1
+     }
+    },
+    "6f68ed9889f54ad2ae8a3b95ac263a83": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_41f3b32c2f6b4034ae7a3b9124e28bc7",
+      "placeholder": "​",
+      "style": "IPY_MODEL_a10d0a76010f4e508c65a9b69ebc5156",
+      "value": "Tokenizing Prompts (num_proc=2): 100%"
+     }
+    },
+    "704f2f5a9b1c49d5a75a0025a5dda11b": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "71002199df6b40c9a1ac40df5fb27a1b": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "71c8af139cd248b1b51101fd46a93f35": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "success",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_d0e9dce55cec4c1ca619a0ccf209d924",
+      "max": 9675,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_4c727d40ef0443449afc31724ee79f0c",
+      "value": 9675
+     }
+    },
+    "734185351eb543fa9a00a881dcbb9fe7": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "735d4f225b24414294fc1b213c61223c": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "742b1030acfd414bbd9d5327b7e3826d": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "77304d1a46b3468a98483e02ec0ac4a4": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "7baeab52d6694c32b1efd1ea1a0a7782": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_93a44a11aa4846fa8efc6c1413ef1627",
+      "placeholder": "​",
+      "style": "IPY_MODEL_a55060adc3564407ac81ad7297d34aaa",
+      "value": "train.jsonl: 100%"
+     }
+    },
+    "7be6f04c284e4326bb4ff3d301e7b3c6": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "danger",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_9503a45960984adc97b58e16c50662e0",
+      "max": 3963750880,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_da6e93f3e4984780b930fe7a706983ea",
+      "value": 3963750502
+     }
+    },
+    "7c2485c6cdfe463da6fdb35982a1070d": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_ad1236893754446881e153adc9d5c962",
+       "IPY_MODEL_daee63fd167e4441a32324b51b00ad2b",
+       "IPY_MODEL_fe41858c6bd04c58840112b67c19a336"
+      ],
+      "layout": "IPY_MODEL_d262c82138024169b9f3aa034ca756fa"
+     }
+    },
+    "7c95f85a2b1f47a1bd846d110c47bb3c": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_7fd44cf9ca6e4726bfd7ac21846d6a14",
+      "placeholder": "​",
+      "style": "IPY_MODEL_366a343b62fa47d8985a3bd464d99f9e",
+      "value": "config.json: 100%"
+     }
+    },
+    "7cd0b85ebd204b7aba908417811ce4e0": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_7baeab52d6694c32b1efd1ea1a0a7782",
+       "IPY_MODEL_519a7b154022443db6703f04a9142bae",
+       "IPY_MODEL_d4183e9715f34d249942b8271cca3bdf"
+      ],
+      "layout": "IPY_MODEL_da2347ac94764a3fa2743343cf0d3cd2"
+     }
+    },
+    "7e5d3774060e4589aa65982da5ea4ef4": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "7fd44cf9ca6e4726bfd7ac21846d6a14": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "80366349d81e4dcc892db6cd56e384f3": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "success",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_f8ef805b776145c3bfa9ba8d90972058",
+      "max": 9985,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_cc587493c33c4f118d1b1170f85be24c",
+      "value": 9985
+     }
+    },
+    "813621384dc748b0ad06775e22761c0b": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "81c3db71ac704280ad030072655f1537": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "82177df57a494de8900c14c2f5185175": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_67da6c4260574869aa24c3cbc1bc1654",
+      "placeholder": "​",
+      "style": "IPY_MODEL_94b9088614464f60a203de39dbcae853",
+      "value": " 8/8 [01:47&lt;00:00, 11.64s/it]"
+     }
+    },
+    "823f1c78f15043e38bbd4dca3932a86a": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "success",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_03a3c744d716431488163b4358b80f92",
+      "max": 239,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_a5434ee714f9498d83870544b67c0cb7",
+      "value": 239
+     }
+    },
+    "835bcc28a5564fb9b3d651bc8e32dc46": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "8640ac440fbc4644b9a3af7ba3ae7183": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "86816687746246b4a6105e8010384e25": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_8640ac440fbc4644b9a3af7ba3ae7183",
+      "placeholder": "​",
+      "style": "IPY_MODEL_5cea7996f02040b187ece0bb2d6a8d1f",
+      "value": "<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. </center>"
+     }
+    },
+    "879c8ab5873847a8833bd74123be90a4": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_ef223e8504b64e3592589880326aaf41",
+      "placeholder": "​",
+      "style": "IPY_MODEL_598da69727bd4fb8b1caf465ac736d7a",
+      "value": " 1.67M/1.67M [00:00&lt;00:00, 19.0MB/s]"
+     }
+    },
+    "897b77a56c09479bb11d7f2a30997e55": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "8bc9d8ba866c442b9118d9630009939c": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "8c4d4fc5a30f4e7cb3be53fe2adda33d": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "8f5bd719974e41c3a8dd9a5b0d3d71e6": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "8f726dbfb45d4528afa33e36a6313267": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "9327977822be4b1294f80e876552e305": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_37de928300e34184881039378bd75e7f",
+      "placeholder": "​",
+      "style": "IPY_MODEL_0e936d9dbf9c4fdd86bbfe9730dedc47",
+      "value": " 3.96G/3.96G [00:13&lt;00:00, 273MB/s]"
+     }
+    },
+    "936d04b5fe1b4c63bf0b080e423d051b": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "93a44a11aa4846fa8efc6c1413ef1627": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "94b9088614464f60a203de39dbcae853": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "9503a45960984adc97b58e16c50662e0": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "95caff42f08a4c2aa14c867b8f37f231": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_de7c37ee83e24f0c889e84d07279c2ec",
+       "IPY_MODEL_9d4897eefb5f48259ffb2d23e332f752",
+       "IPY_MODEL_253017b0d0534e54ab44e181f6d7c82d"
+      ],
+      "layout": "IPY_MODEL_27beaf06e41b472abdb544a43c720c5a"
+     }
+    },
+    "977f799afaac4a55b2dc1cffa7d5b63b": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "97e36007e1304e1583fd81bfb13f0edd": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "9858cb74a09748a39e8149baac96702c": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "9b42e08b3c9548818488268768a118b1": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_d955dcaa0e944e719f3a06139dd54a03",
+      "placeholder": "​",
+      "style": "IPY_MODEL_d3de2662c7964f1ba96e58da382af720",
+      "value": "merges.txt: 100%"
+     }
+    },
+    "9cd5211b5d8b457aa0002f1d17b80028": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_6932489232ec4ab18a160b1e7fbcdfe1",
+      "placeholder": "​",
+      "style": "IPY_MODEL_4540927d98f54466b434ba4c0edf045d",
+      "value": "model-00007-of-00008.safetensors: 100%"
+     }
+    },
+    "9d4897eefb5f48259ffb2d23e332f752": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "success",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_30a81da86f8043eca301e86a8651201a",
+      "max": 2776833,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_e8b7a81040904c1e89e58978223b1737",
+      "value": 2776833
+     }
+    },
+    "9e333ed3b5014069ac1dd969255dd591": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "9ed02dc43412471a9ab47f3620ccf3a5": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "9f1c9a0695384bdaa6f8b847ef89bee8": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ButtonStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ButtonStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "button_color": null,
+      "font_weight": ""
+     }
+    },
+    "9f56a2d9979c4bd8928c644c22c3ecdf": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "a0a11e929edd4189b79723d618522c33": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "a10d0a76010f4e508c65a9b69ebc5156": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "a138859f19b74fc0928dc236ab5359db": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_9b42e08b3c9548818488268768a118b1",
+       "IPY_MODEL_12b56912736849fea2ad8124456fdc5c",
+       "IPY_MODEL_879c8ab5873847a8833bd74123be90a4"
+      ],
+      "layout": "IPY_MODEL_20352e5f58d24bb8b1f3940efd14fe4a"
+     }
+    },
+    "a1959759c5424da9961fb2a308d4dee4": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_3aaecbf540f54a2db9ab0931e3b1fe57",
+      "placeholder": "​",
+      "style": "IPY_MODEL_9e333ed3b5014069ac1dd969255dd591",
+      "value": " 239/239 [00:00&lt;00:00, 30.9kB/s]"
+     }
+    },
+    "a20927bf5f2c41f58c1e31ac858ab36c": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_1811cda0644e4190a9469d1774435d82",
+      "placeholder": "​",
+      "style": "IPY_MODEL_35c811d2ae8e43f3b5cecbdd3cfa857f",
+      "value": "tokenizer.json: 100%"
+     }
+    },
+    "a3a945817f684328b34651fe052393ec": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "a44f630e099e43899f20a77084ae60cd": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_ed5ca967ad5342929e578ac6aa4dc4c0",
+      "placeholder": "​",
+      "style": "IPY_MODEL_af401d117d5047629d3a6e2361757b62",
+      "value": "model-00001-of-00008.safetensors: 100%"
+     }
+    },
+    "a4e5789584564049b83df7c6c54a3e08": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "a5434ee714f9498d83870544b67c0cb7": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "a55060adc3564407ac81ad7297d34aaa": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "a6f48410b9964fefba0c3009a77dc838": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "a7cf477e80fc43e0ad82c7997b076dce": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "a80410b919e442c49aea15acc1ce1a72": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_fa1282ccc7544e4f818e2f03ccffe4a5",
+      "placeholder": "​",
+      "style": "IPY_MODEL_bbbf575d2a4b4c6ea8389be79b2a6039",
+      "value": "model.safetensors.index.json: 100%"
+     }
+    },
+    "ab93eabd7cea4b94b4b7a387f101e8a1": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "ac764024cf1c4e08ba7749afd2cd20ac": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "ad1236893754446881e153adc9d5c962": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_62e302ebdad64aada0ffe64ae1c873f3",
+      "placeholder": "​",
+      "style": "IPY_MODEL_bd1b0dfed6d34d16af33a4a58330f5ec",
+      "value": "Saving the dataset (1/1 shards): 100%"
+     }
+    },
+    "ad7599de524549c48bf2d3124ad4b299": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "adacfdcc1b0140efac56918e9ccf064e": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "af401d117d5047629d3a6e2361757b62": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "b191ac001a2e4962bc9a245fcdf26e6b": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "b195f160ca20442fadd8b5aed0ee41af": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "b1bea589efa14258a9982071b87938bf": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "b5b65414154544aa8a71b1a39164aad7": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "b634bb73cfa743d09a5999101b840976": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "b82aa8c57f7c422a9a9c90f333ed2a99": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_c0991cf63ee6458b96e9a75e7a88b61a",
+       "IPY_MODEL_71c8af139cd248b1b51101fd46a93f35",
+       "IPY_MODEL_1d5117195d4b49eb8f1a73b18419f7ce"
+      ],
+      "layout": "IPY_MODEL_3c21e4a511b4441192c03b7f1d0976e9"
+     }
+    },
+    "b8766a88716948cf968f4563531a76d9": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_2b3a2659b12244bd8548320320016dbf",
+      "placeholder": "​",
+      "style": "IPY_MODEL_0cd7efffbb3c4c4b972e63749f61ab97",
+      "value": "Generating train split: "
+     }
+    },
+    "b87c84de30e84b3abf4871461fb9cbd3": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "b8e39e4dddc3497fbc29ae45c66da759": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "bb33aec33a6447078c31bfd728942994": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "bbbf575d2a4b4c6ea8389be79b2a6039": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "bca2c7185b6749fd899c06a2ba4c5e46": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_0f480e3a0b0a45d2a2d2dec3cad923f3",
+      "placeholder": "​",
+      "style": "IPY_MODEL_fcb30372e7404c5d8a1ad4df91e6c7b2",
+      "value": " 1.91G/1.91G [00:05&lt;00:00, 444MB/s]"
+     }
+    },
+    "bd1b0dfed6d34d16af33a4a58330f5ec": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "be724f04b03942b2a033a7e8898bb4fd": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "bed8726b8069434687c75452e21f19e5": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "success",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_fa864b41586f4a7aa56aeafd1d84eb75",
+      "max": 9985,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_3225603166b54e7aab766b9964a2f660",
+      "value": 9985
+     }
+    },
+    "bee3501b2a17427784a717e50a85e7fa": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "bfcdbba993b74972a9e3e575f86908ff": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "bff139df987d4a62abec6456cb27f3d4": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "danger",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_c1f9c267ba3f40039cdb5eb3267e8043",
+      "max": 3963750880,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_33b3b1d0295646edaac7b4822761aeb0",
+      "value": 3963750502
+     }
+    },
+    "c0892a1881de4eb4bfabc6a68f87ae99": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_158c8b85dbf34de6a94b4e35e2fc7d5a",
+      "placeholder": "​",
+      "style": "IPY_MODEL_0b4c9753a7cb4354b8e5f187e6e1ad7c",
+      "value": " 3.96G/3.96G [00:15&lt;00:00, 564MB/s]"
+     }
+    },
+    "c0991cf63ee6458b96e9a75e7a88b61a": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_ed28e2e0410d4e0b855467e798e53d66",
+      "placeholder": "​",
+      "style": "IPY_MODEL_d93f134f802b4b69b575bdaf07dbd27c",
+      "value": "tokenizer_config.json: 100%"
+     }
+    },
+    "c12ea43372ac4d57bb9605f1a429b397": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "VBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "VBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "VBoxView",
+      "box_style": "",
+      "children": [],
+      "layout": "IPY_MODEL_131065f118274a1586ac38e39ed84ef0"
+     }
+    },
+    "c1314f241a434c41b45d84dc4d3b30f8": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "c1f9c267ba3f40039cdb5eb3267e8043": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "c33ced495f70464aa4a3a91922090853": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "c3725c7f79fe415fbd1ea336f0cc9cf1": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "danger",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_b191ac001a2e4962bc9a245fcdf26e6b",
+      "max": 3841788544,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_054c8dffadba48c6b895a6cc62448ecc",
+      "value": 3841788178
+     }
+    },
+    "c3be9109d63c485d9c0ef4f9bc0f9218": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "c42acf646f344a88b8c11f81e67f7206": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_8bc9d8ba866c442b9118d9630009939c",
+      "placeholder": "​",
+      "style": "IPY_MODEL_9f56a2d9979c4bd8928c644c22c3ecdf",
+      "value": "model-00003-of-00008.safetensors: 100%"
+     }
+    },
+    "c6164e05a1914ae48083db9ad7f4ef7c": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "c65dc74c7d6f4bab8f7dd28455161dd8": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "c6e00f5224364822bc4239b176686919": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "success",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_2a51b36be41745468e4c2d7a21b1c0d2",
+      "max": 36514,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_4fd114abe9f5494ab59858949f5055f1",
+      "value": 36514
+     }
+    },
+    "c73055099c084dca996159e23e162d0b": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_e40d1c1ac9494b3bade9858324e7ffdf",
+      "placeholder": "​",
+      "style": "IPY_MODEL_d65b6b060d9845779299491ac5599c31",
+      "value": " 9985/9985 [01:04&lt;00:00, 189.08 examples/s]"
+     }
+    },
+    "c7433acd3c4841e6958ae8f7e87b1808": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "CheckboxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "CheckboxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "CheckboxView",
+      "description": "Add token as git credential?",
+      "description_tooltip": null,
+      "disabled": false,
+      "indent": true,
+      "layout": "IPY_MODEL_62c028fdef904dedb9cdeca2b3bda725",
+      "style": "IPY_MODEL_a7cf477e80fc43e0ad82c7997b076dce",
+      "value": false
+     }
+    },
+    "c84cc07789be48aebb322c23d355289e": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_0077aedc3d174560bce924ee89e9c006",
+      "placeholder": "​",
+      "style": "IPY_MODEL_00321cce58884f6f9b3855a21fcd9187",
+      "value": "Add position_id column (Sample Packing) (num_proc=2): 100%"
+     }
+    },
+    "ca65e32eb52f48c09a84b33cb18f22cd": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "cc587493c33c4f118d1b1170f85be24c": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "cc94432d08464affa3e58b560bdad194": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "danger",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_b5b65414154544aa8a71b1a39164aad7",
+      "max": 3963750816,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_f0a58fbd0fca4340890041f99fa2f8c8",
+      "value": 3963750438
+     }
+    },
+    "ccfcdc95baf646f8aeb3d516742383f2": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "cdebbc55a1164c018546c2ac6f8c620c": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_a44f630e099e43899f20a77084ae60cd",
+       "IPY_MODEL_c3725c7f79fe415fbd1ea336f0cc9cf1",
+       "IPY_MODEL_0e50870ed0c643e0b6c18cc5d7ddae7f"
+      ],
+      "layout": "IPY_MODEL_c33ced495f70464aa4a3a91922090853"
+     }
+    },
+    "d02274afd47b462291c745f261209d42": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "d07c8b97d3314f1c852e44bdd40f61ed": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "d0e9dce55cec4c1ca619a0ccf209d924": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "d1f9b10c130542f094c8fd3d1e23b5e9": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "d262c82138024169b9f3aa034ca756fa": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "d3de2662c7964f1ba96e58da382af720": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "d4183e9715f34d249942b8271cca3bdf": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_63580b6fb30642479fe3000915bf551a",
+      "placeholder": "​",
+      "style": "IPY_MODEL_8f726dbfb45d4528afa33e36a6313267",
+      "value": " 27.3M/27.3M [00:00&lt;00:00, 31.0MB/s]"
+     }
+    },
+    "d43c6df07ddb466587807d6dbe1ff614": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_8c4d4fc5a30f4e7cb3be53fe2adda33d",
+      "placeholder": "​",
+      "style": "IPY_MODEL_e90658f4bcb642baa78426012f863152",
+      "value": "model-00004-of-00008.safetensors: 100%"
+     }
+    },
+    "d65b6b060d9845779299491ac5599c31": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "d6fe74e4255444368f8f90a62157d869": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "d93f134f802b4b69b575bdaf07dbd27c": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "d955dcaa0e944e719f3a06139dd54a03": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "da2347ac94764a3fa2743343cf0d3cd2": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "da6e93f3e4984780b930fe7a706983ea": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "daee63fd167e4441a32324b51b00ad2b": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "success",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_d07c8b97d3314f1c852e44bdd40f61ed",
+      "max": 9985,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_ebb69a2c3d0a4299a484698287b3087c",
+      "value": 9985
+     }
+    },
+    "dc892a596f6942d7973c616c38f0eebb": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_c84cc07789be48aebb322c23d355289e",
+       "IPY_MODEL_bed8726b8069434687c75452e21f19e5",
+       "IPY_MODEL_16a188a0b06d45f980dcf3933509fe0a"
+      ],
+      "layout": "IPY_MODEL_60c1a0d765c14a1d888317e6a507e4ea"
+     }
+    },
+    "dd0e646fad3f4a89ba23b39d162bd8d9": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_d43c6df07ddb466587807d6dbe1ff614",
+       "IPY_MODEL_e0e8b840b8ea4d0d9db09afe99fa287d",
+       "IPY_MODEL_9327977822be4b1294f80e876552e305"
+      ],
+      "layout": "IPY_MODEL_77304d1a46b3468a98483e02ec0ac4a4"
+     }
+    },
+    "de7c37ee83e24f0c889e84d07279c2ec": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_34cf3df51fbc41cabfdbba153c007f0e",
+      "placeholder": "​",
+      "style": "IPY_MODEL_ac764024cf1c4e08ba7749afd2cd20ac",
+      "value": "vocab.json: 100%"
+     }
+    },
+    "dfd2a2649b8341ef913207526708aff1": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "e09f1bcbb9d94c09be53e5e1303642c2": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "success",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_e7d8e4fe58384e93a106de546068c65e",
+      "max": 8,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_0aa8ab56b85f4171a79c3bc210594025",
+      "value": 8
+     }
+    },
+    "e0e8b840b8ea4d0d9db09afe99fa287d": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "danger",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_f7434f3e03124a1c938a39af79d7fa59",
+      "max": 3963750880,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_c1314f241a434c41b45d84dc4d3b30f8",
+      "value": 3963750502
+     }
+    },
+    "e21e180307e5485cbbe908672fd6639a": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_2e2b0c1599c341a198f632f46a40c90e",
+       "IPY_MODEL_bff139df987d4a62abec6456cb27f3d4",
+       "IPY_MODEL_ebe1cc366d324ad59b264c8b3c431441"
+      ],
+      "layout": "IPY_MODEL_114dece49dba437c8572ef94b23c3b1e"
+     }
+    },
+    "e366ae3fceec4566b9ed303d6c5f90af": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "e3fb3fc6afe04b3c9b7ac61809ce78fa": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_c6164e05a1914ae48083db9ad7f4ef7c",
+      "placeholder": "​",
+      "style": "IPY_MODEL_813621384dc748b0ad06775e22761c0b",
+      "value": " 9985/9985 [00:03&lt;00:00, 3622.89 examples/s]"
+     }
+    },
+    "e400cbf14bcc446a9d33b210cd93550b": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "e40d1c1ac9494b3bade9858324e7ffdf": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "e575d87a7efe4ec7b1efde489839d4a6": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "e5a82df528bb4e408797a3b6c2758f4a": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "e6e969610738449887259063967f82b0": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "e7d8e4fe58384e93a106de546068c65e": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "e87ea87fcff247b5bbcc331ba79a8dc2": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "e8b7a81040904c1e89e58978223b1737": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "e90658f4bcb642baa78426012f863152": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "eb1c9535e6a546098b760528b2ea387c": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_18357b321ce44d7b8bd9d1c886f69275",
+       "IPY_MODEL_279937fe03bc4e4eb25b472d7e9df163",
+       "IPY_MODEL_bca2c7185b6749fd899c06a2ba4c5e46"
+      ],
+      "layout": "IPY_MODEL_1f7d30f71bbd4547a9150d21da071055"
+     }
+    },
+    "ebb69a2c3d0a4299a484698287b3087c": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "ebc80d1a55fa47f4a5ea2756588569ec": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "ebe1cc366d324ad59b264c8b3c431441": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_fba7aa824b38467ab3061b226114cdec",
+      "placeholder": "​",
+      "style": "IPY_MODEL_f3075dccbd2747b4a7913b66f44f2596",
+      "value": " 3.96G/3.96G [00:13&lt;00:00, 398MB/s]"
+     }
+    },
+    "ec030fc3c346426f9abc3a89892258d3": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "success",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_dfd2a2649b8341ef913207526708aff1",
+      "max": 9985,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_4f1977d7e4824ef1a14b65f0f42bba10",
+      "value": 9985
+     }
+    },
+    "ec11d1e5ae7b42c883d9b1f38a65356e": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_936d04b5fe1b4c63bf0b080e423d051b",
+      "placeholder": "​",
+      "style": "IPY_MODEL_f1cef8e8dc2646fb9fd09f3b09081074",
+      "value": " 36.5k/36.5k [00:00&lt;00:00, 4.32MB/s]"
+     }
+    },
+    "ed28e2e0410d4e0b855467e798e53d66": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "ed5ca967ad5342929e578ac6aa4dc4c0": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "edc99591b9c747b689b94d0052fec14c": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "ef0a3c7a6f14460fb4da096928ae249e": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_07fb3a2c8315494e97b447e672dfae06",
+       "IPY_MODEL_ec030fc3c346426f9abc3a89892258d3",
+       "IPY_MODEL_e3fb3fc6afe04b3c9b7ac61809ce78fa"
+      ],
+      "layout": "IPY_MODEL_c3be9109d63c485d9c0ef4f9bc0f9218"
+     }
+    },
+    "ef223e8504b64e3592589880326aaf41": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "f0a58fbd0fca4340890041f99fa2f8c8": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "f113ebd8c1c34806bea4dd7ed3035173": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "f1cef8e8dc2646fb9fd09f3b09081074": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "f3075dccbd2747b4a7913b66f44f2596": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "f365820a3d3c42b2948abfe32065de14": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_735d4f225b24414294fc1b213c61223c",
+      "placeholder": "​",
+      "style": "IPY_MODEL_5e5e15b0569b474c9620083b3ec6af55",
+      "value": "generation_config.json: 100%"
+     }
+    },
+    "f4667818b9d34a09891cd727a429a610": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_4b27c267393640f28f6eae0875bd2ed9",
+      "placeholder": "​",
+      "style": "IPY_MODEL_9858cb74a09748a39e8149baac96702c",
+      "value": " 3.96G/3.96G [00:11&lt;00:00, 457MB/s]"
+     }
+    },
+    "f4a1795dc7514a718f478245f521f0ba": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "f60a2bdb6b6b4e0e8c3508580e247132": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "danger",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_edc99591b9c747b689b94d0052fec14c",
+      "max": 3963750880,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_35cc989ca3374e7dba0cb166febc4bde",
+      "value": 3963750502
+     }
+    },
+    "f7434f3e03124a1c938a39af79d7fa59": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "f8ef805b776145c3bfa9ba8d90972058": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "fa1282ccc7544e4f818e2f03ccffe4a5": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "fa864b41586f4a7aa56aeafd1d84eb75": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "fba7aa824b38467ab3061b226114cdec": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "fcb30372e7404c5d8a1ad4df91e6c7b2": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "fcbab4d8dced41a18dfccce81e3a45a0": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "fd4f333f7ece4450b04e1a9af1f9d2f6": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_d1f9b10c130542f094c8fd3d1e23b5e9",
+      "placeholder": "​",
+      "style": "IPY_MODEL_e575d87a7efe4ec7b1efde489839d4a6",
+      "value": "model-00006-of-00008.safetensors: 100%"
+     }
+    },
+    "fe18bba7f3fb4c31bf840541f36b3425": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_fd4f333f7ece4450b04e1a9af1f9d2f6",
+       "IPY_MODEL_f60a2bdb6b6b4e0e8c3508580e247132",
+       "IPY_MODEL_c0892a1881de4eb4bfabc6a68f87ae99"
+      ],
+      "layout": "IPY_MODEL_1bec6297c90242a88672d195bc09d429"
+     }
+    },
+    "fe41858c6bd04c58840112b67c19a336": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_e5a82df528bb4e408797a3b6c2758f4a",
+      "placeholder": "​",
+      "style": "IPY_MODEL_f113ebd8c1c34806bea4dd7ed3035173",
+      "value": " 9985/9985 [00:00&lt;00:00, 44264.88 examples/s]"
+     }
+    },
+    "fea1b70fb46745feb5111b3929175b5d": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_f365820a3d3c42b2948abfe32065de14",
+       "IPY_MODEL_823f1c78f15043e38bbd4dca3932a86a",
+       "IPY_MODEL_a1959759c5424da9961fb2a308d4dee4"
+      ],
+      "layout": "IPY_MODEL_34c9c0137b504cd799c6bd6de69507c2"
+     }
+    },
+    "ff3a94b146a948b6907f5d80c7157f99": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "ffdbb12a2f2c4d14911685e7683e0ef0": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_ab93eabd7cea4b94b4b7a387f101e8a1",
+      "placeholder": "​",
+      "style": "IPY_MODEL_704f2f5a9b1c49d5a75a0025a5dda11b",
+      "value": " 3.96G/3.96G [00:12&lt;00:00, 656MB/s]"
+     }
+    }
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
 }
diff --git a/pyproject.toml b/pyproject.toml
index 36138c65d..932219d9e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,3 +26,34 @@ include-package-data = true
 
 [tool.setuptools.cmdclass]
 build_py = "setuptools_axolotl_dynamic_dependencies.BuildPyCommand"
+
+[tool.ruff]
+line-length = 88
+target-version = "py310"
+
+[tool.ruff.lint]
+select = ["E", "F", "W", "C90", "B"]
+ignore = [
+    "E203",  # Whitespace before ':'
+    "E501",  # Line too long
+    "C901",  # Too complex
+    "B019",  # Use of functools.cache on methods
+    "E722",  # Bare except
+    "F821",  # Undefined name (for dynamic exec)
+]
+
+[tool.ruff.lint.isort]
+known-third-party = ["wandb", "comet_ml"]
+known-local-folder = ["src", "tests"]
+# Black-compatible isort settings
+force-single-line = false
+combine-as-imports = true
+split-on-trailing-comma = true
+
+[tool.ruff.format]
+# Use black's formatting style exactly
+quote-style = "double"
+indent-style = "space"
+skip-magic-trailing-comma = false
+line-ending = "auto"
+docstring-code-format = false
diff --git a/scripts/chat_datasets.py b/scripts/chat_datasets.py
index 1a85fcef9..0c1e0bd03 100644
--- a/scripts/chat_datasets.py
+++ b/scripts/chat_datasets.py
@@ -27,7 +27,7 @@ def parse_dataset(dataset=None, split="train"):
             break
     if not field_messages:
         raise ValueError(
-            f'No conversation field found in dataset: {", ".join(feature_keys)}'
+            f"No conversation field found in dataset: {', '.join(feature_keys)}"
         )
     ds_cfg["field_messages"] = field_messages
 
@@ -40,7 +40,7 @@ def parse_dataset(dataset=None, split="train"):
             break
     if not message_property_mappings["role"]:
         raise ValueError(
-            f'No role field found in messages: {", ".join(message_fields)}'
+            f"No role field found in messages: {', '.join(message_fields)}"
         )
 
     for key in ["content", "text", "value"]:
@@ -49,7 +49,7 @@ def parse_dataset(dataset=None, split="train"):
             break
     if not message_property_mappings["content"]:
         raise ValueError(
-            f'No content field found in messages: {", ".join(message_fields)}'
+            f"No content field found in messages: {', '.join(message_fields)}"
         )
     ds_cfg["message_property_mappings"] = message_property_mappings
 
diff --git a/scripts/unsloth_install.py b/scripts/unsloth_install.py
index acbd05e90..c0e5bbe70 100644
--- a/scripts/unsloth_install.py
+++ b/scripts/unsloth_install.py
@@ -1,11 +1,10 @@
 # noqa
-# pylint: skip-file
 import sys
 
 try:
     import torch
-except ImportError:
-    raise ImportError("Install torch via `pip install torch`")
+except ImportError as error:
+    raise ImportError("Install torch via `pip install torch`") from error
 from packaging.version import Version as V
 
 use_uv = "--uv" in sys.argv[1:]
diff --git a/src/axolotl/cli/art.py b/src/axolotl/cli/art.py
index 2051784e9..81dbb9831 100644
--- a/src/axolotl/cli/art.py
+++ b/src/axolotl/cli/art.py
@@ -22,7 +22,7 @@ HAS_PRINTED_LOGO = False
 def print_axolotl_text_art():
     """Prints axolotl ASCII art."""
 
-    global HAS_PRINTED_LOGO  # pylint: disable=global-statement
+    global HAS_PRINTED_LOGO
     if HAS_PRINTED_LOGO:
         return
     if is_main_process():
diff --git a/src/axolotl/cli/cloud/modal_.py b/src/axolotl/cli/cloud/modal_.py
index 6d4f999b4..7f953372d 100644
--- a/src/axolotl/cli/cloud/modal_.py
+++ b/src/axolotl/cli/cloud/modal_.py
@@ -41,7 +41,7 @@ def run_cmd(cmd: str, run_folder: str, volumes=None):
     if exit_code := subprocess.call(  # nosec B603
         cmd.split(), cwd=run_folder, env=new_env
     ):
-        exit(exit_code)  # pylint: disable=consider-using-sys-exit
+        exit(exit_code)
 
     # Commit writes to volume.
     if volumes:
@@ -130,7 +130,6 @@ class ModalCloud(Cloud):
         res = []
         if self.config.secrets:
             for key in self.config.get("secrets", []):
-                # pylint: disable=duplicate-code
                 if isinstance(key, str):
                     if val := os.environ.get(key, ""):
                         res.append(modal.Secret.from_dict({key: val}))
@@ -177,8 +176,8 @@ class ModalCloud(Cloud):
             with self.app.run(detach=True):
                 modal_fn.remote(
                     config_yaml,
-                    volumes={k: v[0] for k, v in self.volumes.items()},
                     *args,
+                    volumes={k: v[0] for k, v in self.volumes.items()},
                     **kwargs,
                 )
 
@@ -187,7 +186,7 @@ class ModalCloud(Cloud):
             return int(self.config.timeout)
         return 60 * 60 * 24  # 24 hours
 
-    def get_train_gpu(self):  # pylint: disable=too-many-return-statements
+    def get_train_gpu(self):
         count = self.config.gpu_count or 1
         family = self.config.gpu.lower() or "l40s"
 
@@ -277,7 +276,7 @@ def _train(
     launcher: Literal["accelerate", "torchrun", "python"] = "accelerate",
     launcher_args: list[str] | None = None,
     volumes=None,
-    **kwargs,  # pylint: disable=unused-argument
+    **kwargs,
 ):
     Path("/workspace/mounts").mkdir(parents=True, exist_ok=True)
     with open("/workspace/mounts/config.yaml", "w", encoding="utf-8") as f_out:
diff --git a/src/axolotl/cli/config.py b/src/axolotl/cli/config.py
index 0f1245aed..20e341a0b 100644
--- a/src/axolotl/cli/config.py
+++ b/src/axolotl/cli/config.py
@@ -210,7 +210,7 @@ def load_cfg(
     try:
         device_props = torch.cuda.get_device_properties("cuda")
         gpu_version = "sm_" + str(device_props.major) + str(device_props.minor)
-    except:  # pylint: disable=bare-except # noqa: E722
+    except:
         gpu_version = None
 
     prepare_plugins(cfg)
diff --git a/src/axolotl/cli/evaluate.py b/src/axolotl/cli/evaluate.py
index 9dd3b0083..1a73937a2 100644
--- a/src/axolotl/cli/evaluate.py
+++ b/src/axolotl/cli/evaluate.py
@@ -28,7 +28,7 @@ def do_evaluate(cfg: DictDefault, cli_args: TrainerCliArgs) -> None:
         cfg: Dictionary mapping `axolotl` config keys to values.
         cli_args: CLI arguments.
     """
-    # pylint: disable=duplicate-code
+
     check_accelerate_default_config()
     if int(os.getenv("LOCAL_RANK", "0")) == 0:
         check_user_token()
@@ -49,7 +49,7 @@ def do_cli(config: Union[Path, str] = Path("examples/"), **kwargs) -> None:
         config: Path to `axolotl` config YAML file.
         kwargs: Additional keyword arguments to override config file values.
     """
-    # pylint: disable=duplicate-code
+
     parsed_cfg = load_cfg(config, **kwargs)
     parser = HfArgumentParser(TrainerCliArgs)
     parsed_cli_args, _ = parser.parse_args_into_dataclasses(
diff --git a/src/axolotl/cli/inference.py b/src/axolotl/cli/inference.py
index d03a91bc7..06b64292f 100644
--- a/src/axolotl/cli/inference.py
+++ b/src/axolotl/cli/inference.py
@@ -35,7 +35,7 @@ def get_multi_line_input() -> str:
 
     instruction = ""
     for line in sys.stdin:
-        instruction += line  # pylint: disable=consider-using-join
+        instruction += line
 
     return instruction
 
@@ -167,7 +167,6 @@ def do_inference_gradio(
         if not instruction:
             return
         if prompter_module:
-            # pylint: disable=stop-iteration-return
             prompt: str = next(
                 prompter_module().build_prompt(instruction=instruction.strip("\n"))
             )
@@ -252,7 +251,7 @@ def do_cli(
         config: Path to `axolotl` config YAML file.
         kwargs: Additional keyword arguments to override config file values.
     """
-    # pylint: disable=duplicate-code
+
     parsed_cfg = load_cfg(config, inference=True, rl=None, **kwargs)
     parsed_cfg.sample_packing = False
     parser = transformers.HfArgumentParser(InferenceCliArgs)
diff --git a/src/axolotl/cli/main.py b/src/axolotl/cli/main.py
index e63392802..acfa81389 100644
--- a/src/axolotl/cli/main.py
+++ b/src/axolotl/cli/main.py
@@ -1,7 +1,5 @@
 """Click CLI definitions for various axolotl commands."""
 
-# pylint: disable=redefined-outer-name
-
 import os
 import subprocess  # nosec B404
 from typing import Literal, Optional
diff --git a/src/axolotl/cli/merge_sharded_fsdp_weights.py b/src/axolotl/cli/merge_sharded_fsdp_weights.py
index c99f37fb1..43142d79e 100644
--- a/src/axolotl/cli/merge_sharded_fsdp_weights.py
+++ b/src/axolotl/cli/merge_sharded_fsdp_weights.py
@@ -32,7 +32,7 @@ LOG = get_logger(__name__)
 class BFloat16CastPlanner(_EmptyStateDictLoadPlanner):
     """A custom planner to cast tensors to bfloat16 on the fly during loading."""
 
-    def commit_tensor(self, read_item, tensor):  # pylint: disable=unused-argument
+    def commit_tensor(self, read_item, tensor):
         tensor.copy_(tensor.to(torch.bfloat16))
 
 
@@ -59,10 +59,10 @@ def _distributed_checkpoint_to_merged_weights(
     state_dict: Dict = {}
     save_path_ = Path(save_path)
     save_path_.mkdir(exist_ok=True)
-    dist_cp_format_utils._load_state_dict(  # pylint: disable=protected-access
+    dist_cp_format_utils._load_state_dict(
         state_dict,
         storage_reader=dist_cp.FileSystemReader(checkpoint_dir),
-        planner=BFloat16CastPlanner(),  # pylint: disable=protected-access
+        planner=BFloat16CastPlanner(),
         no_dist=True,
     )
 
@@ -191,7 +191,7 @@ def do_cli(config: Union[Path, str] = Path("examples/"), **kwargs):
         config: Path to `axolotl` config YAML file.
         kwargs: Additional keyword arguments to override config file values.
     """
-    # pylint: disable=duplicate-code
+
     parsed_cfg = load_cfg(config, **kwargs)
 
     fsdp_dir = Path(parsed_cfg.output_dir) / "pytorch_model_fsdp_0"
diff --git a/src/axolotl/cli/preprocess.py b/src/axolotl/cli/preprocess.py
index 4120062d8..ff4551c64 100644
--- a/src/axolotl/cli/preprocess.py
+++ b/src/axolotl/cli/preprocess.py
@@ -73,7 +73,7 @@ def do_preprocess(cfg: DictDefault, cli_args: PreprocessCliArgs) -> None:
                     AutoModelForCausalLM.from_pretrained(
                         model_name, trust_remote_code=True
                     )
-                except Exception as exc:  # pylint: disable=broad-exception-caught,unused-variable  # nosec B110  # noqa F841
+                except Exception:  # nosec B110
                     pass
                 # fmt: on
 
@@ -95,7 +95,7 @@ def do_cli(
         config: Path to `axolotl` config YAML file.
         kwargs: Additional keyword arguments to override config file values.
     """
-    # pylint: disable=duplicate-code
+
     os.environ["AXOLOTL_IS_PREPROCESS"] = "1"
     is_preprocess = kwargs.pop("is_preprocess", True)
     parsed_cfg = load_cfg(config, is_preprocess=is_preprocess, **kwargs)
diff --git a/src/axolotl/cli/train.py b/src/axolotl/cli/train.py
index 7f0b0bdd2..5e766de37 100644
--- a/src/axolotl/cli/train.py
+++ b/src/axolotl/cli/train.py
@@ -59,7 +59,7 @@ def do_cli(config: Union[Path, str] = Path("examples/"), **kwargs):
         config: Path to `axolotl` config YAML file.
         kwargs: Additional keyword arguments to override config file values.
     """
-    # pylint: disable=duplicate-code
+
     parsed_cfg = load_cfg(config, **kwargs)
     parser = HfArgumentParser(TrainerCliArgs)
     parsed_cli_args, _ = parser.parse_args_into_dataclasses(
diff --git a/src/axolotl/cli/utils/args.py b/src/axolotl/cli/utils/args.py
index 3aea1a378..0aec737b8 100644
--- a/src/axolotl/cli/utils/args.py
+++ b/src/axolotl/cli/utils/args.py
@@ -65,7 +65,7 @@ def add_options_from_dataclass(config_class: Type[Any]) -> Callable:
         for field in reversed(dataclasses.fields(config_class)):
             field_type = _strip_optional_type(field.type)
 
-            if field_type == bool:
+            if field_type is bool:
                 field_name = field.name.replace("_", "-")
                 option_name = f"--{field_name}/--no-{field_name}"
                 function = click.option(
@@ -103,7 +103,7 @@ def add_options_from_config(config_class: Type[BaseModel]) -> Callable:
         for name, field in reversed(config_class.model_fields.items()):
             field_type = _strip_optional_type(field.annotation)
 
-            if field_type == bool:
+            if field_type is bool:
                 field_name = name.replace("_", "-")
                 option_name = f"--{field_name}/--no-{field_name}"
                 function = click.option(
diff --git a/src/axolotl/cli/utils/sweeps.py b/src/axolotl/cli/utils/sweeps.py
index bb1368cf6..2a0aa1367 100644
--- a/src/axolotl/cli/utils/sweeps.py
+++ b/src/axolotl/cli/utils/sweeps.py
@@ -49,7 +49,10 @@ def generate_sweep_configs(
                 new_config = {}
                 # new_config = deepcopy(base_config)
                 # Combine regular parameters with paired parameters
-                full_combo = {**dict(zip(param_names, reg_combo)), **paired_set}
+                full_combo = {
+                    **dict(zip(param_names, reg_combo, strict=False)),
+                    **paired_set,
+                }
                 for param_name, param_value in full_combo.items():
                     new_config[param_name] = param_value
                 print(new_config)
@@ -58,7 +61,7 @@ def generate_sweep_configs(
             # If no paired values, just use regular combinations
             # new_config = deepcopy(base_config)
             new_config = {}
-            for param_name, param_value in zip(param_names, reg_combo):
+            for param_name, param_value in zip(param_names, reg_combo, strict=False):
                 new_config[param_name] = param_value
             print(new_config)
             all_combinations.append(new_config)
diff --git a/src/axolotl/cli/utils/train.py b/src/axolotl/cli/utils/train.py
index b133d7271..6ce7d8df3 100644
--- a/src/axolotl/cli/utils/train.py
+++ b/src/axolotl/cli/utils/train.py
@@ -95,7 +95,6 @@ def generate_config_files(config: str, sweep: str | None) -> Iterator[tuple[str,
         permutation_id = f"sweep{idx:04d}"
         permutation["output_dir"] = str(permutation_dir / permutation_id)
 
-        # pylint: disable=consider-using-with
         temp_file = tempfile.NamedTemporaryFile(
             mode="w",
             suffix=".yaml",
diff --git a/src/axolotl/cli/vllm_serve.py b/src/axolotl/cli/vllm_serve.py
index cf687bea2..ea454fc96 100644
--- a/src/axolotl/cli/vllm_serve.py
+++ b/src/axolotl/cli/vllm_serve.py
@@ -39,7 +39,7 @@ def do_vllm_serve(
     model = cfg.base_model
 
     serve_module = cli_args.get("serve_module", "trl.scripts.vllm_serve")
-    vllm_serve_main = getattr(__import__(serve_module, fromlist=["main"]), "main")
+    vllm_serve_main = __import__(serve_module, fromlist=["main"]).main
     tensor_parallel_size = 1
     data_parallel_size = 1
 
@@ -68,7 +68,6 @@ def do_vllm_serve(
         cli_args.get("enable_reasoning") or cfg.vllm.enable_reasoning or False
     )
 
-    # pylint: disable=unexpected-keyword-arg
     vllm_script_args = AxolotlScriptArguments(
         model=model,
         tensor_parallel_size=tensor_parallel_size,
diff --git a/src/axolotl/common/datasets.py b/src/axolotl/common/datasets.py
index 0ff52ebe1..e7433e3c2 100644
--- a/src/axolotl/common/datasets.py
+++ b/src/axolotl/common/datasets.py
@@ -6,6 +6,7 @@ from dataclasses import dataclass
 
 from datasets import Dataset
 
+import axolotl.monkeypatch.data.batch_dataset_fetcher  # noqa: F401
 from axolotl.cli.args import PreprocessCliArgs, TrainerCliArgs
 from axolotl.loaders import load_processor, load_tokenizer
 from axolotl.utils.data import prepare_datasets, prepare_preference_datasets
diff --git a/src/axolotl/convert.py b/src/axolotl/convert.py
index d1bdb34db..9e09b37dc 100644
--- a/src/axolotl/convert.py
+++ b/src/axolotl/convert.py
@@ -67,9 +67,7 @@ class JsonToJsonlConverter:
         self.json_parser = json_parser
         self.jsonl_serializer = jsonl_serializer
 
-    def convert(
-        self, input_file_path, output_file_path
-    ):  # pylint: disable=unused-argument
+    def convert(self, input_file_path, output_file_path):
         content = self.file_reader.read(input_file_path)
         data = self.json_parser.parse(content)
         # data = [r for r in data if r["conversations"]]  # vicuna cleaned has rows with empty conversations
diff --git a/src/axolotl/core/attention/flex_block_mask.py b/src/axolotl/core/attention/flex_block_mask.py
index fb9820f35..37149983c 100644
--- a/src/axolotl/core/attention/flex_block_mask.py
+++ b/src/axolotl/core/attention/flex_block_mask.py
@@ -84,9 +84,7 @@ def create_causal_mask(
     batch_size, dtype = input_embeds.shape[0], input_embeds.dtype
     if attention_mask is not None:
 
-        def causal_doc_mask_mod(
-            batch_idx, head_idx, q_idx, kv_idx
-        ):  # pylint: disable=unused-argument
+        def causal_doc_mask_mod(batch_idx, head_idx, q_idx, kv_idx):
             """
             Defines the logic of a block causal mask by combining both a standard causal mask
             and a block diagonal document mask.
@@ -103,9 +101,7 @@ def create_causal_mask(
         mask_factory_function = causal_doc_mask_mod
     else:
         mask_factory_function = causal_mask_function
-    mask_interface = ALL_MASK_ATTENTION_FUNCTIONS[
-        config._attn_implementation  # pylint: disable=protected-access
-    ]
+    mask_interface = ALL_MASK_ATTENTION_FUNCTIONS[config._attn_implementation]
 
     # Do not allow skip if we are compiling (this is to match BC)
     allow_is_causal_skip = (
diff --git a/src/axolotl/core/builders/base.py b/src/axolotl/core/builders/base.py
index e1f649715..44699e6ac 100644
--- a/src/axolotl/core/builders/base.py
+++ b/src/axolotl/core/builders/base.py
@@ -44,7 +44,7 @@ from axolotl.utils.schemas.enums import CustomSupportedOptimizers
 LOG = logging.getLogger(__name__)
 
 with suppress(ImportError):
-    import torch._dynamo  # pylint: disable=ungrouped-imports
+    import torch._dynamo
 
 
 class TrainerBuilderBase(abc.ABC):
@@ -260,14 +260,14 @@ class TrainerBuilderBase(abc.ABC):
                 adam_kwargs["eps"] = training_args_kwargs.get("adam_epsilon")
 
             if self.cfg.optimizer == "muon":
-                from axolotl.contribs.mit.muon import (  # pylint: disable=no-name-in-module
+                from axolotl.contribs.mit.muon import (
                     MuonOptimizerFactory,
                 )
 
                 optimizer_cls = MuonOptimizerFactory
                 optimizer_kwargs.update(adam_kwargs)
             elif self.cfg.optimizer == "dion":
-                from axolotl.contribs.mit.dion import (  # pylint: disable=no-name-in-module
+                from axolotl.contribs.mit.dion import (
                     DionOptimizerFactory,
                 )
 
@@ -414,12 +414,8 @@ class TrainerBuilderBase(abc.ABC):
 
     def _configure_torch_compile(self, training_args_kwargs: dict):
         if self.cfg.torch_compile and getattr(torch, "_dynamo", None):
-            torch._dynamo.config.suppress_errors = (  # pylint: disable=protected-access
-                True
-            )
-            torch._dynamo.config.accumulated_cache_size_limit = (  # pylint: disable=protected-access
-                256
-            )
+            torch._dynamo.config.suppress_errors = True
+            torch._dynamo.config.accumulated_cache_size_limit = 256
             training_args_kwargs["torch_compile"] = self.cfg.torch_compile
             if self.cfg.torch_compile_backend:
                 training_args_kwargs["torch_compile_backend"] = (
diff --git a/src/axolotl/core/builders/causal.py b/src/axolotl/core/builders/causal.py
index e5bc21762..94b0db851 100644
--- a/src/axolotl/core/builders/causal.py
+++ b/src/axolotl/core/builders/causal.py
@@ -344,16 +344,14 @@ class HFCausalTrainerBuilder(TrainerBuilderBase):
             training_args_cls = AxolotlPRMConfig
         else:
             training_args_cls = AxolotlTrainingArguments
-        training_args = training_args_cls(  # pylint: disable=unexpected-keyword-arg
+        training_args = training_args_cls(
             **training_arguments_kwargs,
         )
         training_args = self.hook_post_create_training_args(training_args)
 
         # unset run_name so wandb sets up experiment names
         if self.cfg.use_wandb and training_args.run_name == training_args.output_dir:
-            training_args.run_name = (  # pylint: disable=attribute-defined-outside-init
-                None
-            )
+            training_args.run_name = None
 
         data_collator_kwargs = {
             "padding": True,  # True/"longest" is the default
diff --git a/src/axolotl/core/builders/rl.py b/src/axolotl/core/builders/rl.py
index bc7816807..a6e8355f4 100644
--- a/src/axolotl/core/builders/rl.py
+++ b/src/axolotl/core/builders/rl.py
@@ -168,16 +168,14 @@ class HFRLTrainerBuilder(TrainerBuilderBase):
             if plugin_training_args:
                 training_args_kwargs.update(plugin_training_args)
 
-        training_args = training_args_cls(  # pylint: disable=unexpected-keyword-arg
+        training_args = training_args_cls(
             logging_first_step=True,
             **training_args_kwargs,
         )
 
         # unset run_name so wandb sets up experiment names
         if self.cfg.use_wandb and training_args.run_name == training_args.output_dir:
-            training_args.run_name = (  # pylint: disable=attribute-defined-outside-init
-                None
-            )
+            training_args.run_name = None
 
         return training_args, trainer_kwargs
 
diff --git a/src/axolotl/core/chat/format/chatml.py b/src/axolotl/core/chat/format/chatml.py
index 04c398fe8..deb8a9997 100644
--- a/src/axolotl/core/chat/format/chatml.py
+++ b/src/axolotl/core/chat/format/chatml.py
@@ -10,7 +10,7 @@ from .shared import wrap_tools
 
 def format_message(
     message: Messages,
-    message_index: Optional[int] = None,  # pylint: disable=unused-argument
+    message_index: Optional[int] = None,
 ) -> Messages:
     if message.is_chat_formatted:
         return message
diff --git a/src/axolotl/core/chat/messages.py b/src/axolotl/core/chat/messages.py
index 923b177c1..912a12ca1 100644
--- a/src/axolotl/core/chat/messages.py
+++ b/src/axolotl/core/chat/messages.py
@@ -15,11 +15,11 @@ class MessageRoles(str, Enum):
     Message roles for the system, user, assistant, and tools
     """
 
-    system = "system"  # pylint: disable=invalid-name
-    user = "user"  # pylint: disable=invalid-name
-    assistant = "assistant"  # pylint: disable=invalid-name
-    tool = "tool"  # pylint: disable=invalid-name
-    ipython = (  # pylint: disable=invalid-name
+    system = "system"
+    user = "user"
+    assistant = "assistant"
+    tool = "tool"
+    ipython = (
         # for responses from builtin tools
         "ipython"
     )
@@ -30,12 +30,12 @@ class MessageContentTypes(str, Enum):
     Message content types for text, image, audio, tool calls, and tool responses
     """
 
-    special_token = "special_token"  # pylint: disable=invalid-name  # nosec B105
-    text = "text"  # pylint: disable=invalid-name
-    image = "image"  # pylint: disable=invalid-name
-    audio = "audio"  # pylint: disable=invalid-name
-    tool_call = "tool_call"  # pylint: disable=invalid-name  # to differentiate regular responses from tool calls from the assistant
-    tool_response = "tool_response"  # pylint: disable=invalid-name
+    special_token = "special_token"  # nosec B105
+    text = "text"
+    image = "image"
+    audio = "audio"
+    tool_call = "tool_call"
+    tool_response = "tool_response"
 
 
 class SpecialToken(str, Enum):
@@ -43,8 +43,8 @@ class SpecialToken(str, Enum):
     Special tokens for beginning of string and end of string
     """
 
-    bos_token = "bos_token"  # pylint: disable=invalid-name  # nosec B105
-    eos_token = "eos_token"  # pylint: disable=invalid-name  # nosec B105
+    bos_token = "bos_token"  # nosec B105
+    eos_token = "eos_token"  # nosec B105
 
 
 class ToolCallFunction(BaseModel):
@@ -73,7 +73,7 @@ class ToolCallContents(BaseModel):
 
     name: str
     arguments: dict[str, Union[str, int]]
-    id: Optional[str] = None  # pylint: disable=invalid-name
+    id: Optional[str] = None
 
     def __str__(self) -> str:
         data = {"name": self.name, "arguments": self.arguments}
@@ -89,7 +89,7 @@ class ToolResponseContents(BaseModel):
 
     name: str
     content: Union[str, dict[str, Union[str, int, float]]]
-    id: Optional[str] = None  # pylint: disable=invalid-name
+    id: Optional[str] = None
 
     def __str__(self) -> str:
         data = {"name": self.name, "content": self.content}
diff --git a/src/axolotl/core/datasets/transforms/chat_builder.py b/src/axolotl/core/datasets/transforms/chat_builder.py
index 692fe3ebb..8f2013027 100644
--- a/src/axolotl/core/datasets/transforms/chat_builder.py
+++ b/src/axolotl/core/datasets/transforms/chat_builder.py
@@ -1,23 +1,17 @@
 """
-This module contains a function that builds a transform that takes a row from the dataset and converts it to a Chat.
+This module contains a function that builds a transform that takes a row from the
+dataset and converts it to a Chat.
 """
 
-from typing import Any, Mapping, Union
+from typing import Any, Mapping
 
 
-def chat_message_transform_builder(  # pylint: disable=dangerous-default-value
+def chat_message_transform_builder(
     train_on_inputs=False,
     conversations_field: str = "conversations",
-    message_field_role: Union[str, list[str]] = ["role", "from"],  # commonly "role"
-    message_field_content: Union[str, list[str]] = [
-        "value",
-        "text",
-        "content",
-    ],  # commonly "content"
-    message_field_training: Union[str, list[str]] = [
-        "train",
-        "weight",
-    ],  # commonly "weight"
+    message_field_role: str | list[str] | None = None,  # commonly "role"
+    message_field_content: str | list[str] | None = None,  # commonly "content"
+    message_field_training: str | list[str] | None = None,  # commonly "weight"
 ):
     """Builds a transform that takes a row from the dataset and converts it to a Chat
 
@@ -39,6 +33,12 @@ def chat_message_transform_builder(  # pylint: disable=dangerous-default-value
             A function that takes a list of conversations and returns a list of messages.
     """
 
+    if message_field_training is None:
+        message_field_training = ["train", "weight"]
+    if message_field_content is None:
+        message_field_content = ["value", "text", "content"]
+    if message_field_role is None:
+        message_field_role = ["role", "from"]
     message_field_role = (
         [message_field_role]
         if isinstance(message_field_role, str)
diff --git a/src/axolotl/core/trainers/__init__.py b/src/axolotl/core/trainers/__init__.py
index a9cda4efc..22d8b64f6 100644
--- a/src/axolotl/core/trainers/__init__.py
+++ b/src/axolotl/core/trainers/__init__.py
@@ -1,6 +1,5 @@
 """Init for axolotl.core.trainers"""
 
-# pylint: disable=unused-import
 # flake8: noqa
 
 from .base import AxolotlTrainer
diff --git a/src/axolotl/core/trainers/base.py b/src/axolotl/core/trainers/base.py
index 0f9f6e4c4..4b8861790 100644
--- a/src/axolotl/core/trainers/base.py
+++ b/src/axolotl/core/trainers/base.py
@@ -1,7 +1,5 @@
 """Module for customized trainers"""
 
-# pylint: disable=too-many-lines
-
 from __future__ import annotations
 
 import os
@@ -285,9 +283,9 @@ class AxolotlTrainer(
         # fmt: off
         if dataloader_key is not None and self.args.dataloader_persistent_workers:
             if hasattr(self, "_eval_dataloaders"):
-                self._eval_dataloaders[dataloader_key] = dataloader  # type: ignore  # pylint: disable=access-member-before-definition
+                self._eval_dataloaders[dataloader_key] = dataloader  # type: ignore
             else:
-                self._eval_dataloaders = {dataloader_key: dataloader}  # pylint: disable=attribute-defined-outside-init
+                self._eval_dataloaders = {dataloader_key: dataloader}
         # fmt: on
 
         return self.accelerator.prepare(dataloader)
@@ -443,7 +441,7 @@ class AxolotlTrainer(
         model,
         inputs,
         return_outputs=False,
-        num_items_in_batch=None,  # pylint: disable=unused-argument
+        num_items_in_batch=None,
     ):
         concat_inputs = AxolotlTrainer.orpo_concatenate_inputs(
             inputs,
@@ -524,9 +522,7 @@ class AxolotlTrainer(
         accelerator_config = self.args.accelerator_config.to_dict()
         use_configured_state = accelerator_config.get("use_configured_state", False)
         if not use_configured_state:
-            AcceleratorState._reset_state(  # pylint: disable=protected-access
-                reset_partial_state=True
-            )
+            AcceleratorState._reset_state(reset_partial_state=True)
 
         super().create_accelerator_and_postprocess()
 
@@ -540,7 +536,6 @@ class AxolotlTrainer(
             ):
                 self.accelerator.state.fsdp_plugin.limit_all_gathers = True
 
-    # pylint: disable=unused-argument
     def additional_accelerator_args(
         self, fp8: bool = False, enable_fsdp_float8_all_gather: bool = False, **kwargs
     ) -> dict[str, Any]:
diff --git a/src/axolotl/core/trainers/dpo/trainer.py b/src/axolotl/core/trainers/dpo/trainer.py
index b3067bb46..b04505d89 100644
--- a/src/axolotl/core/trainers/dpo/trainer.py
+++ b/src/axolotl/core/trainers/dpo/trainer.py
@@ -101,11 +101,11 @@ class AxolotlDPOTrainer(
     ) -> dict[str, torch.Tensor]:
         if self.args.dpo_norm_loss:
             # fmt: off
-            loss_type: str = self.loss_type  # type: ignore[has-type]  # pylint: disable=access-member-before-definition
+            loss_type: str = self.loss_type  # type: ignore[has-type]
             # fmt: on
             # concatenated_forward handles avg token logprob for ipo case already
-            self.loss_type = "ipo"  # pylint: disable=attribute-defined-outside-init
+            self.loss_type = "ipo"
             res = super().concatenated_forward(model, batch, is_ref_model=is_ref_model)
-            self.loss_type = loss_type  # pylint: disable=attribute-defined-outside-init
+            self.loss_type = loss_type
             return res
         return super().concatenated_forward(model, batch, is_ref_model=is_ref_model)
diff --git a/src/axolotl/core/trainers/grpo/__init__.py b/src/axolotl/core/trainers/grpo/__init__.py
index 4106a2a7d..7eda7a0ba 100644
--- a/src/axolotl/core/trainers/grpo/__init__.py
+++ b/src/axolotl/core/trainers/grpo/__init__.py
@@ -128,9 +128,7 @@ class GRPOStrategy:
         return grpo_args_kwargs
 
     @classmethod
-    def set_trainer_args(
-        cls, cfg: DictDefault
-    ) -> list[Any]:  # pylint: disable=unused-argument
+    def set_trainer_args(cls, cfg: DictDefault) -> list[Any]:
         trainer_args = []
         if cfg.trl and cfg.trl.reward_funcs:
             reward_funcs = []
@@ -151,7 +149,7 @@ class GRPOStrategy:
         return trainer_kwargs
 
     @classmethod
-    def get_collator(cls, *args, **kwargs):  # pylint: disable=unused-argument
+    def get_collator(cls, *args, **kwargs):
         # No data collation is needed in GRPO, handled by trl's trainer __init__
         return None
 
diff --git a/src/axolotl/core/trainers/grpo/trainer.py b/src/axolotl/core/trainers/grpo/trainer.py
index 49caa6406..f9f5a695b 100644
--- a/src/axolotl/core/trainers/grpo/trainer.py
+++ b/src/axolotl/core/trainers/grpo/trainer.py
@@ -1,7 +1,5 @@
 """Axolotl GRPO trainers (with and without sequence parallelism handling)"""
 
-# pylint: disable=too-many-lines,duplicate-code,protected-access,no-member
-
 import warnings
 from functools import partial
 from typing import Any
@@ -52,7 +50,6 @@ from axolotl.core.trainers.mixins.optimizer import OptimizerInitMixin, Optimizer
 from axolotl.monkeypatch.ring_attn import get_ring_attn_group
 
 if is_peft_available():
-    # pylint: disable=unused-import
     from peft import PeftConfig
 
 
@@ -253,7 +250,7 @@ class AxolotlGRPOSequenceParallelTrainer(AxolotlGRPOTrainer):
     def get_train_dataloader(self) -> DataLoader:
         """Get dataloader for training"""
         train_dataset = self.train_dataset
-        # pylint: disable=access-member-before-definition
+
         data_collator = self.data_collator  # type: ignore
 
         # Handle dataset preprocessing
@@ -266,7 +263,7 @@ class AxolotlGRPOSequenceParallelTrainer(AxolotlGRPOTrainer):
                     train_dataset, description="training"
                 )
         else:
-            self.data_collator = self._get_collator_with_removed_columns(  # pylint: disable=attribute-defined-outside-init
+            self.data_collator = self._get_collator_with_removed_columns(
                 data_collator,
                 description="training",
             )
@@ -308,10 +305,10 @@ class AxolotlGRPOSequenceParallelTrainer(AxolotlGRPOTrainer):
         # Generate completions using either vLLM or regular generation
         if self.args.use_vllm:
             # First, have main process load weights if needed
-            # pylint: disable=access-member-before-definition
+
             if self.state.global_step != self._last_loaded_step:  # type: ignore[has-type]
                 self._move_model_to_vllm()
-                # pylint: disable=attribute-defined-outside-init
+
                 self._last_loaded_step = self.state.global_step
 
             # Generate completions using vLLM: gather all prompts and use them in a single call in the main process
@@ -333,8 +330,9 @@ class AxolotlGRPOSequenceParallelTrainer(AxolotlGRPOTrainer):
                         # Extract prompts from this SP group, accounting for num_generations duplicates
                         # We only need prompts from one rank in each SP group
                         group_prompts = all_prompts_text[
-                            group_leader_rank
-                            * len(prompts_text) : (group_leader_rank + 1)
+                            group_leader_rank * len(prompts_text) : (
+                                group_leader_rank + 1
+                            )
                             * len(prompts_text) : self.num_generations
                         ]
 
@@ -485,7 +483,7 @@ class AxolotlGRPOSequenceParallelTrainer(AxolotlGRPOTrainer):
         )
         if is_conversational(inputs[0]):
             completions = []
-            for prompt, completion in zip(prompts, completions_text):
+            for prompt, completion in zip(prompts, completions_text, strict=False):
                 bootstrap = (
                     prompt.pop()["content"] if prompt[-1]["role"] == "assistant" else ""
                 )
@@ -503,6 +501,7 @@ class AxolotlGRPOSequenceParallelTrainer(AxolotlGRPOTrainer):
                 self.reward_funcs,
                 self.reward_processing_classes,
                 self.reward_func_names,
+                strict=False,
             )
         ):
             with profiling_context(self, reward_func_name):
@@ -511,14 +510,17 @@ class AxolotlGRPOSequenceParallelTrainer(AxolotlGRPOTrainer):
                 ):  # Module instead of PretrainedModel for compat with compiled models
                     if is_conversational(inputs[0]):
                         messages = [
-                            {"messages": p + c} for p, c in zip(prompts, completions)
+                            {"messages": p + c}
+                            for p, c in zip(prompts, completions, strict=False)
                         ]
                         texts = [
                             apply_chat_template(x, reward_processing_class)["text"]
                             for x in messages
                         ]
                     else:
-                        texts = [p + c for p, c in zip(prompts, completions)]
+                        texts = [
+                            p + c for p, c in zip(prompts, completions, strict=False)
+                        ]
                     reward_inputs = reward_processing_class(
                         text=texts,
                         return_tensors="pt",
@@ -564,7 +566,8 @@ class AxolotlGRPOSequenceParallelTrainer(AxolotlGRPOTrainer):
             row_reward_kwargs["completion"] = completions[nan_row_idx]
             warnings.warn(
                 f"All reward functions returned None for the following kwargs: {row_reward_kwargs}. "
-                "Please ensure that at least one reward function returns a valid reward."
+                "Please ensure that at least one reward function returns a valid reward.",
+                stacklevel=2,
             )
 
         # Gather the reward per function: this part is crucial, because the rewards are normalized per group and the
diff --git a/src/axolotl/core/trainers/mamba.py b/src/axolotl/core/trainers/mamba.py
index b475b26d9..dedda1b29 100644
--- a/src/axolotl/core/trainers/mamba.py
+++ b/src/axolotl/core/trainers/mamba.py
@@ -5,7 +5,6 @@ import torch
 from axolotl.core.trainers.base import AxolotlTrainer
 
 
-# pylint: disable=too-many-ancestors
 class AxolotlMambaTrainer(AxolotlTrainer):
     """Mamba specific trainer to handle loss calculation"""
 
@@ -15,8 +14,8 @@ class AxolotlMambaTrainer(AxolotlTrainer):
         self,
         model,
         inputs,
-        return_outputs=False,  # pylint: disable=unused-argument
-        num_items_in_batch=None,  # pylint: disable=unused-argument
+        return_outputs=False,
+        num_items_in_batch=None,
     ):
         input_ids = inputs.pop("input_ids")
         lm_logits = model(input_ids).logits
diff --git a/src/axolotl/core/trainers/mixins/__init__.py b/src/axolotl/core/trainers/mixins/__init__.py
index b54577765..5fced1692 100644
--- a/src/axolotl/core/trainers/mixins/__init__.py
+++ b/src/axolotl/core/trainers/mixins/__init__.py
@@ -1,6 +1,5 @@
 """Init for axolotl.core.trainers.mixins"""
 
-# pylint: disable=unused-import
 # flake8: noqa
 
 from .activation_checkpointing import ActivationOffloadingMixin
diff --git a/src/axolotl/core/trainers/mixins/activation_checkpointing.py b/src/axolotl/core/trainers/mixins/activation_checkpointing.py
index 1bfdb49f7..b61c45fee 100644
--- a/src/axolotl/core/trainers/mixins/activation_checkpointing.py
+++ b/src/axolotl/core/trainers/mixins/activation_checkpointing.py
@@ -92,7 +92,7 @@ def get_lora_act_offloading_ctx_manager(
         `contextlib.ContextDecorator`:
             Activation offloading context manager for the model.
     """
-    # pylint: disable=unnecessary-dunder-call
+
     activations_handling_ctx = OffloadActivations(
         use_pin_memory=use_pin_memory,
         use_streams=use_streams,
diff --git a/src/axolotl/core/trainers/mixins/distributed_parallel.py b/src/axolotl/core/trainers/mixins/distributed_parallel.py
index d163e4eb5..77aee5236 100644
--- a/src/axolotl/core/trainers/mixins/distributed_parallel.py
+++ b/src/axolotl/core/trainers/mixins/distributed_parallel.py
@@ -26,7 +26,6 @@ class DistributedParallelMixin(Trainer):
             self.accelerator.distributed_type == "FSDP"
             and self.accelerator.state.fsdp_plugin is None
         ):
-            # pylint: disable=protected-access
             # handle Context Parallelism without FSDP
             self.accelerator.state.distributed_type = "MULTI_GPU"
             self.accelerator.state._shared_state["distributed_type"] = "MULTI_GPU"
diff --git a/src/axolotl/core/trainers/mixins/optimizer.py b/src/axolotl/core/trainers/mixins/optimizer.py
index a9a9a3992..850442c60 100644
--- a/src/axolotl/core/trainers/mixins/optimizer.py
+++ b/src/axolotl/core/trainers/mixins/optimizer.py
@@ -70,11 +70,11 @@ class OptimizerMixin(Trainer):
                 }
             )
         if params["embeddings"]:
-            lr = optimizer_kwargs["lr"]  # pylint: disable=invalid-name
+            lr = optimizer_kwargs["lr"]
             if self.args.embedding_lr_scale:
-                lr *= self.args.embedding_lr_scale  # pylint: disable=invalid-name
+                lr *= self.args.embedding_lr_scale
             elif self.args.embedding_lr:
-                lr = self.args.embedding_lr  # pylint: disable=invalid-name
+                lr = self.args.embedding_lr
             optimizer_grouped_parameters.append(
                 {
                     "params": list(params["embeddings"].values()),
@@ -143,7 +143,7 @@ class OptimizerMixin(Trainer):
                 loraplus_lr_embedding = getattr(
                     self.args, "loraplus_lr_embedding", 1e-6
                 )
-                self.optimizer = create_loraplus_optimizer(  # pylint: disable=attribute-defined-outside-init
+                self.optimizer = create_loraplus_optimizer(
                     opt_model,
                     optimizer_cls,
                     loraplus_lr_ratio=loraplus_lr_ratio,
@@ -185,17 +185,15 @@ class OptimizerMixin(Trainer):
                                 p.data_ptr(): p.numel() for p in module.parameters()
                             }.values()
                         )
-                        LOG.info(f"skipped {module}: {skipped/2**20}M params")
+                        LOG.info(f"skipped {module}: {skipped / 2**20}M params")
                         manager.register_module_override(
                             module, "weight", {"optim_bits": 32}
                         )
                         LOG.debug(f"bitsandbytes: will optimize {module} in fp32")
-                LOG.info(f"skipped: {skipped/2**20}M params")
+                LOG.info(f"skipped: {skipped / 2**20}M params")
 
         if is_sagemaker_mp_enabled():
-            self.optimizer = smp.DistributedOptimizer(  # pylint: disable=attribute-defined-outside-init
-                self.optimizer
-            )
+            self.optimizer = smp.DistributedOptimizer(self.optimizer)
 
         return self.optimizer
 
diff --git a/src/axolotl/core/trainers/mixins/scheduler.py b/src/axolotl/core/trainers/mixins/scheduler.py
index 399bf5947..fc2b0e59d 100644
--- a/src/axolotl/core/trainers/mixins/scheduler.py
+++ b/src/axolotl/core/trainers/mixins/scheduler.py
@@ -46,7 +46,7 @@ class SchedulerMixin(Trainer):
         )
 
         # fmt: off
-        if self.lr_scheduler is None:  # type: ignore  # pylint: disable=access-member-before-definition
+        if self.lr_scheduler is None:  # type: ignore
             # fmt: on
             plugin_manager = PluginManager.get_instance()
             lr_scheduler: LRScheduler | None = plugin_manager.create_lr_scheduler(
@@ -90,7 +90,7 @@ class SchedulerMixin(Trainer):
                     LOG.warning(
                         "Both cosine quadratic warmup and min lr detected. Using quadratic warmup.")
 
-                self.lr_scheduler = get_cosine_schedule_with_quadratic_warmup(  # pylint: disable=attribute-defined-outside-init
+                self.lr_scheduler = get_cosine_schedule_with_quadratic_warmup(
                     optimizer,
                     num_warmup_steps=self.args.get_warmup_steps(num_training_steps),
                     num_training_steps=num_training_steps,
@@ -98,7 +98,7 @@ class SchedulerMixin(Trainer):
             elif self.args.cosine_min_lr_ratio and self.args.cosine_constant_lr_ratio and use_cosine_min_lr:
                 assert 0 <= self.args.cosine_min_lr_ratio <= 1.0, "cosine_min_lr_ratio must be between 0.0 and 1.0"
                 assert 0 <= self.args.cosine_constant_lr_ratio <= 1.0, "cosine_constant_lr_ratio must be between 0.0 and 1.0"
-                self.lr_scheduler = get_cosine_schedule_with_warmup_decay_constant(  # pylint: disable=attribute-defined-outside-init
+                self.lr_scheduler = get_cosine_schedule_with_warmup_decay_constant(
                     optimizer,
                     num_warmup_steps=self.args.get_warmup_steps(num_training_steps),
                     num_training_steps=num_training_steps,
@@ -107,7 +107,7 @@ class SchedulerMixin(Trainer):
                 )
             elif self.args.cosine_min_lr_ratio and use_cosine_min_lr:
                 assert 0 <= self.args.cosine_min_lr_ratio <= 1.0, "cosine_min_lr_ratio must be between 0.0 and 1.0"
-                self.lr_scheduler = get_cosine_schedule_with_min_lr(  # pylint: disable=attribute-defined-outside-init
+                self.lr_scheduler = get_cosine_schedule_with_min_lr(
                     optimizer,
                     num_warmup_steps=self.args.get_warmup_steps(num_training_steps),
                     num_training_steps=num_training_steps,
@@ -133,7 +133,7 @@ class SchedulerMixin(Trainer):
             )
             if not self.lr_scheduler:
                 super().create_scheduler(num_training_steps, optimizer)
-            self.lr_scheduler = JaggedLRRestartScheduler(  # pylint: disable=attribute-defined-outside-init
+            self.lr_scheduler = JaggedLRRestartScheduler(
                 optimizer,
                 self.lr_scheduler,
                 self.args.jagged_restart_steps,
diff --git a/src/axolotl/core/training_args_base.py b/src/axolotl/core/training_args_base.py
index fd0859ae9..a9cc7d224 100644
--- a/src/axolotl/core/training_args_base.py
+++ b/src/axolotl/core/training_args_base.py
@@ -14,7 +14,6 @@ class AxolotlTrainingMixins:
     Mixin class for the Axolotl training args.
     """
 
-    # pylint: disable=duplicate-code
     model_type: Optional[str] = field(
         default=None, metadata={"help": "HF model configuration model_type."}
     )
diff --git a/src/axolotl/datasets.py b/src/axolotl/datasets.py
index c9d006ac8..b8f9484bc 100644
--- a/src/axolotl/datasets.py
+++ b/src/axolotl/datasets.py
@@ -26,7 +26,7 @@ class TokenizedPromptDataset(Dataset):
         keep_in_memory: Whether to keep the tokenized dataset in memory.
     """
 
-    def __init__(  # pylint: disable=super-init-not-called
+    def __init__(
         self,
         prompt_tokenizer: PromptTokenizingStrategy,
         dataset: Dataset,
@@ -99,7 +99,7 @@ class ConstantLengthDataset(IterableDataset):
         seq_length: Length of token sequences to return.
     """
 
-    def __init__(  # pylint: disable=super-init-not-called
+    def __init__(
         self,
         tokenizer,
         datasets,
diff --git a/src/axolotl/evaluate.py b/src/axolotl/evaluate.py
index 2b5869939..e4496bee6 100644
--- a/src/axolotl/evaluate.py
+++ b/src/axolotl/evaluate.py
@@ -79,7 +79,7 @@ def evaluate(*, cfg: DictDefault, dataset_meta: TrainDatasetMeta) -> Dict[str, f
     model, tokenizer, _, processor = setup_model_and_tokenizer(cfg)
 
     # Get datasets
-    # pylint: disable=duplicate-code
+
     train_dataset = dataset_meta.train_dataset
     eval_dataset = dataset_meta.eval_dataset
     total_num_steps = dataset_meta.total_num_steps
diff --git a/src/axolotl/integrations/base.py b/src/axolotl/integrations/base.py
index 94ee8d4b1..8edee18a3 100644
--- a/src/axolotl/integrations/base.py
+++ b/src/axolotl/integrations/base.py
@@ -76,7 +76,7 @@ class BasePlugin:
     def __init__(self):
         """Initializes the BasePlugin."""
 
-    def register(self, cfg: dict):  # pylint: disable=unused-argument
+    def register(self, cfg: dict):
         """Registers the plugin with the given configuration as an unparsed dict.
 
         Args:
@@ -104,14 +104,13 @@ class BasePlugin:
             dataset_meta: The metadata for the training dataset.
         """
 
-    def pre_model_load(self, cfg: DictDefault):  # pylint: disable=unused-argument
+    def pre_model_load(self, cfg: DictDefault):
         """Performs actions before the model is loaded.
 
         Args:
             cfg: The configuration for the plugin.
         """
 
-    # pylint: disable=unused-argument
     def post_model_build(self, cfg: DictDefault, model: PreTrainedModel):
         """Performs actions after the model is built/loaded, but before any adapters are applied.
 
@@ -119,7 +118,6 @@ class BasePlugin:
             cfg: The configuration for the plugin.
         """
 
-    # pylint: disable=unused-argument
     def pre_lora_load(self, cfg: DictDefault, model: PreTrainedModel):
         """Performs actions before LoRA weights are loaded.
 
@@ -128,7 +126,6 @@ class BasePlugin:
             model: The loaded model.
         """
 
-    # pylint: disable=unused-argument
     def post_lora_load(self, cfg: DictDefault, model: PreTrainedModel | PeftModel):
         """Performs actions after LoRA weights are loaded.
 
@@ -137,7 +134,6 @@ class BasePlugin:
             model: The loaded model.
         """
 
-    # pylint: disable=unused-argument
     def post_model_load(self, cfg: DictDefault, model: PreTrainedModel | PeftModel):
         """Performs actions after the model is loaded.
 
@@ -146,7 +142,6 @@ class BasePlugin:
             model: The loaded model.
         """
 
-    # pylint: disable=unused-argument
     def get_trainer_cls(self, cfg: DictDefault) -> Trainer | None:
         """Returns a custom class for the trainer.
 
@@ -157,7 +152,6 @@ class BasePlugin:
             The first non-`None` trainer class returned by a plugin.
         """
 
-    # pylint: disable=unused-argument
     def post_trainer_create(self, cfg: DictDefault, trainer: Trainer):
         """Performs actions after the trainer is created.
 
@@ -166,7 +160,7 @@ class BasePlugin:
             trainer: The trainer object for training.
         """
 
-    def get_training_args(self, cfg: DictDefault):  # pylint: disable=unused-argument):
+    def get_training_args(self, cfg: DictDefault):
         """
         Returns custom training arguments to set on TrainingArgs.
 
@@ -177,9 +171,7 @@ class BasePlugin:
             object: dict containing the training arguments.
         """
 
-    def get_collator_cls_and_kwargs(
-        self, cfg: DictDefault, is_eval: bool = False
-    ):  # pylint: disable=unused-argument):
+    def get_collator_cls_and_kwargs(self, cfg: DictDefault, is_eval: bool = False):
         """
         Returns a custom class for the collator.
 
@@ -191,7 +183,6 @@ class BasePlugin:
             class: The class for the collator.
         """
 
-    # pylint: disable=unused-argument
     def create_optimizer(self, cfg: DictDefault, trainer: Trainer) -> Optimizer | None:
         """Creates and returns an optimizer for training.
 
@@ -203,7 +194,6 @@ class BasePlugin:
             The created optimizer.
         """
 
-    # pylint: disable=unused-argument
     def create_lr_scheduler(
         self,
         cfg: DictDefault,
@@ -223,7 +213,6 @@ class BasePlugin:
             The created learning rate scheduler.
         """
 
-    # pylint: disable=unused-argument
     def add_callbacks_pre_trainer(
         self, cfg: DictDefault, model: PreTrainedModel
     ) -> list[Callable]:
@@ -238,7 +227,6 @@ class BasePlugin:
         """
         return []
 
-    # pylint: disable=unused-argument
     def add_callbacks_post_trainer(
         self, cfg: DictDefault, trainer: Trainer
     ) -> list[Callable]:
@@ -254,7 +242,6 @@ class BasePlugin:
         """
         return []
 
-    # pylint: disable=unused-argument
     def post_train(self, cfg: DictDefault, model: PreTrainedModel | PeftModel):
         """Performs actions after training is complete.
 
@@ -263,7 +250,7 @@ class BasePlugin:
             model: The loaded model.
         """
 
-    def post_train_unload(self, cfg: DictDefault):  # pylint: disable=unused-argument
+    def post_train_unload(self, cfg: DictDefault):
         """Performs actions after training is complete and the model is unloaded.
 
         Args:
@@ -311,7 +298,7 @@ def load_plugin(plugin_name: str) -> BasePlugin:
     return plugin
 
 
-class PluginManager:  # pylint: disable=too-many-public-methods
+class PluginManager:
     """The `PluginManager` class is responsible for loading and managing plugins. It
     should be a singleton so it can be accessed from anywhere in the codebase.
 
diff --git a/src/axolotl/integrations/config.py b/src/axolotl/integrations/config.py
index f5fc07e9e..2217b2819 100644
--- a/src/axolotl/integrations/config.py
+++ b/src/axolotl/integrations/config.py
@@ -50,15 +50,9 @@ def merge_input_args():
         dynamic_input += f"class AxolotlInputConfig(AxolotlInputConfigBase, {', '.join(plugin_classes)}):\n    pass\n"
 
         namespace: Dict[Any, Any] = {}
-        exec(  # pylint: disable=exec-used  # nosec B102
-            dynamic_input, globals(), namespace
-        )
-        AxolotlInputConfig = namespace[  # pylint: disable=invalid-name
-            "AxolotlInputConfig"
-        ]
-        AxolotlConfigWCapabilities = namespace[  # pylint: disable=invalid-name
-            "AxolotlConfigWCapabilities"
-        ]
+        exec(dynamic_input, globals(), namespace)  # nosec B102
+        AxolotlInputConfig = namespace["AxolotlInputConfig"]
+        AxolotlConfigWCapabilities = namespace["AxolotlConfigWCapabilities"]
         return AxolotlConfigWCapabilities, AxolotlInputConfig
     return AxolotlConfigWCapabilitiesBase, AxolotlInputConfigBase
 
@@ -74,7 +68,7 @@ def merge_training_args() -> Type:
     Returns:
     tuple: A tuple containing the newly created classes, AxolotlTrainingMixins.
     """
-    # pylint: disable=duplicate-code
+
     from axolotl.core.training_args_base import (
         AxolotlTrainingMixins as AxolotlTrainingMixinsBase,
     )
@@ -93,11 +87,7 @@ def merge_training_args() -> Type:
 
         namespace: Dict[Any, Any] = {}
         local_vars = {"AxolotlTrainingMixinsBase": AxolotlTrainingMixinsBase}
-        exec(  # pylint: disable=exec-used  # nosec B102
-            dynamic_input, {**globals(), **local_vars}, namespace
-        )
-        AxolotlTrainingMixins = namespace[  # pylint: disable=invalid-name
-            "AxolotlTrainingMixins"
-        ]
+        exec(dynamic_input, {**globals(), **local_vars}, namespace)  # nosec B102
+        AxolotlTrainingMixins = namespace["AxolotlTrainingMixins"]
         return AxolotlTrainingMixins
     return AxolotlTrainingMixinsBase
diff --git a/src/axolotl/integrations/cut_cross_entropy/__init__.py b/src/axolotl/integrations/cut_cross_entropy/__init__.py
index 4689cc9a8..6dd7c97e1 100644
--- a/src/axolotl/integrations/cut_cross_entropy/__init__.py
+++ b/src/axolotl/integrations/cut_cross_entropy/__init__.py
@@ -18,6 +18,7 @@ Module for the Plugin for Cut Cross Entropy integration with Axolotl.
 Cut Cross Entropy is an optimized implementation of cross entropy loss
 from Apple's ML team.
 """
+
 import importlib
 from functools import partial
 
@@ -28,7 +29,7 @@ from axolotl.utils import get_pytorch_version
 from axolotl.utils.callbacks.models import get_causal_lm_model_cls_prefix
 from axolotl.utils.logging import get_logger
 
-from .args import CutCrossEntropyArgs  # pylint: disable=unused-import. # noqa: F401
+from .args import CutCrossEntropyArgs as CutCrossEntropyArgs
 
 LOG = get_logger(__name__)
 
@@ -106,9 +107,7 @@ class CutCrossEntropyPlugin(BasePlugin):
         """
         from cut_cross_entropy.transformers.patch import PATCH_FNS
 
-        def patch_generic(
-            maybe_model, patch_options, model_type: str
-        ):  # pylint: disable=unused-argument
+        def patch_generic(maybe_model, patch_options, model_type: str):
             import cut_cross_entropy.transformers.llama
             from cut_cross_entropy.transformers.llama import cce_forward
 
@@ -121,12 +120,10 @@ class CutCrossEntropyPlugin(BasePlugin):
                 )
                 model_cls = getattr(module, f"{model_cls_prefix}ForCausalLM")
 
-                cut_cross_entropy.transformers.llama._PATCH_OPTS = (  # pylint: disable=protected-access
-                    patch_options
-                )
+                cut_cross_entropy.transformers.llama._PATCH_OPTS = patch_options
 
                 model_cls.forward = cce_forward
-            # pylint: disable=duplicate-code
+
             except (ImportError, AttributeError) as e:
                 raise RuntimeError(
                     f"Could not import ForCausalLM class for model_type: {model_type}. "
diff --git a/src/axolotl/integrations/cut_cross_entropy/args.py b/src/axolotl/integrations/cut_cross_entropy/args.py
index 22852479a..3eeb9fac7 100644
--- a/src/axolotl/integrations/cut_cross_entropy/args.py
+++ b/src/axolotl/integrations/cut_cross_entropy/args.py
@@ -15,6 +15,7 @@
 """
 Module for handling Cut Cross Entropy input arguments.
 """
+
 from typing import Optional
 
 from pydantic import BaseModel, model_validator
diff --git a/src/axolotl/integrations/grokfast/__init__.py b/src/axolotl/integrations/grokfast/__init__.py
index 234d27226..df8cf2cf3 100644
--- a/src/axolotl/integrations/grokfast/__init__.py
+++ b/src/axolotl/integrations/grokfast/__init__.py
@@ -7,7 +7,7 @@ from transformers.trainer_callback import TrainerCallback
 from axolotl.utils.logging import get_logger
 
 from ..base import BasePlugin
-from .args import GrokfastArgs  # pylint: disable=unused-import. # noqa: F401
+from .args import GrokfastArgs as GrokfastArgs
 from .optimizer import gradfilter_ema
 
 LOG = get_logger(__name__)
@@ -24,12 +24,10 @@ class GrokfastCallbackHandler(TrainerCallback):
         self.alpha = alpha
         self.lamb = lamb
 
-    def on_train_begin(self, *args_, **kwargs):  # pylint: disable=unused-argument
+    def on_train_begin(self, *args_, **kwargs):
         self.grads = None
 
-    def on_pre_optimizer_step(
-        self, args_, state, control, **kwargs
-    ):  # pylint: disable=unused-argument
+    def on_pre_optimizer_step(self, args_, state, control, **kwargs):
         model = kwargs.pop("model")
         self.grads = gradfilter_ema(model, self.grads, alpha=self.alpha, lamb=self.lamb)
         return control
diff --git a/src/axolotl/integrations/grokfast/optimizer.py b/src/axolotl/integrations/grokfast/optimizer.py
index 38cda2c93..c83ef43bc 100644
--- a/src/axolotl/integrations/grokfast/optimizer.py
+++ b/src/axolotl/integrations/grokfast/optimizer.py
@@ -1,7 +1,6 @@
 # Copyright: MIT License (c) 2024 Jaerin Lee, Bong Gyun Kang, Kihoon Kim, Kyoung Mu Lee
 # Reference: https://github.com/ironjr/grokfast
 
-# pylint: skip-file
 from collections import deque
 from typing import Dict, Literal, Optional
 
diff --git a/src/axolotl/integrations/kd/__init__.py b/src/axolotl/integrations/kd/__init__.py
index 4c8535a0a..b1a990553 100644
--- a/src/axolotl/integrations/kd/__init__.py
+++ b/src/axolotl/integrations/kd/__init__.py
@@ -15,6 +15,7 @@
 """
 Plugin init to add KD support to Axolotl.
 """
+
 from typing import Any
 
 from transformers import Trainer
@@ -22,7 +23,7 @@ from transformers import Trainer
 from axolotl.integrations.base import BasePlugin
 from axolotl.integrations.kd.callbacks import KDTemperatureSchedulerCallback
 
-from .args import KDArgs  # pylint: disable=unused-import. # noqa: F401
+from .args import KDArgs as KDArgs
 
 
 class KDPlugin(BasePlugin):
diff --git a/src/axolotl/integrations/kd/args.py b/src/axolotl/integrations/kd/args.py
index 758bc8917..425d8ddf6 100644
--- a/src/axolotl/integrations/kd/args.py
+++ b/src/axolotl/integrations/kd/args.py
@@ -15,6 +15,7 @@
 """
 Plugin args for KD support.
 """
+
 from dataclasses import dataclass
 from enum import Enum
 
@@ -26,8 +27,8 @@ class InferenceServerType(str, Enum):
     Online inferences server types to handle different request args
     """
 
-    vllm = "vllm"  # pylint: disable=invalid-name
-    sglang = "sglang"  # pylint: disable=invalid-name
+    vllm = "vllm"
+    sglang = "sglang"
 
 
 class KDArgs(BaseModel):
diff --git a/src/axolotl/integrations/kd/callbacks.py b/src/axolotl/integrations/kd/callbacks.py
index 911c3d517..c73d8a8bb 100644
--- a/src/axolotl/integrations/kd/callbacks.py
+++ b/src/axolotl/integrations/kd/callbacks.py
@@ -19,9 +19,7 @@ class KDTemperatureSchedulerCallback(TrainerCallback):
 
         self.trainer = trainer
 
-    def on_step_end(
-        self, args, state, control, **kwargs
-    ):  # pylint: disable=unused-argument
+    def on_step_end(self, args, state, control, **kwargs):
         # cosine decay temperature over the max steps
 
         progress = state.global_step / state.max_steps
diff --git a/src/axolotl/integrations/kd/chat_template.py b/src/axolotl/integrations/kd/chat_template.py
index 6376ecb09..04f0f24a4 100644
--- a/src/axolotl/integrations/kd/chat_template.py
+++ b/src/axolotl/integrations/kd/chat_template.py
@@ -15,6 +15,7 @@
 """
 Chat template prompt strategy loader with KD support
 """
+
 import logging
 from typing import Any, Dict
 
@@ -192,7 +193,6 @@ class ChatTemplateStrategyWithKDv2(ChatTemplateStrategyWithKD):
         """
         Transform logprobs to target format for KD training
         """
-        # pylint: disable=duplicate-code
 
         logprobs = sample.pop(self.logprobs_field)
         target_seq_len = len(logprobs)
@@ -240,7 +240,7 @@ class ChatTemplateStrategyWithKDv2(ChatTemplateStrategyWithKD):
                 target_mask.append([1] * top_k)
 
         for token_pos_logprobs, pos_target_token_ids in zip(
-            logprobs, sample["target_token_ids"]
+            logprobs, sample["target_token_ids"], strict=False
         ):
             # Convert to a tensor for easier manipulation
             position_logprobs_tensor = torch.tensor(
@@ -299,7 +299,7 @@ class KDStrategyLoader(StrategyLoader):
     Load ChatTemplateStrategy with KD support using StrategyLoader.
     """
 
-    def _get_strategy_cls(self, cfg):  # pylint: disable=unused-argument
+    def _get_strategy_cls(self, cfg):
         return ChatTemplateStrategyWithKD
 
     def _get_strategy_params(self, cfg, ds_cfg: Dict[str, Any]):
@@ -319,7 +319,7 @@ class KDStrategyLoaderV2(KDStrategyLoader):
     Load KD chat template datasets with pre-tokenized logprob data
     """
 
-    def _get_strategy_cls(self, cfg):  # pylint: disable=unused-argument
+    def _get_strategy_cls(self, cfg):
         return ChatTemplateStrategyWithKDv2
 
 
diff --git a/src/axolotl/integrations/kd/collator.py b/src/axolotl/integrations/kd/collator.py
index 0cc745b78..675485d9d 100644
--- a/src/axolotl/integrations/kd/collator.py
+++ b/src/axolotl/integrations/kd/collator.py
@@ -37,7 +37,6 @@ class DataCollatorForKD(DataCollatorForSeq2Seq):
     target_logprobs. It also creates a teacher_mask to indicate which entries are valid.
     """
 
-    # pylint: disable=duplicate-code
     tokenizer: PreTrainedTokenizerBase
     model: Optional[Any] = None
     padding: Union[bool, str, PaddingStrategy] = True
@@ -72,7 +71,7 @@ class DataCollatorForKD(DataCollatorForSeq2Seq):
                         // self.pad_to_multiple_of
                     ) * self.pad_to_multiple_of
 
-                for f in features:  # pylint: disable=invalid-name
+                for f in features:
                     remainder = [pad_token_id] * (max_len - len(f[feature_name]))
                     if isinstance(f[feature_name], list):
                         f[feature_name] = (
@@ -101,7 +100,7 @@ class DataCollatorForKD(DataCollatorForSeq2Seq):
 
         if has_teacher_data:
             # Extract and remove from features
-            for f in features:  # pylint: disable=invalid-name
+            for f in features:
                 target_logprobs_list.append(f.pop("target_logprobs"))
                 target_token_ids_list.append(f.pop("target_token_ids"))
                 target_mask_list.append(f.pop("target_mask"))
@@ -117,24 +116,25 @@ class DataCollatorForKD(DataCollatorForSeq2Seq):
             padded_teacher_mask_list = []
 
             for t_logprobs, t_ids, t_mask in zip(
-                target_logprobs_list, target_token_ids_list, target_mask_list
+                target_logprobs_list,
+                target_token_ids_list,
+                target_mask_list,
+                strict=False,
             ):
                 t_logprobs_padded = []
                 t_ids_padded = []
                 t_mask_padded = []
 
-                for lp, ids, mask in zip(  # pylint: disable=invalid-name
-                    t_logprobs, t_ids, t_mask
-                ):
+                for lp, ids, mask in zip(t_logprobs, t_ids, t_mask, strict=False):
                     lp_len = len(lp)
                     if lp_len < max_k:
                         # Use -1e9 for padding logprobs and 0 for token_ids
                         pad_len = max_k - lp_len
-                        lp = lp + [-1e9] * pad_len  # pylint: disable=invalid-name
+                        lp = lp + [-1e9] * pad_len
                         ids = ids + [0] * pad_len
                         mask = mask + [0] * pad_len
                     else:
-                        lp = lp[:max_k]  # pylint: disable=invalid-name
+                        lp = lp[:max_k]
                         ids = ids[:max_k]
                         mask = mask[:max_k]
 
@@ -216,9 +216,7 @@ class KDBatchSamplerDataCollatorForSeq2Seq(DataCollatorForKD):
         #    We want to produce a single "merged" feature dict for each sub-batch.
         out_features = [{} for _ in features]
 
-        for i, sub_features in enumerate(  # pylint: disable=too-many-nested-blocks
-            features
-        ):
+        for i, sub_features in enumerate(features):
             # sub_features is a list of dicts, each dict = one sequence’s features
             # We'll merge them into out_features[i].
             #
@@ -255,9 +253,7 @@ class KDBatchSamplerDataCollatorForSeq2Seq(DataCollatorForKD):
                         if field_name in feat and isinstance(
                             feat[field_name], (list, torch.Tensor)
                         ):
-                            if isinstance(
-                                feat[field_name][0], (dict, str)
-                            ):  # pylint: disable=too-many-nested-blocks
+                            if isinstance(feat[field_name][0], (dict, str)):
                                 continue
                             arr = np.array(feat[field_name])
                             arrays.append(arr)
diff --git a/src/axolotl/integrations/kd/collator_online_teacher.py b/src/axolotl/integrations/kd/collator_online_teacher.py
index 584ace481..54e55a5e7 100644
--- a/src/axolotl/integrations/kd/collator_online_teacher.py
+++ b/src/axolotl/integrations/kd/collator_online_teacher.py
@@ -144,7 +144,7 @@ class OnlineTeacherCollator(KDBatchSamplerDataCollatorForSeq2Seq):
                 }
 
             for sequence_data, seq_input_ids, seq_labels in zip(
-                api_data, batch_input_ids, labels
+                api_data, batch_input_ids, labels, strict=False
             ):
                 current_target_logprobs = []
                 current_target_token_ids = []
@@ -165,7 +165,7 @@ class OnlineTeacherCollator(KDBatchSamplerDataCollatorForSeq2Seq):
                 assert len(seq_input_ids) == len(input_top_logprobs)
 
                 for i, _, label in zip(
-                    range(len(seq_input_ids)), seq_input_ids, seq_labels
+                    range(len(seq_input_ids)), seq_input_ids, seq_labels, strict=False
                 ):
                     if i < len(input_top_logprobs) and input_top_logprobs[i] is None:
                         # this is always the case for the first token.
@@ -202,7 +202,8 @@ class OnlineTeacherCollator(KDBatchSamplerDataCollatorForSeq2Seq):
 
                         # pos_top_logprobs: list of logprobs, pos_token_ids: list of token_ids
                         pos_logprobs_raw, pos_token_ids, _ = [
-                            list(row) for row in zip(*pos_top_logprobs_data)
+                            list(row)
+                            for row in zip(*pos_top_logprobs_data, strict=False)
                         ]
 
                         # Ensure correct length (top_k)
@@ -317,7 +318,7 @@ class OnlineTeacherCollator(KDBatchSamplerDataCollatorForSeq2Seq):
                 }
 
             for sequence_data, seq_input_ids, seq_labels in zip(
-                choices, batch_input_ids, labels
+                choices, batch_input_ids, labels, strict=False
             ):
                 # seq_input_ids: List[int]
                 # seq_labels: List[int]
@@ -342,7 +343,9 @@ class OnlineTeacherCollator(KDBatchSamplerDataCollatorForSeq2Seq):
 
                 seq_len = len(seq_input_ids)
 
-                for i, _, label in zip(range(seq_len), seq_input_ids, seq_labels):
+                for i, _, label in zip(
+                    range(seq_len), seq_input_ids, seq_labels, strict=False
+                ):
                     if i < len(input_top_logprobs) and input_top_logprobs[i] is None:
                         # this is always the case for the first token.
                         # there is never logprob data for the first token since that's a true input
@@ -424,7 +427,7 @@ class OnlineTeacherCollator(KDBatchSamplerDataCollatorForSeq2Seq):
                             list(range(self.kd_online_topk))
                         )
                         current_target_mask.append([0] * self.kd_online_topk)
-                for i in range(max(0, seq_len - len(current_target_logprobs))):
+                for _ in range(max(0, seq_len - len(current_target_logprobs))):
                     current_target_logprobs.append(
                         [-float("inf")] * self.kd_online_topk
                     )
diff --git a/src/axolotl/integrations/kd/kernels/liger.py b/src/axolotl/integrations/kd/kernels/liger.py
index 6356643c2..61ef3e10a 100644
--- a/src/axolotl/integrations/kd/kernels/liger.py
+++ b/src/axolotl/integrations/kd/kernels/liger.py
@@ -197,7 +197,7 @@ class LigerFusedLinearKLTopKLogprobFunction(LigerFusedLinearDistillationBase):
         compute_ce_loss: bool = True,
         normalize_topk: bool = True,
     ):
-        CHUNK_SIZE = chunk_size  # pylint: disable=invalid-name
+        CHUNK_SIZE = chunk_size
         grad_weight_acc = torch.zeros_like(student_lm_head_weight)
         grad_inputs_list = []
         grad_bias_acc = (
@@ -298,8 +298,8 @@ class LigerFusedLinearKLTopKLogprobFunction(LigerFusedLinearDistillationBase):
             accumulate_chunk_grads_compiled = accumulate_chunk_grads
 
         # Use the same chunking logic as LigerFusedLinearDistillationBase.forward
-        B, N, D = student_input.shape  # pylint: disable=invalid-name
-        K = target_token_ids.shape[-1]  # pylint: disable=invalid-name
+        B, N, D = student_input.shape
+        K = target_token_ids.shape[-1]
 
         student_input_flat = student_input.reshape(-1, student_input.shape[-1])
         target_token_ids_flat = target_token_ids.reshape(-1, target_token_ids.shape[-1])
diff --git a/src/axolotl/integrations/kd/kernels/models.py b/src/axolotl/integrations/kd/kernels/models.py
index 4319f5f7d..f7b468669 100644
--- a/src/axolotl/integrations/kd/kernels/models.py
+++ b/src/axolotl/integrations/kd/kernels/models.py
@@ -40,10 +40,9 @@ def kldiv_forward_llama_like(
     output_attentions: Optional[bool] = None,
     output_hidden_states: Optional[bool] = None,
     cache_position: Optional[torch.LongTensor] = None,
-    logits_to_keep: Union[int, torch.Tensor] = 0,  # pylint: disable=unused-argument
+    logits_to_keep: Union[int, torch.Tensor] = 0,
     **kwargs: Unpack[TransformersKwargs],  # type: ignore[misc]
 ) -> CausalLMOutputWithPast:
-    # pylint: disable=duplicate-code
     output_attentions = (
         output_attentions
         if output_attentions is not None
diff --git a/src/axolotl/integrations/kd/topk_logprob/forward_kl.py b/src/axolotl/integrations/kd/topk_logprob/forward_kl.py
index 74184455f..b79ba26f3 100644
--- a/src/axolotl/integrations/kd/topk_logprob/forward_kl.py
+++ b/src/axolotl/integrations/kd/topk_logprob/forward_kl.py
@@ -15,6 +15,7 @@
 """
 loss for top_k KL divergence
 """
+
 import torch
 from torch import nn
 
@@ -117,7 +118,6 @@ class ChunkedTopKKDLoss(nn.Module):
         target_mask: torch.Tensor,  # [B, seq_len, K]
         num_items_in_batch: int = -1,  # optional batch size for normalization
     ) -> torch.Tensor:
-
         # 1. Split along the "token" dimension (dim=1).
         student_logits_chunks = student_logits.chunk(self.num_output_chunks, dim=1)
         token_ids_chunks = target_token_ids.chunk(self.num_output_chunks, dim=1)
@@ -131,7 +131,11 @@ class ChunkedTopKKDLoss(nn.Module):
 
         # 2. Loop over each chunk and compute a chunk-specific loss.
         for st_chunk, tid_chunk, lp_chunk, msk_chunk in zip(
-            student_logits_chunks, token_ids_chunks, logprobs_chunks, mask_chunks
+            student_logits_chunks,
+            token_ids_chunks,
+            logprobs_chunks,
+            mask_chunks,
+            strict=False,
         ):
             # We pass num_items_in_batch=-1 so that the kd_loss
             # will average over *this chunk's* valid tokens only.
diff --git a/src/axolotl/integrations/kd/trainer.py b/src/axolotl/integrations/kd/trainer.py
index c454b2a2c..7ec43333a 100644
--- a/src/axolotl/integrations/kd/trainer.py
+++ b/src/axolotl/integrations/kd/trainer.py
@@ -21,7 +21,6 @@ from axolotl.core.trainers.base import AxolotlTrainer
 from .kernels.liger import LigerFusedLinearKLTopKLogprobLoss
 
 
-# pylint: disable=too-many-ancestors
 class AxolotlKDTrainer(AxolotlTrainer):
     """
     Custom trainer subclass for Knowledge Distillation (KD)
diff --git a/src/axolotl/integrations/liger/__init__.py b/src/axolotl/integrations/liger/__init__.py
index 86d56be80..c20f4545c 100644
--- a/src/axolotl/integrations/liger/__init__.py
+++ b/src/axolotl/integrations/liger/__init__.py
@@ -18,6 +18,7 @@ Module for the Plugin for LIGER integration with Axolotl.
 Liger Kernel is the collection of Triton-native kernels for LLM Training.
 It is designed to be performant, correct, and light-weight.
 """
+
 from .args import LigerArgs
 from .plugin import LigerPlugin
 
diff --git a/src/axolotl/integrations/liger/models/base.py b/src/axolotl/integrations/liger/models/base.py
index f3cf4299a..a9dbe9412 100644
--- a/src/axolotl/integrations/liger/models/base.py
+++ b/src/axolotl/integrations/liger/models/base.py
@@ -41,7 +41,6 @@ def lce_forward(
             This is useful when using packed tensor format (single dimension for batch and sequence length).
     """
 
-    # pylint: disable=duplicate-code
     output_attentions = (
         output_attentions
         if output_attentions is not None
@@ -181,7 +180,7 @@ def patch_lce_forward(
         model_cls = getattr(module, f"{model_cls_prefix}ForCausalLM")
 
         model_cls.forward = lce_forward
-    # pylint: disable=duplicate-code
+
     except (ImportError, AttributeError) as e:
         raise RuntimeError(
             f"Could not import ForCausalLM class for model_type: {model_type}. "
diff --git a/src/axolotl/integrations/liger/models/deepseekv2.py b/src/axolotl/integrations/liger/models/deepseekv2.py
index 2f0d2a704..99adce4a7 100644
--- a/src/axolotl/integrations/liger/models/deepseekv2.py
+++ b/src/axolotl/integrations/liger/models/deepseekv2.py
@@ -2,8 +2,6 @@
 DeepseekV2 model with LigerFusedLinearCrossEntropyLoss
 """
 
-# pylint: disable=duplicate-code
-
 from typing import List, Optional, Tuple, Union
 
 import torch
diff --git a/src/axolotl/integrations/liger/models/jamba.py b/src/axolotl/integrations/liger/models/jamba.py
index d25529970..78689e40c 100644
--- a/src/axolotl/integrations/liger/models/jamba.py
+++ b/src/axolotl/integrations/liger/models/jamba.py
@@ -2,8 +2,6 @@
 Jamba model with LigerFusedLinearCrossEntropyLoss
 """
 
-# pylint: disable=duplicate-code
-
 from typing import Optional, Tuple, Union
 
 import torch
diff --git a/src/axolotl/integrations/liger/models/llama4.py b/src/axolotl/integrations/liger/models/llama4.py
index 689823bb6..e51140265 100644
--- a/src/axolotl/integrations/liger/models/llama4.py
+++ b/src/axolotl/integrations/liger/models/llama4.py
@@ -46,7 +46,6 @@ def lce_forward(
     Returns:
     """
 
-    # pylint: disable=duplicate-code
     output_attentions = (
         output_attentions
         if output_attentions is not None
@@ -78,9 +77,7 @@ def lce_forward(
     hidden_states = outputs[0]
 
     if hasattr(self.config, "pretraining_tp") and self.config.pretraining_tp > 1:
-        raise Exception(  # pylint: disable=broad-exception-raised
-            "Liger Kernel does not support pretraining_tp!!"
-        )
+        raise Exception("Liger Kernel does not support pretraining_tp!!")
 
     logits = None
     loss = None
@@ -128,7 +125,7 @@ def apply_liger_kernel_to_llama4(
     rms_norm: bool = False,
     glu_activation: bool = False,
     layer_norm: bool = False,
-    **kwargs,  # pylint: disable=unused-argument
+    **kwargs,
 ) -> None:
     """
     Apply Liger kernels to replace original implementation in HuggingFace Llama models (2 and 3)
@@ -144,15 +141,15 @@ def apply_liger_kernel_to_llama4(
         layer_norm (bool): Whether to apply Liger's LayerNorm. Default is False.
     """
 
-    import transformers.models.llama4.modeling_llama4  # noqa: F401  # pylint: disable=unused-import
+    import transformers.models.llama4.modeling_llama4  # noqa: F401
     from liger_kernel.transformers.functional import liger_cross_entropy
     from liger_kernel.transformers.layer_norm import LigerLayerNorm
     from liger_kernel.transformers.rms_norm import LigerRMSNorm
     from liger_kernel.transformers.swiglu import LigerSwiGLUMLP
 
-    assert not (
-        cross_entropy and fused_linear_cross_entropy
-    ), "cross_entropy and fused_linear_cross_entropy cannot both be True."
+    assert not (cross_entropy and fused_linear_cross_entropy), (
+        "cross_entropy and fused_linear_cross_entropy cannot both be True."
+    )
 
     modeling_llama4 = sys.modules["transformers.models.llama4.modeling_llama4"]
 
@@ -165,7 +162,7 @@ def apply_liger_kernel_to_llama4(
             # clone config to avoid modifying the original
             config = deepcopy(config)
             if intermediate_size:
-                setattr(config, "intermediate_size", intermediate_size)
+                config.intermediate_size = intermediate_size
             return LigerSwiGLUMLP(config, **kwargs)
 
         modeling_llama4.Llama4TextMLP = _liger_swiglu_mlp_wrapper
diff --git a/src/axolotl/integrations/liger/models/qwen3.py b/src/axolotl/integrations/liger/models/qwen3.py
index 1dc19eaf9..b008755da 100644
--- a/src/axolotl/integrations/liger/models/qwen3.py
+++ b/src/axolotl/integrations/liger/models/qwen3.py
@@ -43,7 +43,6 @@ def lce_forward(
     Returns:
     """
 
-    # pylint: disable=duplicate-code
     output_attentions = (
         output_attentions
         if output_attentions is not None
@@ -113,9 +112,8 @@ def apply_liger_kernel_to_qwen3(
     rms_norm: bool = False,
     glu_activation: bool = False,
     layer_norm: bool = False,
-    **kwargs,  # pylint: disable=unused-argument
+    **kwargs,
 ) -> None:
-    # pylint: disable=duplicate-code
     """
     Apply Liger kernels to replace original implementation in HuggingFace Llama models (2 and 3)
 
@@ -130,15 +128,15 @@ def apply_liger_kernel_to_qwen3(
         layer_norm (bool): Whether to apply Liger's LayerNorm. Default is False.
     """
 
-    import transformers.models.qwen3.modeling_qwen3  # noqa: F401  # pylint: disable=unused-import
+    import transformers.models.qwen3.modeling_qwen3  # noqa: F401
     from liger_kernel.transformers.functional import liger_cross_entropy
     from liger_kernel.transformers.layer_norm import LigerLayerNorm
     from liger_kernel.transformers.rms_norm import LigerRMSNorm
     from liger_kernel.transformers.swiglu import LigerSwiGLUMLP
 
-    assert not (
-        cross_entropy and fused_linear_cross_entropy
-    ), "cross_entropy and fused_linear_cross_entropy cannot both be True."
+    assert not (cross_entropy and fused_linear_cross_entropy), (
+        "cross_entropy and fused_linear_cross_entropy cannot both be True."
+    )
 
     modeling_qwen3 = sys.modules["transformers.models.qwen3.modeling_qwen3"]
 
diff --git a/src/axolotl/integrations/liger/models/qwen3_moe.py b/src/axolotl/integrations/liger/models/qwen3_moe.py
index 89bdc5bcc..40bee110c 100644
--- a/src/axolotl/integrations/liger/models/qwen3_moe.py
+++ b/src/axolotl/integrations/liger/models/qwen3_moe.py
@@ -45,7 +45,6 @@ def lce_forward(
     Returns:
     """
 
-    # pylint: disable=duplicate-code
     output_attentions = (
         output_attentions
         if output_attentions is not None
@@ -135,9 +134,8 @@ def apply_liger_kernel_to_qwen3_moe(
     rms_norm: bool = False,
     glu_activation: bool = False,
     layer_norm: bool = False,
-    **kwargs,  # pylint: disable=unused-argument
+    **kwargs,
 ) -> None:
-    # pylint: disable=duplicate-code
     """
     Apply Liger kernels to replace original implementation in HuggingFace Llama models (2 and 3)
 
@@ -152,15 +150,15 @@ def apply_liger_kernel_to_qwen3_moe(
         layer_norm (bool): Whether to apply Liger's LayerNorm. Default is False.
     """
 
-    import transformers.models.qwen3_moe.modeling_qwen3_moe  # noqa: F401  # pylint: disable=unused-import
+    import transformers.models.qwen3_moe.modeling_qwen3_moe  # noqa: F401
     from liger_kernel.transformers.functional import liger_cross_entropy
     from liger_kernel.transformers.layer_norm import LigerLayerNorm
     from liger_kernel.transformers.rms_norm import LigerRMSNorm
     from liger_kernel.transformers.swiglu import LigerSwiGLUMLP
 
-    assert not (
-        cross_entropy and fused_linear_cross_entropy
-    ), "cross_entropy and fused_linear_cross_entropy cannot both be True."
+    assert not (cross_entropy and fused_linear_cross_entropy), (
+        "cross_entropy and fused_linear_cross_entropy cannot both be True."
+    )
 
     modeling_qwen3_moe = sys.modules["transformers.models.qwen3_moe.modeling_qwen3_moe"]
 
@@ -174,7 +172,7 @@ def apply_liger_kernel_to_qwen3_moe(
             # clone config to avoid modifying the original
             config = deepcopy(config)
             if intermediate_size:
-                setattr(config, "intermediate_size", intermediate_size)
+                config.intermediate_size = intermediate_size
             return LigerSwiGLUMLP(config, **kwargs)
 
         modeling_qwen3_moe.Qwen3MoeMLP = _liger_swiglu_mlp_wrapper
diff --git a/src/axolotl/integrations/lm_eval/__init__.py b/src/axolotl/integrations/lm_eval/__init__.py
index 8db4dc634..0ab6b8697 100644
--- a/src/axolotl/integrations/lm_eval/__init__.py
+++ b/src/axolotl/integrations/lm_eval/__init__.py
@@ -7,7 +7,7 @@ import subprocess  # nosec
 from axolotl.integrations.base import BasePlugin
 from axolotl.integrations.lm_eval.cli import build_lm_eval_command
 
-from .args import LMEvalArgs  # pylint: disable=unused-import. # noqa: F401
+from .args import LMEvalArgs as LMEvalArgs
 
 
 class LMEvalPlugin(BasePlugin):
@@ -20,7 +20,6 @@ class LMEvalPlugin(BasePlugin):
 
     def post_train_unload(self, cfg):
         if cfg.lm_eval_post_train:
-            # pylint: disable=duplicate-code
             for lm_eval_args in build_lm_eval_command(
                 cfg.lm_eval_tasks,
                 bfloat16=cfg.bfloat16 or cfg.bf16,
diff --git a/src/axolotl/integrations/lm_eval/cli.py b/src/axolotl/integrations/lm_eval/cli.py
index 19608e1d9..ead82dcb7 100644
--- a/src/axolotl/integrations/lm_eval/cli.py
+++ b/src/axolotl/integrations/lm_eval/cli.py
@@ -99,7 +99,6 @@ def lm_eval(config: str, cloud: Optional[str] = None):
         with open(config, encoding="utf-8") as file:
             cfg: DictDefault = DictDefault(yaml.safe_load(file))
 
-        # pylint: disable=duplicate-code
         for lm_eval_args in build_lm_eval_command(
             cfg.lm_eval_tasks,
             bfloat16=cfg.bfloat16 or cfg.bf16,
diff --git a/src/axolotl/integrations/spectrum/__init__.py b/src/axolotl/integrations/spectrum/__init__.py
index 9f66aef97..5e8f9128d 100644
--- a/src/axolotl/integrations/spectrum/__init__.py
+++ b/src/axolotl/integrations/spectrum/__init__.py
@@ -23,7 +23,7 @@ import requests
 from axolotl.integrations.base import BasePlugin
 from axolotl.utils.logging import get_logger
 
-from .args import SpectrumArgs  # pylint: disable=unused-import. # noqa: F401
+from .args import SpectrumArgs as SpectrumArgs
 
 LOG = get_logger(__name__)
 
@@ -46,7 +46,7 @@ def _generate_unfrozen_params_yaml(snr_data, top_fraction=0.5):
         "^lm_head.weight$",
         "^model.embed_tokens.weight$",
     ]
-    for layer_type, layer_names in top_layers_by_type.items():
+    for _, layer_names in top_layers_by_type.items():
         for layer_name in layer_names:
             unfrozen_parameters.append(layer_name)
     return unfrozen_parameters
@@ -84,7 +84,7 @@ class SpectrumPlugin(BasePlugin):
                 snr_data = json.load(fin)
         except FileNotFoundError:
             pass
-        except Exception as exc:  # pylint: disable=broad-exception-caught
+        except Exception as exc:
             LOG.warning(f"Failed to read SNR data from {snr_path}: {exc}")
 
         if not snr_data:
diff --git a/src/axolotl/integrations/spectrum/args.py b/src/axolotl/integrations/spectrum/args.py
index df5756038..be6ca4bfc 100644
--- a/src/axolotl/integrations/spectrum/args.py
+++ b/src/axolotl/integrations/spectrum/args.py
@@ -15,6 +15,7 @@
 """
 Module for handling Spectrum input arguments.
 """
+
 from typing import Optional
 
 from pydantic import BaseModel, model_validator
diff --git a/src/axolotl/kernels/geglu.py b/src/axolotl/kernels/geglu.py
index 6acbea0d4..ee3260ebd 100644
--- a/src/axolotl/kernels/geglu.py
+++ b/src/axolotl/kernels/geglu.py
@@ -5,8 +5,6 @@ See "GLU Variants Improve Transformer" (https://arxiv.org/abs/2002.05202).
 Credit to `unsloth` (https://unsloth.ai/) for inspiration for this implementation.
 """
 
-# pylint: disable=invalid-name,unnecessary-lambda-assignment,duplicate-code
-
 import torch
 import triton
 import triton.language as tl
diff --git a/src/axolotl/kernels/lora.py b/src/axolotl/kernels/lora.py
index fb45f2aa7..c3356fb90 100644
--- a/src/axolotl/kernels/lora.py
+++ b/src/axolotl/kernels/lora.py
@@ -7,8 +7,6 @@ See "LoRA: Low-Rank Adaptation of Large Language Models"
 Credit to `unsloth` (https://unsloth.ai/) for inspiration for this implementation.
 """
 
-# pylint: disable=invalid-name
-
 from typing import Callable
 
 import torch
diff --git a/src/axolotl/kernels/quantize.py b/src/axolotl/kernels/quantize.py
index b61603fbc..d094f2381 100644
--- a/src/axolotl/kernels/quantize.py
+++ b/src/axolotl/kernels/quantize.py
@@ -1,7 +1,5 @@
 """Dequantization utilities for `bitsandbytes` integration."""
 
-# pylint: disable=invalid-name,global-statement
-
 import ctypes
 
 import bitsandbytes as bnb
diff --git a/src/axolotl/kernels/swiglu.py b/src/axolotl/kernels/swiglu.py
index 43a798edc..b13bcd350 100644
--- a/src/axolotl/kernels/swiglu.py
+++ b/src/axolotl/kernels/swiglu.py
@@ -99,7 +99,6 @@ def _swiglu_bwd_kernel(
     tl.store(up_ptr + offsets, grad_up, mask=mask)  # grad wrt up
 
 
-# pylint: disable=unnecessary-lambda-assignment
 def swiglu_forward(gate: torch.Tensor, up: torch.Tensor) -> torch.Tensor:
     """
     SwiGLU forward pass. Computes SwiGLU activation: `x * sigmoid(x) * up`, where
@@ -128,7 +127,6 @@ def swiglu_forward(gate: torch.Tensor, up: torch.Tensor) -> torch.Tensor:
     return out
 
 
-# pylint: disable=unnecessary-lambda-assignment
 def swiglu_backward(
     grad_output: torch.Tensor, gate: torch.Tensor, up: torch.Tensor
 ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
diff --git a/src/axolotl/loaders/__init__.py b/src/axolotl/loaders/__init__.py
index 3eef75e58..ae99bf16d 100644
--- a/src/axolotl/loaders/__init__.py
+++ b/src/axolotl/loaders/__init__.py
@@ -1,6 +1,5 @@
 """Init for axolotl.loaders module"""
 
-# pylint: disable=unused-import
 # flake8: noqa
 
 from .adapter import load_adapter, load_lora
diff --git a/src/axolotl/loaders/adapter.py b/src/axolotl/loaders/adapter.py
index db28206b6..867e6901c 100644
--- a/src/axolotl/loaders/adapter.py
+++ b/src/axolotl/loaders/adapter.py
@@ -28,14 +28,12 @@ LOG = get_logger(__name__)
 def setup_quantized_meta_for_peft(model: torch.nn.Module):
     """Replaces `quant_state.to` with a dummy function to prevent PEFT from moving `quant_state` to meta device"""
 
-    def temp_to_method(self, *args, **kwargs):  # pylint: disable=unused-argument
+    def temp_to_method(self, *args, **kwargs):
         return self
 
     for param in model.parameters():
         if isinstance(param, Params4bit):
-            param.quant_state._orig_to = (  # pylint: disable=protected-access
-                param.quant_state.to
-            )
+            param.quant_state._orig_to = param.quant_state.to
             param.quant_state.to = types.MethodType(temp_to_method, param.quant_state)
 
 
@@ -43,10 +41,8 @@ def setup_quantized_peft_meta_for_training(model: torch.nn.Module):
     """Replaces dummy `quant_state.to` method with the original function to allow training to continue"""
     for param in model.parameters():
         if isinstance(param, Params4bit) and hasattr(param.quant_state, "_orig_to"):
-            param.quant_state.to = (
-                param.quant_state._orig_to  # pylint: disable=protected-access
-            )
-            param.quant_state._orig_to = None  # pylint: disable=protected-access
+            param.quant_state.to = param.quant_state._orig_to
+            param.quant_state._orig_to = None
 
 
 def find_all_linear_names(model):
diff --git a/src/axolotl/loaders/model.py b/src/axolotl/loaders/model.py
index 53ae428a2..a9507d685 100644
--- a/src/axolotl/loaders/model.py
+++ b/src/axolotl/loaders/model.py
@@ -102,7 +102,7 @@ class ModelLoader:
         *,
         inference: bool = False,
         reference_model: bool = False,
-        **kwargs,  # pylint: disable=unused-argument
+        **kwargs,
     ):
         """Initializes the ModelLoader.
 
@@ -134,7 +134,7 @@ class ModelLoader:
 
         # Init model config
         self.model_config = load_model_config(cfg)
-        self.auto_model_loader = AutoModelForCausalLM  # pylint: disable=invalid-name
+        self.auto_model_loader = AutoModelForCausalLM
 
         # Initialize the patch manager
         self.patch_manager = PatchManager(
@@ -607,27 +607,19 @@ class ModelLoader:
             self.model_kwargs["attn_implementation"] = self.cfg.attn_implementation
         elif self.cfg.flex_attention:
             self.model_kwargs["attn_implementation"] = "flex_attention"
-            self.model_config._attn_implementation = (  # pylint: disable=protected-access
-                "flex_attention"
-            )
+            self.model_config._attn_implementation = "flex_attention"
 
         elif self.cfg.flash_attention:
             if not self.cfg.sample_packing and self.cfg.s2_attention:
                 pass
             self.model_kwargs["attn_implementation"] = "flash_attention_2"
-            self.model_config._attn_implementation = (  # pylint: disable=protected-access
-                "flash_attention_2"
-            )
+            self.model_config._attn_implementation = "flash_attention_2"
         elif self.cfg.sdp_attention:
             self.model_kwargs["attn_implementation"] = "sdpa"
-            self.model_config._attn_implementation = (  # pylint: disable=protected-access
-                "sdpa"
-            )
+            self.model_config._attn_implementation = "sdpa"
         elif self.cfg.eager_attention:
             self.model_kwargs["attn_implementation"] = "eager"
-            self.model_config._attn_implementation = (  # pylint: disable=protected-access
-                "eager"
-            )
+            self.model_config._attn_implementation = "eager"
 
         if self.cfg.low_cpu_mem_usage:
             self.model_kwargs["low_cpu_mem_usage"] = True
@@ -767,7 +759,7 @@ class ModelLoader:
                 )
         elif self.model_type == "MambaLMHeadModel":
             # FIXME this is janky at best and hacked together to make it work
-            MambaLMHeadModel = fix_mamba_attn_for_loss()  # pylint: disable=invalid-name
+            MambaLMHeadModel = fix_mamba_attn_for_loss()
 
             self.model_kwargs["dtype"] = self.model_kwargs["torch_dtype"]
             self.model_kwargs["device"] = torch.cuda.current_device()
@@ -816,7 +808,6 @@ class ModelLoader:
         if is_deepspeed_zero3_enabled():
             skip_move_to_device = True
 
-        # pylint: disable=protected-access
         if self.cfg.tensor_parallel_size > 1:
             # workaround for upstream 4.54.0 not setting _tp_size or _device_mesh
             # TODO(wing): remove once 4.54.1 is released
diff --git a/src/axolotl/loaders/tokenizer.py b/src/axolotl/loaders/tokenizer.py
index 0a486d023..dcc255938 100644
--- a/src/axolotl/loaders/tokenizer.py
+++ b/src/axolotl/loaders/tokenizer.py
@@ -50,7 +50,7 @@ def modify_tokenizer_files(
     tokenizer_dir = os.path.join(output_dir, "tokenizer")
     os.makedirs(tokenizer_dir, exist_ok=True)
 
-    if is_local_main_process():  # pylint: disable=too-many-nested-blocks
+    if is_local_main_process():
         # Load the tokenizer
         temp_tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, use_fast=True)
 
@@ -73,9 +73,9 @@ def modify_tokenizer_files(
                 for token_id, new_value in token_id_mappings.items():
                     token_id_str = str(token_id)
                     if token_id_str in config_data["added_tokens_decoder"]:
-                        config_data["added_tokens_decoder"][token_id_str][
-                            "content"
-                        ] = new_value
+                        config_data["added_tokens_decoder"][token_id_str]["content"] = (
+                            new_value
+                        )
                     else:
                         raise ValueError(
                             f"Token ID {token_id_str} not found in added_tokens_decoder"
@@ -215,7 +215,7 @@ def load_tokenizer(cfg: DictDefault) -> PreTrainedTokenizer:
         for k, val in special_tokens.items():
             # check if new special token is not already in tokenizer and
             # is adapter training to make sure lora_modules_to_save is set
-            # pylint: disable=too-many-boolean-expressions
+
             if (
                 (getattr(tokenizer, k) is None or getattr(tokenizer, k) != val)
                 and (len(tokenizer.encode(val, add_special_tokens=False)) > 2)
diff --git a/src/axolotl/models/mamba/__init__.py b/src/axolotl/models/mamba/__init__.py
index fee88e3a4..d6bb40d99 100644
--- a/src/axolotl/models/mamba/__init__.py
+++ b/src/axolotl/models/mamba/__init__.py
@@ -21,4 +21,4 @@ def fix_mamba_attn_for_loss():
     from .modeling_mamba import MambaLMHeadModel as MambaLMHeadModelFixed
 
     mixer_seq_simple.MambaLMHeadModel = MambaLMHeadModelFixed
-    return mixer_seq_simple.MambaLMHeadModel  # pylint: disable=invalid-name
+    return mixer_seq_simple.MambaLMHeadModel
diff --git a/src/axolotl/models/mamba/modeling_mamba.py b/src/axolotl/models/mamba/modeling_mamba.py
index 70e9c88c8..2cfe11544 100644
--- a/src/axolotl/models/mamba/modeling_mamba.py
+++ b/src/axolotl/models/mamba/modeling_mamba.py
@@ -1,4 +1,3 @@
-# pylint: skip-file
 import os
 from collections import namedtuple
 from functools import partial
@@ -112,7 +111,7 @@ class MambaLMHeadModel(nn.Module, GenerationMixin):
         self,
         save_directory: Union[str, os.PathLike],
         state_dict: Optional[dict] = None,
-        safe_serialization: Optional[bool] = None,  # pylint: disable=unused-argument
+        safe_serialization: Optional[bool] = None,
     ):
         if state_dict is None:
             state_dict = self.state_dict()
diff --git a/src/axolotl/monkeypatch/accelerate/fsdp2.py b/src/axolotl/monkeypatch/accelerate/fsdp2.py
index 66d3d0d2d..3b38a33b7 100644
--- a/src/axolotl/monkeypatch/accelerate/fsdp2.py
+++ b/src/axolotl/monkeypatch/accelerate/fsdp2.py
@@ -130,9 +130,9 @@ def get_state_dict(self, model, unwrap=True):
                         "Deepspeed TP requires deepspeed >= 0.16.4, Please update DeepSpeed via `pip install deepspeed -U`."
                     )
                 state_dict = (
-                    model._consolidated_16bit_state_dict()  # pylint: disable=protected-access
+                    model._consolidated_16bit_state_dict()
                     if tp_sharding
-                    else model._zero3_consolidated_16bit_state_dict()  # pylint: disable=protected-access
+                    else model._zero3_consolidated_16bit_state_dict()
                 )
             else:
                 raise ValueError(
@@ -231,8 +231,7 @@ def fsdp2_prepare_model(accelerator, model: torch.nn.Module) -> torch.nn.Module:
     )
 
     is_type_fsdp = isinstance(model, FSDPModule) or (
-        is_compiled_module(model)
-        and isinstance(model._orig_mod, FSDPModule)  # pylint: disable=protected-access
+        is_compiled_module(model) and isinstance(model._orig_mod, FSDPModule)
     )
     if is_type_fsdp:
         return model
diff --git a/src/axolotl/monkeypatch/accelerate/parallelism_config.py b/src/axolotl/monkeypatch/accelerate/parallelism_config.py
index e3cafc87d..b2157fb6b 100644
--- a/src/axolotl/monkeypatch/accelerate/parallelism_config.py
+++ b/src/axolotl/monkeypatch/accelerate/parallelism_config.py
@@ -2,7 +2,6 @@
 workaround to allow parallelism config for pure CP
 """
 
-# pylint: disable=protected-access
 import os
 import warnings
 
@@ -30,7 +29,7 @@ def _validate_accelerator(self, accelerator):
     allow_parallelism_config = False
 
     if (
-        self.cp_size > 1  # pylint: disable=chained-comparison
+        self.cp_size > 1
         and self.dp_shard_size <= 1
         and os.environ.get("ACCELERATE_ALLOW_CP_STANDALONE", "false").lower() == "true"
     ):
@@ -55,6 +54,7 @@ def _validate_accelerator(self, accelerator):
         warnings.warn(
             "ParallelismConfig has the following warnings:\n" + "\n".join(_warnings),
             UserWarning,
+            stacklevel=2,
         )
 
 
diff --git a/src/axolotl/monkeypatch/attention/flex_attn.py b/src/axolotl/monkeypatch/attention/flex_attn.py
index 98aead832..f59b8abe2 100644
--- a/src/axolotl/monkeypatch/attention/flex_attn.py
+++ b/src/axolotl/monkeypatch/attention/flex_attn.py
@@ -65,11 +65,9 @@ def patch_flex_wrapper(**flex_attn_compile_kwargs):
             return self._compiled_flex_attention
 
     transformers.integrations.flex_attention.WrappedFlexAttention = WrappedFlexAttention
-    setattr(
-        sys.modules["transformers.integrations.flex_attention"],
-        "WrappedFlexAttention",
-        WrappedFlexAttention,
-    )
+    sys.modules[
+        "transformers.integrations.flex_attention"
+    ].WrappedFlexAttention = WrappedFlexAttention
 
 
 def patch_flex_make_mask():
@@ -144,9 +142,7 @@ def patch_flex_make_mask():
         # computation prior to the softmax. For sample packing, we need both the
         # logic for both causal mask and document mask. See PyTorch's official
         # blog post for more details: https://pytorch.org/blog/flexattention/#mask-mods
-        def causal_mask_mod(
-            batch_idx, head_idx, q_idx, kv_idx
-        ):  # pylint: disable=unused-argument
+        def causal_mask_mod(batch_idx, head_idx, q_idx, kv_idx):
             """
             Defines the logic of a block causal mask by combining both a standard causal mask
             and a block diagonal document mask.
@@ -198,14 +194,9 @@ def patch_flex_make_mask():
     for n in tuple(sys.modules):
         if ".modeling_" in n:
             if hasattr(sys.modules[n], "make_flex_block_causal_mask"):
-                sys.modules[n].make_flex_block_causal_mask = (
-                    patched_make_flex_block_causal_mask
-                )
-                setattr(
-                    sys.modules[n],
-                    "make_flex_block_causal_mask",
-                    patched_make_flex_block_causal_mask,
-                )
+                sys.modules[
+                    n
+                ].make_flex_block_causal_mask = patched_make_flex_block_causal_mask
 
     transformers.integrations.flex_attention.make_flex_block_causal_mask = (
         patched_make_flex_block_causal_mask
diff --git a/src/axolotl/monkeypatch/attention/xformers.py b/src/axolotl/monkeypatch/attention/xformers.py
index 5901963f0..eca95797a 100644
--- a/src/axolotl/monkeypatch/attention/xformers.py
+++ b/src/axolotl/monkeypatch/attention/xformers.py
@@ -23,15 +23,15 @@ def xformers_attention_forward(
     value: torch.Tensor,
     attention_mask: Optional[torch.Tensor] = None,
     position_ids: Optional[torch.LongTensor] = None,
-    dropout: float = 0.0,  # pylint: disable=unused-argument
-    scaling: Optional[float] = None,  # pylint: disable=unused-argument
-    sliding_window: Optional[int] = None,  # pylint: disable=unused-argument
-    softcap: Optional[float] = None,  # pylint: disable=unused-argument
+    dropout: float = 0.0,
+    scaling: Optional[float] = None,
+    sliding_window: Optional[int] = None,
+    softcap: Optional[float] = None,
     cu_seq_lens_q: Optional[torch.LongTensor] = None,
     cu_seq_lens_k: Optional[torch.LongTensor] = None,
     max_length_q: Optional[int] = None,
-    max_length_k: Optional[int] = None,  # pylint: disable=unused-argument
-    **kwargs,  # pylint: disable=unused-argument
+    max_length_k: Optional[int] = None,
+    **kwargs,
 ):
     # Get dimensions
     # query: [batch, heads, seq_len, hidden_dim]
diff --git a/src/axolotl/monkeypatch/btlm_attn_hijack_flash.py b/src/axolotl/monkeypatch/btlm_attn_hijack_flash.py
index 589980c8b..2c5077392 100644
--- a/src/axolotl/monkeypatch/btlm_attn_hijack_flash.py
+++ b/src/axolotl/monkeypatch/btlm_attn_hijack_flash.py
@@ -25,9 +25,7 @@ def replace_btlm_attn_with_flash_attn(model_name="cerebras/btlm-3b-8k-base"):
         ".configuration_btlm", ".modeling_btlm"
     )
     modeling_btlm = importlib.import_module(module_name)
-    modeling_btlm.BTLMAttention._attn = (  # pylint: disable=protected-access
-        flashattn_attn
-    )
+    modeling_btlm.BTLMAttention._attn = flashattn_attn
 
 
 def flashattn_attn(
@@ -35,9 +33,9 @@ def flashattn_attn(
     query: torch.Tensor,
     key: Optional[torch.Tensor] = None,
     value: Optional[torch.Tensor] = None,
-    attention_mask: Optional[torch.Tensor] = None,  # pylint: disable=unused-argument
+    attention_mask: Optional[torch.Tensor] = None,
     head_mask: Optional[torch.Tensor] = None,
-    position_bias: Optional[torch.Tensor] = None,  # pylint: disable=unused-argument
+    position_bias: Optional[torch.Tensor] = None,
 ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
     softmax_scale = (
         1 / (key.size(-1) ** self.attn_scale_power) if self.scale_attn_weights else None
diff --git a/src/axolotl/monkeypatch/data/batch_dataset_fetcher.py b/src/axolotl/monkeypatch/data/batch_dataset_fetcher.py
index 73bf37b61..c426344a6 100644
--- a/src/axolotl/monkeypatch/data/batch_dataset_fetcher.py
+++ b/src/axolotl/monkeypatch/data/batch_dataset_fetcher.py
@@ -1,7 +1,5 @@
 """Monkey patches for the dataset fetcher to handle batches of packed indexes."""
 
-# pylint: disable=protected-access
-
 import torch
 from torch.utils.data._utils.fetch import _BaseDatasetFetcher
 from torch.utils.data._utils.worker import _worker_loop
diff --git a/src/axolotl/monkeypatch/fsdp2_qlora.py b/src/axolotl/monkeypatch/fsdp2_qlora.py
index 5a4332fff..04d0d1971 100644
--- a/src/axolotl/monkeypatch/fsdp2_qlora.py
+++ b/src/axolotl/monkeypatch/fsdp2_qlora.py
@@ -15,7 +15,6 @@ from axolotl.utils.logging import get_logger
 LOG = get_logger(__name__)
 
 
-# pylint: disable=protected-access
 def apply_init_sharded_param_patch():
     """Apply patch to FSDPParam._init_sharded_param to support Params4bit."""
     from torch.distributed.fsdp._fully_shard._fsdp_param import FSDPParam
@@ -66,14 +65,14 @@ def apply_init_sharded_param_patch():
             if item in patched_source:
                 items_to_import.append(item)
 
-        exec(  # pylint: disable=exec-used  # nosec B102
+        exec(  # nosec B102
             f"from {module_name} import ({', '.join(items_to_import)})",
             globals(),
         )
-        exec(patched_source, globals())  # pylint: disable=exec-used  # nosec B102
+        exec(patched_source, globals())  # nosec B102
 
         # Replace the method
-        FSDPParam._init_sharded_param = patched_init_sharded_param  # pylint: disable=undefined-variable  # noqa: F821
+        FSDPParam._init_sharded_param = patched_init_sharded_param
         LOG.info("Successfully applied FSDP _init_sharded_param patch")
     else:
         LOG.warning("Could not find target code for _init_sharded_param patching")
@@ -131,14 +130,14 @@ def apply_init_unsharded_param_patch():
             if item in patched_source:
                 items_to_import.append(item)
 
-        exec(  # pylint: disable=exec-used  # nosec B102
+        exec(  # nosec B102
             f"from {module_name} import ({', '.join(items_to_import)})",
             globals(),
         )
-        exec(patched_source, globals())  # pylint: disable=exec-used  # nosec B102
+        exec(patched_source, globals())  # nosec B102
 
         # Replace the method
-        FSDPParam.init_unsharded_param = patched_init_unsharded_param  # pylint: disable=undefined-variable  # noqa: F821
+        FSDPParam.init_unsharded_param = patched_init_unsharded_param
         LOG.info("Successfully applied FSDP init_unsharded_param patch")
     else:
         LOG.warning("Could not find target code for patching")
diff --git a/src/axolotl/monkeypatch/gradient_checkpointing/__init__.py b/src/axolotl/monkeypatch/gradient_checkpointing/__init__.py
index 3b090d5e5..b58bbb67c 100644
--- a/src/axolotl/monkeypatch/gradient_checkpointing/__init__.py
+++ b/src/axolotl/monkeypatch/gradient_checkpointing/__init__.py
@@ -25,9 +25,7 @@ else:
         return False
 
 
-def hf_grad_checkpoint_offload_wrapper(
-    decoder_layer, *args, use_reentrant=None
-):  # pylint: disable=unused-argument
+def hf_grad_checkpoint_offload_wrapper(decoder_layer, *args, use_reentrant=None):
     if uses_gc_layers(decoder_layer):
         return CPU_Offloaded_Gradient_Checkpointer.apply(
             decoder_layer,
@@ -44,9 +42,7 @@ def hf_grad_checkpoint_offload_wrapper(
     )
 
 
-def hf_grad_checkpoint_disk_offload_wrapper(
-    decoder_layer, *args, use_reentrant=None
-):  # pylint: disable=unused-argument
+def hf_grad_checkpoint_disk_offload_wrapper(decoder_layer, *args, use_reentrant=None):
     if uses_gc_layers(decoder_layer):
         return Disco.apply(
             decoder_layer,
diff --git a/src/axolotl/monkeypatch/gradient_checkpointing/offload_cpu.py b/src/axolotl/monkeypatch/gradient_checkpointing/offload_cpu.py
index bbcfb91e6..8d06f172d 100644
--- a/src/axolotl/monkeypatch/gradient_checkpointing/offload_cpu.py
+++ b/src/axolotl/monkeypatch/gradient_checkpointing/offload_cpu.py
@@ -35,9 +35,7 @@ else:
     torch_cuda_amp_custom_bwd = torch.amp.custom_bwd(device_type="cuda")
 
 
-class CPU_Offloaded_Gradient_Checkpointer(  # pylint: disable=invalid-name
-    torch.autograd.Function
-):
+class CPU_Offloaded_Gradient_Checkpointer(torch.autograd.Function):
     """
     Saves VRAM by smartly offloading to RAM.
     Tiny hit to performance, since we mask the movement via non blocking calls.
@@ -66,6 +64,4 @@ class CPU_Offloaded_Gradient_Checkpointer(  # pylint: disable=invalid-name
         return (
             None,
             hidden_states.grad,
-        ) + (
-            None,
-        ) * len(ctx.args)
+        ) + (None,) * len(ctx.args)
diff --git a/src/axolotl/monkeypatch/gradient_checkpointing/offload_disk.py b/src/axolotl/monkeypatch/gradient_checkpointing/offload_disk.py
index 792d3c6ef..220799fbf 100644
--- a/src/axolotl/monkeypatch/gradient_checkpointing/offload_disk.py
+++ b/src/axolotl/monkeypatch/gradient_checkpointing/offload_disk.py
@@ -62,9 +62,9 @@ class DiskOffloadManager:
 
         # Track tensor paths and their status
         self.tensor_paths: deque = deque()  # Ordered history of tensor paths (LIFO)
-        self.file_locks: Dict[str, threading.Lock] = (
-            {}
-        )  # Maps file_path -> threading.Lock()
+        self.file_locks: Dict[
+            str, threading.Lock
+        ] = {}  # Maps file_path -> threading.Lock()
         # Maps file_path -> status ("saving", "ready", "prefetching", "loaded", "deleted")
         self.file_status: Dict[str, str] = {}
 
@@ -236,7 +236,7 @@ class DiskOffloadManager:
             self.tensor_paths.append(file_path)
 
         # Acquire semaphore to limit concurrent save operations
-        self.save_semaphore.acquire()  # pylint: disable=consider-using-with
+        self.save_semaphore.acquire()
         # Queue tensor for saving in background
         self.save_queue.put((tensor.detach(), file_path))
 
diff --git a/src/axolotl/monkeypatch/llama_attn_hijack_flash.py b/src/axolotl/monkeypatch/llama_attn_hijack_flash.py
index 1316b5374..3953cb138 100644
--- a/src/axolotl/monkeypatch/llama_attn_hijack_flash.py
+++ b/src/axolotl/monkeypatch/llama_attn_hijack_flash.py
@@ -2,6 +2,7 @@
 
 # copied from https://github.com/lm-sys/FastChat/blob/main/fastchat/train/llama_flash_attn_monkey_patch.py
 
+import importlib.util
 import warnings
 from typing import Optional, Tuple
 
@@ -19,7 +20,7 @@ from axolotl.monkeypatch.utils import set_module_name
 from axolotl.utils.logging import get_logger
 
 try:
-    from flash_attn.flash_attn_interface import (  # pylint: disable=ungrouped-imports
+    from flash_attn.flash_attn_interface import (
         flash_attn_varlen_qkvpacked_func,
     )
 except ImportError:
@@ -32,12 +33,7 @@ LOG = get_logger(__name__)
 
 
 def is_xformers_available() -> bool:
-    try:
-        import xformers  # pylint: disable=unused-import  # noqa: F401
-
-        return True
-    except ImportError:
-        return False
+    return importlib.util.find_spec("xformers") is not None
 
 
 def is_xformers_swiglu_available() -> bool:
@@ -83,7 +79,7 @@ def patch_fa_llama_cross_entropy():
         num_items_in_batch: int = None,
         ignore_index: int = -100,
         **kwargs,
-    ):  # pylint: disable=unused-argument
+    ):
         reduction = "sum" if num_items_in_batch is not None else "mean"
         loss, _ = flash_attn_cross_entropy_loss(
             source, target, ignore_index=ignore_index
@@ -120,9 +116,7 @@ def replace_llama_attn_with_flash_attn(
     rms_norm: Optional[bool] = False,
     use_shifted_sparse_attn: Optional[bool] = False,
 ):
-    transformers.models.llama.modeling_llama.LlamaModel._prepare_decoder_attention_mask = (  # pylint: disable=protected-access
-        _prepare_decoder_attention_mask
-    )
+    transformers.models.llama.modeling_llama.LlamaModel._prepare_decoder_attention_mask = _prepare_decoder_attention_mask
     if use_shifted_sparse_attn:
         transformers.models.llama.modeling_llama.LlamaAttention.forward = (
             flashattn_forward_with_s2attn
@@ -145,7 +139,7 @@ def _prepare_decoder_attention_mask(
     input_shape,
     inputs_embeds,
     past_key_values_length,
-):  # pylint: disable=unused-argument
+):
     # [bsz, seq_len]
     return attention_mask
 
@@ -161,9 +155,9 @@ def flashattn_forward_with_s2attn(
     past_key_value: Optional[Tuple[torch.Tensor]] = None,
     output_attentions: bool = False,
     use_cache: bool = False,
-    padding_mask: Optional[torch.LongTensor] = None,  # pylint: disable=unused-argument
-    cu_seqlens: Optional[torch.Tensor] = None,  # pylint: disable=unused-argument
-    max_seqlen: Optional[torch.Tensor] = None,  # pylint: disable=unused-argument
+    padding_mask: Optional[torch.LongTensor] = None,
+    cu_seqlens: Optional[torch.Tensor] = None,
+    max_seqlen: Optional[torch.Tensor] = None,
 ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
     """Input shape: Batch x Time x Channel
 
@@ -176,7 +170,8 @@ def flashattn_forward_with_s2attn(
     """
     if output_attentions:
         warnings.warn(
-            "Output attentions is not supported for patched `LlamaAttention`, returning `None` instead."
+            "Output attentions is not supported for patched `LlamaAttention`, returning `None` instead.",
+            stacklevel=2,
         )
 
     bsz, q_len, _ = hidden_states.size()
@@ -198,7 +193,6 @@ def flashattn_forward_with_s2attn(
     )
     # [bsz, q_len, nh, hd]
     # [bsz, nh, q_len, hd]
-    # pylint: disable=duplicate-code
 
     cos, sin = self.rotary_emb(value_states, position_ids=position_ids)
     query_states, key_states = apply_rotary_pos_emb(
@@ -244,9 +238,7 @@ def flashattn_forward_with_s2attn(
         .permute(0, 3, 1, 2, 4, 5)
         .reshape(bsz * 2, q_len, 3, self.num_heads // 2, self.head_dim)
     )
-    x = rearrange(  # pylint: disable=invalid-name
-        qkv, "b s three h d -> b s (three h d)"
-    )
+    x = rearrange(qkv, "b s three h d -> b s (three h d)")
     x_unpad, indices, cu_q_lens, max_s = unpad_input(x, key_padding_mask)
     cu_q_len_tmp = torch.arange(
         0, max_s, group_size, device=key_padding_mask.device, dtype=cu_q_lens.dtype
diff --git a/src/axolotl/monkeypatch/llama_attn_hijack_xformers.py b/src/axolotl/monkeypatch/llama_attn_hijack_xformers.py
index 28223eee3..332242e2c 100644
--- a/src/axolotl/monkeypatch/llama_attn_hijack_xformers.py
+++ b/src/axolotl/monkeypatch/llama_attn_hijack_xformers.py
@@ -32,10 +32,9 @@ def xformers_forward(
     past_key_value: Optional[Tuple[torch.Tensor]] = None,
     output_attentions: bool = False,
     use_cache: bool = False,
-    padding_mask: Optional[torch.LongTensor] = None,  # pylint: disable=unused-argument
-    **kwargs,  # pylint: disable=unused-argument
+    padding_mask: Optional[torch.LongTensor] = None,
+    **kwargs,
 ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
-    # pylint: disable=duplicate-code
     bsz, q_len, _ = hidden_states.size()
 
     if not hasattr(self, "pretraining_tp"):
@@ -102,7 +101,8 @@ def xformers_forward(
 
     if output_attentions:
         warnings.warn(
-            "Output attentions is not supported for patched `LlamaAttention`, returning `None` instead."
+            "Output attentions is not supported for patched `LlamaAttention`, returning `None` instead.",
+            stacklevel=2,
         )
 
     #
diff --git a/src/axolotl/monkeypatch/llama_expand_mask.py b/src/axolotl/monkeypatch/llama_expand_mask.py
index 0277c212a..5cfb7818e 100644
--- a/src/axolotl/monkeypatch/llama_expand_mask.py
+++ b/src/axolotl/monkeypatch/llama_expand_mask.py
@@ -21,6 +21,4 @@ def _expand_mask(mask: torch.Tensor, dtype: torch.dtype, tgt_len: Optional[int]
 def hijack_expand_mask():
     import transformers
 
-    transformers.models.llama.modeling_llama._expand_mask = (  # pylint: disable=protected-access
-        _expand_mask
-    )
+    transformers.models.llama.modeling_llama._expand_mask = _expand_mask
diff --git a/src/axolotl/monkeypatch/llama_patch_multipack.py b/src/axolotl/monkeypatch/llama_patch_multipack.py
index cfd525367..8d234881f 100644
--- a/src/axolotl/monkeypatch/llama_patch_multipack.py
+++ b/src/axolotl/monkeypatch/llama_patch_multipack.py
@@ -12,15 +12,15 @@ def hijack_llama_prepare_4d_mask():
     from transformers import modeling_attn_mask_utils
     from transformers.models.llama import modeling_llama
 
-    modeling_llama._prepare_4d_causal_attention_mask_for_sdpa = (  # pylint: disable=protected-access
+    modeling_llama._prepare_4d_causal_attention_mask_for_sdpa = (
         patched_prepare_4d_causal_attention_mask_for_sdpa
     )
-    modeling_attn_mask_utils._prepare_4d_causal_attention_mask_for_sdpa = (  # pylint: disable=protected-access
+    modeling_attn_mask_utils._prepare_4d_causal_attention_mask_for_sdpa = (
         patched_prepare_4d_causal_attention_mask_for_sdpa
     )
-    modeling_llama._prepare_4d_causal_attention_mask = (  # pylint: disable=protected-access
+    modeling_llama._prepare_4d_causal_attention_mask = (
         patched_prepare_4d_causal_attention_mask
     )
-    modeling_attn_mask_utils._prepare_4d_causal_attention_mask = (  # pylint: disable=protected-access
+    modeling_attn_mask_utils._prepare_4d_causal_attention_mask = (
         patched_prepare_4d_causal_attention_mask
     )
diff --git a/src/axolotl/monkeypatch/lora_kernels.py b/src/axolotl/monkeypatch/lora_kernels.py
index be1e1f2ff..ef5174ba2 100644
--- a/src/axolotl/monkeypatch/lora_kernels.py
+++ b/src/axolotl/monkeypatch/lora_kernels.py
@@ -30,48 +30,36 @@ QKV_PATCHES = [
     query_states = self.q_proj(hidden_states).view(hidden_shape).transpose(1, 2)
     key_states = self.k_proj(hidden_states).view(hidden_shape).transpose(1, 2)
     value_states = self.v_proj(hidden_states).view(hidden_shape).transpose(1, 2)
-""".lstrip(
-            "\n"
-        ),
+""".lstrip("\n"),
         """
     query_states, key_states, value_states = self.apply_qkv(hidden_states)
     query_states = query_states.view(hidden_shape).transpose(1, 2)
     key_states = key_states.view(hidden_shape).transpose(1, 2)
     value_states = value_states.view(hidden_shape).transpose(1, 2)
-""".lstrip(
-            "\n"
-        ),
+""".lstrip("\n"),
     ),
     (
         """
     query_states = self.q_norm(self.q_proj(hidden_states).view(hidden_shape)).transpose(1, 2)
     key_states = self.k_norm(self.k_proj(hidden_states).view(hidden_shape)).transpose(1, 2)
     value_states = self.v_proj(hidden_states).view(hidden_shape).transpose(1, 2)
-""".lstrip(
-            "\n"
-        ),
+""".lstrip("\n"),
         """
     query_states, key_states, value_states = self.apply_qkv(hidden_states)
     query_states = self.q_norm(query_states.view(hidden_shape)).transpose(1, 2)
     key_states = self.k_norm(key_states.view(hidden_shape)).transpose(1, 2)
     value_states = value_states.view(hidden_shape).transpose(1, 2)
-""".lstrip(
-            "\n"
-        ),
+""".lstrip("\n"),
     ),
 ]
 
 ORIGINAL_O_CODE = """
     attn_output = self.o_proj(attn_output)
-""".lstrip(
-    "\n"
-)
+""".lstrip("\n")
 
 PATCHED_O_CODE = """
     attn_output = self.apply_o(attn_output)
-""".lstrip(
-    "\n"
-)
+""".lstrip("\n")
 
 SUPPORTED_ACTIVATIONS = ["silu", "gelu"]
 APPLY_FN_MAPPING = {
@@ -176,7 +164,6 @@ def get_attention_cls_from_config(cfg: DictDefault) -> Type[nn.Module]:
         ) from e
 
 
-# pylint: disable=protected-access
 def patch_self_attn_lora(cfg: DictDefault):
     """
     Given an `axolotl` config, this method patches the inferred attention class forward
@@ -203,9 +190,9 @@ def patch_self_attn_lora(cfg: DictDefault):
     attention_cls._original_forward = self_attn_forward
     self_attn_forward, _ = detab_code(self_attn_forward)
 
-    assert any(
-        qkv_options[0] in self_attn_forward for qkv_options in QKV_PATCHES
-    ), "Original QKV code not found"
+    assert any(qkv_options[0] in self_attn_forward for qkv_options in QKV_PATCHES), (
+        "Original QKV code not found"
+    )
     assert ORIGINAL_O_CODE in self_attn_forward, "Original O code not found"
 
     for qkv_orig, qkv_patched in QKV_PATCHES:
@@ -231,16 +218,14 @@ def patch_self_attn_lora(cfg: DictDefault):
         if item in self_attn_forward:
             items_to_import.append(item)
 
-    exec(  # pylint: disable=exec-used  # nosec B102
+    exec(
         f"from {module_name} import ({', '.join(items_to_import)})",
         globals(),
     )
-    exec(self_attn_forward, globals())  # pylint: disable=exec-used  # nosec B102
+    exec(self_attn_forward, globals())
 
     LOG.info(f"Patched attention class with LoRA optims: {attention_cls.__name__}")
-    attention_cls.forward = (
-        axolotl_attn_forward  # pylint: disable=undefined-variable  # noqa: F821
-    )
+    attention_cls.forward = axolotl_attn_forward
 
 
 def find_self_attn_in_layer(
@@ -277,9 +262,13 @@ def find_mlp_in_layer(
                 layer.feedforward.experts.gate_projs,
                 layer.feedforward.experts.up_projs,
                 layer.feedforward.experts.down_projs,
+                strict=False,
             ):
-                yield gate_proj, up_proj, down_proj, FakeMLP(
-                    gate_proj, up_proj, down_proj
+                yield (
+                    gate_proj,
+                    up_proj,
+                    down_proj,
+                    FakeMLP(gate_proj, up_proj, down_proj),
                 )
 
 
@@ -337,9 +326,9 @@ def apply_lora_kernel_patches(
 
     # Get active LoRA adapter config
     if hasattr(model, "active_adapters"):
-        assert (
-            len(model.active_adapters) == 1
-        ), "Axolotl currently does not support LoRA Triton kernels for multiple adapters"
+        assert len(model.active_adapters) == 1, (
+            "Axolotl currently does not support LoRA Triton kernels for multiple adapters"
+        )
         active_adapter = model.active_adapters[0]
     else:
         active_adapter = model.active_adapter
diff --git a/src/axolotl/monkeypatch/loss/chunked.py b/src/axolotl/monkeypatch/loss/chunked.py
index 0a9d0de82..26a52f898 100644
--- a/src/axolotl/monkeypatch/loss/chunked.py
+++ b/src/axolotl/monkeypatch/loss/chunked.py
@@ -25,7 +25,7 @@ class CEWithChunkedOutputLoss(torch.nn.Module):
         self,
         logits: torch.Tensor,
         labels: torch.Tensor,
-        normalize: bool = True,  # pylint: disable=unused-argument
+        normalize: bool = True,
     ) -> torch.Tensor:
         """
         Upcast logits to fp32 and compute cross entropy loss.
@@ -63,7 +63,7 @@ class CEWithChunkedOutputLoss(torch.nn.Module):
 
         # compute one chunk at a time
         total_loss = 0.0
-        for logits_chunk, labels_chunk in zip(logits, labels):
+        for logits_chunk, labels_chunk in zip(logits, labels, strict=False):
             total_loss += self.compute_cross_entropy(logits_chunk, labels_chunk)
 
         if reduction == "sum":
@@ -88,9 +88,9 @@ def get_causal_lm_loss(num_output_chunks: int = 8, ignore_index: int = -100):
         num_items_in_batch: int = None,
         ignore_index: int = -100,
         **kwargs,
-    ):  # pylint: disable=unused-argument
+    ):
         reduction = "sum" if num_items_in_batch is not None else "mean"
-        logit_chunks = [  # pylint: disable=unnecessary-comprehension
+        logit_chunks = [
             chunk for chunk in source.chunk(loss_fn_ce.num_output_chunks, dim=1)
         ]
         loss = loss_fn_ce(logit_chunks, target, reduction=reduction)
@@ -101,7 +101,7 @@ def get_causal_lm_loss(num_output_chunks: int = 8, ignore_index: int = -100):
     def for_causal_lm_chunked_loss(
         logits,
         labels,
-        vocab_size: int = None,  # pylint: disable=unused-argument
+        vocab_size: int = None,
         num_items_in_batch: Optional[int] = None,
         ignore_index: int = -100,
         shift_labels: Optional[torch.Tensor] = None,
diff --git a/src/axolotl/monkeypatch/mistral_attn_hijack_flash.py b/src/axolotl/monkeypatch/mistral_attn_hijack_flash.py
index e1be424a3..0994da91c 100644
--- a/src/axolotl/monkeypatch/mistral_attn_hijack_flash.py
+++ b/src/axolotl/monkeypatch/mistral_attn_hijack_flash.py
@@ -1,7 +1,5 @@
 """Flash attention monkey patch for mistral model"""
 
-# pylint: disable=duplicate-code
-
 from functools import partial
 
 import transformers
diff --git a/src/axolotl/monkeypatch/mixtral/__init__.py b/src/axolotl/monkeypatch/mixtral/__init__.py
index 5b8054000..b353b12cf 100644
--- a/src/axolotl/monkeypatch/mixtral/__init__.py
+++ b/src/axolotl/monkeypatch/mixtral/__init__.py
@@ -31,14 +31,12 @@ def patch_mixtral_moe_forward_zero3() -> None:
         topk_weight = topk_weight.to(hidden_states.dtype)
 
         hidden_states = hidden_states.repeat_interleave(self.top_k, dim=0)
-        y = torch.empty_like(hidden_states)  # pylint: disable=invalid-name
+        y = torch.empty_like(hidden_states)
         flat_topk_idx = topk_idx.view(-1)
         for i in range(self.num_experts):
             expert = self.experts[i]
             y[flat_topk_idx == i] = expert(hidden_states[flat_topk_idx == i])
-        y = (  # pylint: disable=invalid-name
-            y.view(*topk_weight.shape, -1) * topk_weight.unsqueeze(-1)
-        ).sum(dim=1)
+        y = (y.view(*topk_weight.shape, -1) * topk_weight.unsqueeze(-1)).sum(dim=1)
         final_hidden_states = y.reshape(batch_size, sequence_length, hidden_dim)
         return final_hidden_states, router_logits
 
diff --git a/src/axolotl/monkeypatch/models/llama4/modeling.py b/src/axolotl/monkeypatch/models/llama4/modeling.py
index 4127793e7..0fc8f5699 100644
--- a/src/axolotl/monkeypatch/models/llama4/modeling.py
+++ b/src/axolotl/monkeypatch/models/llama4/modeling.py
@@ -95,18 +95,12 @@ def patch_llama4_linearized_modeling():
 
     old_lamma_4_text_experts = modeling_llama4.Llama4TextExperts
     modeling_llama4.Llama4TextExperts = Llama4TextExperts
-    setattr(
-        sys.modules["transformers.models.llama4"],
-        "Llama4TextExperts",
-        Llama4TextExperts,
-    )
+    sys.modules["transformers.models.llama4"].Llama4TextExperts = Llama4TextExperts
 
     def unpatch():
         modeling_llama4.Llama4TextExperts = old_lamma_4_text_experts
-        setattr(
-            sys.modules["transformers.models.llama4"],
-            "Llama4TextExperts",
-            old_lamma_4_text_experts,
-        )
+        sys.modules[
+            "transformers.models.llama4"
+        ].Llama4TextExperts = old_lamma_4_text_experts
 
     return unpatch
diff --git a/src/axolotl/monkeypatch/multipack.py b/src/axolotl/monkeypatch/multipack.py
index 7df9877d7..e4f9ca2be 100644
--- a/src/axolotl/monkeypatch/multipack.py
+++ b/src/axolotl/monkeypatch/multipack.py
@@ -49,9 +49,7 @@ def patch_for_multipack(model_type, model_name=None, has_remote_code=False):
         assert hasattr(
             transformers.modeling_flash_attention_utils, "_get_unpad_data"
         ), "transformers api changed for _get_unpad_data for flash attention"
-        transformers.modeling_flash_attention_utils._get_unpad_data = (  # pylint: disable=protected-access
-            get_unpad_data
-        )
+        transformers.modeling_flash_attention_utils._get_unpad_data = get_unpad_data
 
     if model_type == "mixtral" and is_deepspeed_zero3_enabled():
         patch_mixtral_moe_forward_zero3()
@@ -67,6 +65,4 @@ def patch_remote(model_name):
     module_name = ".".join(parts)
     modeling_arch = importlib.import_module(module_name)
     if hasattr(modeling_arch, "_get_unpad_data"):
-        modeling_arch._get_unpad_data = (  # pylint: disable=protected-access
-            get_unpad_data
-        )
+        modeling_arch._get_unpad_data = get_unpad_data
diff --git a/src/axolotl/monkeypatch/peft/utils.py b/src/axolotl/monkeypatch/peft/utils.py
index 0c571fbd2..d1011f5eb 100644
--- a/src/axolotl/monkeypatch/peft/utils.py
+++ b/src/axolotl/monkeypatch/peft/utils.py
@@ -49,9 +49,7 @@ def patch_peft_prep_code():
         prep_code = get_peft_prep_code()
     except OSError:
         return
-    peft.utils.other._original_create_accelerator_and_postprocess = (  # pylint: disable=protected-access
-        prep_code
-    )
+    peft.utils.other._original_create_accelerator_and_postprocess = prep_code
     prep_code, _ = detab_code(prep_code)
     if ORIGINAL_PREPARE_CODE not in prep_code:
         return
@@ -68,11 +66,15 @@ def patch_peft_prep_code():
         if item in prep_code:
             items_to_import.append(item)
 
-    exec(  # pylint: disable=exec-used  # nosec B102
+    exec(
         "from peft.utils.other import (" + ", ".join(x for x in items_to_import) + ")",
         globals(),
     )
-    exec(prep_code, globals())  # pylint: disable=exec-used  # nosec B102
+    exec(prep_code, globals())
     LOG.info("patching prepare_model_for_kbit_training to allow for overrides")
-    peft.utils.other.prepare_model_for_kbit_training = fixed_prepare_model_for_kbit_training  # pylint: disable=protected-access  # pylint: disable=undefined-variable  # noqa: F821
-    axolotl.loaders.model.prepare_model_for_kbit_training = fixed_prepare_model_for_kbit_training  # pylint: disable=protected-access  # pylint: disable=undefined-variable  # noqa: F821
+    peft.utils.other.prepare_model_for_kbit_training = (
+        fixed_prepare_model_for_kbit_training
+    )
+    axolotl.loaders.model.prepare_model_for_kbit_training = (
+        fixed_prepare_model_for_kbit_training
+    )
diff --git a/src/axolotl/monkeypatch/relora.py b/src/axolotl/monkeypatch/relora.py
index 0028a0cf6..a01d850b3 100644
--- a/src/axolotl/monkeypatch/relora.py
+++ b/src/axolotl/monkeypatch/relora.py
@@ -91,9 +91,9 @@ class ReLoRACallback(TrainerCallback):
         if not os.path.exists(self.last_full_model):
             self.last_full_model = str(Path(snapshot_download(cfg.base_model)))
 
-        assert os.path.exists(
-            self.last_full_model
-        ), "for ReLORA base_model must be a local path"
+        assert os.path.exists(self.last_full_model), (
+            "for ReLORA base_model must be a local path"
+        )
 
         self.num_lora_restarts = 0
         self.need_full_save = False
@@ -293,7 +293,6 @@ def find_lora_modules(model: peft.LoraModel) -> Dict[str, peft.tuners.lora.LoraL
     key_list = [key for key, _ in model.model.named_modules() if "lora" not in key]
     for key in key_list:
         try:
-            # pylint: disable=protected-access
             _parent, target, _target_name = peft.utils._get_submodules(model.model, key)
         except AttributeError:
             continue
@@ -341,7 +340,7 @@ def merge_and_save(
     modules = find_lora_modules(model)
 
     if not quantized:
-        for module_name, target in modules.items():
+        for _, target in modules.items():
             active_adapter = target.active_adapter
             if isinstance(active_adapter, list):
                 active_adapter = active_adapter[0]
diff --git a/src/axolotl/monkeypatch/ring_attn/__init__.py b/src/axolotl/monkeypatch/ring_attn/__init__.py
index 736378b16..1c14776c9 100644
--- a/src/axolotl/monkeypatch/ring_attn/__init__.py
+++ b/src/axolotl/monkeypatch/ring_attn/__init__.py
@@ -1,6 +1,5 @@
 """Init for ring attention monkeypatch module"""
 
-# pylint: disable=unused-import
 # flake8: noqa
 
 from .patch import (
diff --git a/src/axolotl/monkeypatch/ring_attn/adapters/batch.py b/src/axolotl/monkeypatch/ring_attn/adapters/batch.py
index 607b4dd71..74d33ed4a 100644
--- a/src/axolotl/monkeypatch/ring_attn/adapters/batch.py
+++ b/src/axolotl/monkeypatch/ring_attn/adapters/batch.py
@@ -7,8 +7,6 @@ Our implementation closely follows the structure of that module, but we've minif
 somewhat to support only the latest versions of transformers.
 """
 
-# pylint: disable=protected-access,cyclic-import
-
 import os
 from typing import Callable
 
@@ -20,7 +18,7 @@ from ring_flash_attn import ring_flash_attn_func
 from ring_flash_attn.adapters.hf_adapter import check_params
 from transformers.modeling_flash_attention_utils import is_flash_attn_greater_or_equal
 
-try:  # pylint: disable=duplicate-code
+try:
     from transformers.modeling_flash_attention_utils import _flash_supports_window
 except ImportError:
     try:
@@ -59,7 +57,7 @@ def create_flash_attn_forward_varlen_llama3(
     """
 
     # transformers 4.48+
-    # pylint: disable=unused-argument
+
     def _flash_attention_forward(
         query_states: torch.Tensor,
         key_states: torch.Tensor,
diff --git a/src/axolotl/monkeypatch/ring_attn/patch.py b/src/axolotl/monkeypatch/ring_attn/patch.py
index ea0f9dd02..e1fd10b3a 100644
--- a/src/axolotl/monkeypatch/ring_attn/patch.py
+++ b/src/axolotl/monkeypatch/ring_attn/patch.py
@@ -15,7 +15,7 @@ import torch
 import torch.distributed as dist
 from torch.distributed import DeviceMesh
 
-try:  # pylint: disable=duplicate-code
+try:
     from transformers.modeling_flash_attention_utils import _flash_supports_window
 except ImportError:
     try:
@@ -43,7 +43,7 @@ def get_ring_attn_group() -> dist.ProcessGroup:
 
 def set_ring_attn_group(ring_attn_group: dist.ProcessGroup | None):
     """Setter for ring attention group on this rank."""
-    global RING_ATTN_GROUP  # pylint: disable=global-statement
+    global RING_ATTN_GROUP
     RING_ATTN_GROUP = ring_attn_group
 
 
@@ -57,29 +57,24 @@ def create_ring_flash_attention_forward(
         query_states: torch.Tensor,
         key_states: torch.Tensor,
         value_states: torch.Tensor,
-        attention_mask: torch.Tensor,  # pylint: disable=unused-argument
+        attention_mask: torch.Tensor,
         query_length: int,
         is_causal: bool,
         dropout: float = 0.0,
-        position_ids: Optional[torch.Tensor] = None,  # pylint: disable=unused-argument
+        position_ids: Optional[torch.Tensor] = None,
         softmax_scale: Optional[float] = None,
         sliding_window: Optional[int] = None,
         use_top_left_mask: bool = False,
         softcap: Optional[float] = None,
         deterministic: bool = None,
-        cu_seq_lens_q: Optional[
-            torch.LongTensor
-        ] = None,  # pylint: disable=unused-argument
-        cu_seq_lens_k: Optional[
-            torch.LongTensor
-        ] = None,  # pylint: disable=unused-argument
-        max_length_q: Optional[int] = None,  # pylint: disable=unused-argument
-        max_length_k: Optional[int] = None,  # pylint: disable=unused-argument
-        target_dtype: Optional[torch.dtype] = None,  # pylint: disable=unused-argument
-        attn_implementation: Optional[str] = None,  # pylint: disable=unused-argument
-        **kwargs,  # pylint: disable=unused-argument
+        cu_seq_lens_q: Optional[torch.LongTensor] = None,
+        cu_seq_lens_k: Optional[torch.LongTensor] = None,
+        max_length_q: Optional[int] = None,
+        max_length_k: Optional[int] = None,
+        target_dtype: Optional[torch.dtype] = None,
+        attn_implementation: Optional[str] = None,
+        **kwargs,
     ):
-        # pylint: disable=duplicate-code
         if not use_top_left_mask:
             causal = is_causal
         else:
@@ -101,9 +96,9 @@ def create_ring_flash_attention_forward(
         if deterministic is None:
             deterministic = os.environ.get("FLASH_ATTENTION_DETERMINISTIC", "0") == "1"
         flash_kwargs["deterministic"] = deterministic
-        assert (
-            softcap is None
-        ), "llama3_flash_attn_varlen_func does not support softcap yet."
+        assert softcap is None, (
+            "llama3_flash_attn_varlen_func does not support softcap yet."
+        )
         # flash_kwargs["softcap"] = softcap
         flash_kwargs["group"] = process_group
 
@@ -193,7 +188,7 @@ def register_ring_attn_from_device_mesh(
         # fmt: off
         import ring_flash_attn.adapters.hf_adapter
 
-        from ring_flash_attn.adapters.hf_adapter import (  # isort: skip  # pylint: disable=unused-import
+        from ring_flash_attn.adapters.hf_adapter import (  # isort: skip
             create_ring_flash_attention_forward as create_ring_flash_attention_forward_orig,
         )
 
diff --git a/src/axolotl/monkeypatch/stablelm_attn_hijack_flash.py b/src/axolotl/monkeypatch/stablelm_attn_hijack_flash.py
index 85454fe2e..0fa6d6424 100644
--- a/src/axolotl/monkeypatch/stablelm_attn_hijack_flash.py
+++ b/src/axolotl/monkeypatch/stablelm_attn_hijack_flash.py
@@ -16,8 +16,8 @@
 # This code is based off the following work:
 # https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py
 # https://github.com/huggingface/transformers/blob/main/src/transformers/models/gpt_neox/modeling_gpt_neox.py
-# pylint: disable=duplicate-code
 """PyTorch StableLM Epoch model."""
+
 import importlib
 import math
 from typing import Optional, Tuple, Union
@@ -26,7 +26,7 @@ import torch
 import torch.utils.checkpoint
 from accelerate import init_empty_weights
 from einops import rearrange
-from flash_attn.flash_attn_interface import (  # pylint: disable=ungrouped-imports
+from flash_attn.flash_attn_interface import (
     flash_attn_varlen_qkvpacked_func,
 )
 from torch import nn
@@ -49,27 +49,21 @@ def replace_stablelm_attn_with_flash_attn(model_name="stabilityai/stablelm-3b-4e
         ".configuration_stablelm_epoch", ".modeling_stablelm_epoch"
     )
     modeling_stablelm = importlib.import_module(module_name)
-    modeling_stablelm.Attention.forward = (  # pylint: disable=protected-access
-        flashattn_attn
-    )
-    modeling_stablelm.StableLMEpochModel.forward = (  # pylint: disable=protected-access
-        stablelm_model_forward
-    )
-    modeling_stablelm.DecoderLayer.forward = (  # pylint: disable=protected-access
-        decoder_layer_forward
-    )
+    modeling_stablelm.Attention.forward = flashattn_attn
+    modeling_stablelm.StableLMEpochModel.forward = stablelm_model_forward
+    modeling_stablelm.DecoderLayer.forward = decoder_layer_forward
 
 
 def rotate_half(x: torch.Tensor):
     """Rotates half the hidden dims of the input."""
-    # pylint: disable=invalid-name
+
     x1, x2 = torch.chunk(x, 2, dim=-1)
     return torch.cat((-x2, x1), dim=-1)
 
 
 def apply_rotary_pos_emb(q, k, cos, sin, position_ids):
     # The first two dimensions of cos and sin are always 1, so we can `squeeze` them.
-    # pylint: disable=invalid-name
+
     cos = cos.squeeze(1).squeeze(0)  # [seq_len, dim]
     sin = sin.squeeze(1).squeeze(0)  # [seq_len, dim]
     cos = cos[position_ids].unsqueeze(1)  # [batch_size, 1, seq_len, dim]
@@ -99,7 +93,7 @@ def flashattn_attn(
     attention_mask: torch.FloatTensor,
     position_ids: torch.LongTensor,
     past_key_value: Optional[Tuple[torch.Tensor]] = None,
-    output_attentions: Optional[bool] = False,  # pylint: disable=unused-argument
+    output_attentions: Optional[bool] = False,
     use_cache: Optional[bool] = False,
     cu_seqlens: Optional[torch.Tensor] = None,
     max_seqlen: Optional[torch.Tensor] = None,
@@ -216,7 +210,6 @@ def decoder_layer_forward(
 ) -> Union[
     Tuple[torch.Tensor], Optional[Tuple[torch.Tensor, Tuple[torch.FloatTensor, ...]]]
 ]:
-    # pylint: disable=duplicate-code
     residual = hidden_states
 
     hidden_states = self.input_layernorm(hidden_states)
@@ -263,7 +256,6 @@ def stablelm_model_forward(
     output_hidden_states: Optional[bool] = None,
     return_dict: Optional[bool] = None,
 ) -> Union[Tuple, BaseModelOutputWithPast]:
-    # pylint: disable=duplicate-code
     output_attentions = (
         output_attentions
         if output_attentions is not None
@@ -326,13 +318,11 @@ def stablelm_model_forward(
             dtype=torch.bool,
             device=inputs_embeds.device,
         )
-    attention_mask = (
-        self._prepare_decoder_attention_mask(  # pylint: disable=protected-access
-            attention_mask,
-            (batch_size, seq_length),
-            inputs_embeds,
-            past_key_values_length,
-        )
+    attention_mask = self._prepare_decoder_attention_mask(
+        attention_mask,
+        (batch_size, seq_length),
+        inputs_embeds,
+        past_key_values_length,
     )
 
     hidden_states = inputs_embeds
diff --git a/src/axolotl/monkeypatch/tiled_mlp/patch.py b/src/axolotl/monkeypatch/tiled_mlp/patch.py
index 419c73104..7cdc6d3a3 100644
--- a/src/axolotl/monkeypatch/tiled_mlp/patch.py
+++ b/src/axolotl/monkeypatch/tiled_mlp/patch.py
@@ -40,7 +40,6 @@ def patch_tiled_mlp(model_type, use_original_mlp=True, cfg_num_shards=None):
         is_distributed = int(os.environ.get("WORLD_SIZE", 1)) > 1
 
         def tiled_mlp_forward(self, x):
-            # pylint: disable=protected-access
             input_shape = x.shape
             seqlen = input_shape[-2]
             hidden = input_shape[-1]
@@ -79,14 +78,13 @@ def patch_tiled_mlp(model_type, use_original_mlp=True, cfg_num_shards=None):
             return down_res
 
         mlp_cls.forward = tiled_mlp_forward
-        mlp_cls._compute_params = []  # pylint: disable=protected-access
-        mlp_cls._tiled_mlp_dist_impl = None  # pylint: disable=protected-access
+        mlp_cls._compute_params = []
+        mlp_cls._tiled_mlp_dist_impl = None
         LOG.info(
             f"Successfully monkey-patched TiledMLP for model_type: {model_type}",
             main_process_only=True,
         )
     except (ImportError, AttributeError) as e:
         raise RuntimeError(
-            f"Could not import MLP class for model_type: {model_type}. "
-            f"Error: {str(e)}"
+            f"Could not import MLP class for model_type: {model_type}. Error: {str(e)}"
         ) from e
diff --git a/src/axolotl/monkeypatch/trainer/lr.py b/src/axolotl/monkeypatch/trainer/lr.py
index 9afc23c46..c33674cee 100644
--- a/src/axolotl/monkeypatch/trainer/lr.py
+++ b/src/axolotl/monkeypatch/trainer/lr.py
@@ -39,4 +39,4 @@ def _get_learning_rate(self):
 def patch_trainer_get_lr():
     from transformers.trainer import Trainer
 
-    Trainer._get_learning_rate = _get_learning_rate  # pylint: disable=protected-access
+    Trainer._get_learning_rate = _get_learning_rate
diff --git a/src/axolotl/monkeypatch/trainer_accelerator_args.py b/src/axolotl/monkeypatch/trainer_accelerator_args.py
index 819a66255..9fc6e38c6 100644
--- a/src/axolotl/monkeypatch/trainer_accelerator_args.py
+++ b/src/axolotl/monkeypatch/trainer_accelerator_args.py
@@ -47,9 +47,7 @@ def patch_create_accelerate_code_for_fp8(enable_fsdp_float8_all_gather: bool):
         create_code = get_create_accelerate_code()
     except OSError:
         return
-    Trainer._original_create_accelerator_and_postprocess = (  # pylint: disable=protected-access
-        create_code
-    )
+    Trainer._original_create_accelerator_and_postprocess = create_code
     create_code, _ = detab_code(create_code)
     if ORIGINAL_TRAINER_CODE not in create_code:
         return
@@ -72,12 +70,14 @@ def patch_create_accelerate_code_for_fp8(enable_fsdp_float8_all_gather: bool):
         if item in create_code:
             items_to_import.append(item)
 
-    exec(  # pylint: disable=exec-used  # nosec B102
+    exec(
         "from transformers.trainer import ("
         + ", ".join(x for x in items_to_import)
         + ")",
         globals(),
     )
-    exec(create_code, globals())  # pylint: disable=exec-used  # nosec B102
+    exec(create_code, globals())
     LOG.info("patching create_accelerator_and_postprocess to allow for overrides")
-    Trainer.create_accelerator_and_postprocess = fixed_create_accelerator_and_postprocess  # pylint: disable=protected-access  # pylint: disable=undefined-variable  # noqa: F821
+    Trainer.create_accelerator_and_postprocess = (
+        fixed_create_accelerator_and_postprocess
+    )
diff --git a/src/axolotl/monkeypatch/trainer_fsdp_optim.py b/src/axolotl/monkeypatch/trainer_fsdp_optim.py
index 1c2511524..692f754d7 100644
--- a/src/axolotl/monkeypatch/trainer_fsdp_optim.py
+++ b/src/axolotl/monkeypatch/trainer_fsdp_optim.py
@@ -23,9 +23,7 @@ PATCHED_TRAINER_CODE = """
 
 
 def get_training_loop_code() -> str:
-    training_loop = inspect.getsource(
-        Trainer._inner_training_loop  # pylint: disable=protected-access
-    )
+    training_loop = inspect.getsource(Trainer._inner_training_loop)
     return training_loop
 
 
@@ -44,9 +42,7 @@ def patch_training_loop_for_fsdp():
         training_loop = get_training_loop_code()
     except OSError:
         return
-    Trainer._original_inner_training_loop = (  # pylint: disable=protected-access
-        training_loop
-    )
+    Trainer._original_inner_training_loop = training_loop
     training_loop, _ = detab_code(training_loop)
     if ORIGINAL_TRAINER_CODE not in training_loop:
         return
@@ -66,14 +62,12 @@ def patch_training_loop_for_fsdp():
         if item in training_loop:
             items_to_import.append(item)
 
-    exec(  # pylint: disable=exec-used  # nosec B102
+    exec(
         "from transformers.trainer import ("
         + ", ".join(x for x in items_to_import)
         + ")",
         globals(),
     )
-    exec(training_loop, globals())  # pylint: disable=exec-used  # nosec B102
+    exec(training_loop, globals())
     LOG.info("patching _inner_training_loop for fsdp optimizer save")
-    Trainer._inner_training_loop = (  # pylint: disable=protected-access
-        _fixed_inner_training_loop  # pylint: disable=undefined-variable  # noqa: F821
-    )
+    Trainer._inner_training_loop = _fixed_inner_training_loop
diff --git a/src/axolotl/monkeypatch/transformers/trainer_loss_calc.py b/src/axolotl/monkeypatch/transformers/trainer_loss_calc.py
index 75f4158b3..012c699fa 100644
--- a/src/axolotl/monkeypatch/transformers/trainer_loss_calc.py
+++ b/src/axolotl/monkeypatch/transformers/trainer_loss_calc.py
@@ -52,7 +52,6 @@ def check_evaluation_loop_is_fsdp2_patchable() -> bool:
     return ORIGINAL_FSDP2_CODE in evaluation_loop_source
 
 
-# pylint: disable=protected-access
 def patch_evaluation_loop(patch_fsdp2: bool):
     """Patch the evaluation_loop method."""
     # Check if already patched
@@ -101,16 +100,14 @@ def patch_evaluation_loop(patch_fsdp2: bool):
             items_to_import.append(item)
 
     # Execute the imports and patched method
-    exec(  # pylint: disable=exec-used  # nosec B102
+    exec(
         f"from {module_name} import ({', '.join(items_to_import)})",
         globals(),
     )
-    exec(evaluation_loop_source, globals())  # pylint: disable=exec-used  # nosec B102
+    exec(evaluation_loop_source, globals())
 
     LOG.info("Patched Trainer.evaluation_loop with nanmean loss calculation")
-    Trainer.evaluation_loop = (
-        axolotl_evaluation_loop  # pylint: disable=undefined-variable  # noqa: F821
-    )
+    Trainer.evaluation_loop = axolotl_evaluation_loop
 
 
 def check_maybe_log_save_evaluate_is_patchable() -> bool:
@@ -118,7 +115,6 @@ def check_maybe_log_save_evaluate_is_patchable() -> bool:
     return ORIGINAL_MAYBE_CODE in maybe_log_source
 
 
-# pylint: disable=protected-access
 def patch_maybe_log_save_evaluate():
     """Patch the _maybe_log_save_evaluate method."""
     # Check if already patched
@@ -155,11 +151,11 @@ def patch_maybe_log_save_evaluate():
             items_to_import.append(item)
 
     # Execute the imports and patched method
-    exec(  # pylint: disable=exec-used  # nosec B102
+    exec(
         f"from {module_name} import ({', '.join(items_to_import)})",
         globals(),
     )
-    exec(maybe_log_source, globals())  # pylint: disable=exec-used  # nosec B102
+    exec(maybe_log_source, globals())
 
     LOG.info("Patched Trainer._maybe_log_save_evaluate with nanmean loss calculation")
-    Trainer._maybe_log_save_evaluate = axolotl_maybe_log_save_evaluate  # pylint: disable=undefined-variable  # noqa: F821
+    Trainer._maybe_log_save_evaluate = axolotl_maybe_log_save_evaluate
diff --git a/src/axolotl/monkeypatch/unsloth_.py b/src/axolotl/monkeypatch/unsloth_.py
index 146047e95..59f32c6f5 100644
--- a/src/axolotl/monkeypatch/unsloth_.py
+++ b/src/axolotl/monkeypatch/unsloth_.py
@@ -17,27 +17,19 @@ ORIGINAL_QKV_CODE = """
     query_states = self.q_proj(hidden_states)
     key_states = self.k_proj(hidden_states)
     value_states = self.v_proj(hidden_states)
-""".lstrip(
-    "\n"
-)
+""".lstrip("\n")
 
 PATCHED_QKV_CODE = """
     query_states, key_states, value_states = self.apply_qkv(self, hidden_states)
-""".lstrip(
-    "\n"
-)
+""".lstrip("\n")
 
 ORIGINAL_O_CODE = """
     attn_output = self.o_proj(attn_output)
-""".lstrip(
-    "\n"
-)
+""".lstrip("\n")
 
 PATCHED_O_CODE = """
     attn_output = self.apply_o(self, attn_output)
-""".lstrip(
-    "\n"
-)
+""".lstrip("\n")
 
 
 def original_apply_qkv(self, hidden_states):
@@ -66,13 +58,13 @@ def check_self_attn_is_patchable() -> bool:
 def integrate_cross_entropy_loss_patch(model_type: str = "llama") -> None:
     from unsloth.kernels.cross_entropy_loss import fast_cross_entropy_loss
 
-    def UnslothForCausalLMLoss(  # pylint: disable=invalid-name
+    def UnslothForCausalLMLoss(
         logits,
         labels,
-        vocab_size: int,  # pylint: disable=unused-argument
+        vocab_size: int,
         num_items_in_batch: int = None,
-        ignore_index: int = -100,  # pylint: disable=unused-argument
-        **kwargs,  # pylint: disable=unused-argument
+        ignore_index: int = -100,
+        **kwargs,
     ):
         # Upcast to float if we need to compute the loss to avoid potential precision issues
         logits = logits.float()
@@ -93,18 +85,16 @@ def integrate_cross_entropy_loss_patch(model_type: str = "llama") -> None:
         raise ValueError("Unsupported model type")
 
 
-self_attn_lora_patched = False  # pylint: disable=invalid-name
+self_attn_lora_patched = False
 
 
 def patch_self_attn_lora():
-    global self_attn_lora_patched  # pylint: disable=global-statement
+    global self_attn_lora_patched
     if self_attn_lora_patched:
         # prevent patching multiple times
         return
     self_attn_forward = get_self_attn_code()
-    LlamaFlashAttention2._original_forward = (  # pylint: disable=protected-access
-        self_attn_forward
-    )
+    LlamaFlashAttention2._original_forward = self_attn_forward
     self_attn_forward, _ = detab_code(self_attn_forward)
     assert ORIGINAL_QKV_CODE in self_attn_forward, "Original qkv code not found"
     assert ORIGINAL_O_CODE in self_attn_forward, "Original o code not found"
@@ -125,27 +115,25 @@ def patch_self_attn_lora():
         if item in self_attn_forward:
             items_to_import.append(item)
 
-    exec(  # pylint: disable=exec-used  # nosec B102
+    exec(
         "from transformers.models.llama.modeling_llama import ("
         + ", ".join(x for x in items_to_import)
         + ")",
         globals(),
     )
-    exec(self_attn_forward, globals())  # pylint: disable=exec-used  # nosec B102
+    exec(self_attn_forward, globals())
     self_attn_lora_patched = True
     LOG.info("patching unsloth attn lora")
-    LlamaFlashAttention2.forward = (
-        unsloth_attn_forward  # pylint: disable=undefined-variable  # noqa: F821
-    )
+    LlamaFlashAttention2.forward = unsloth_attn_forward
 
 
 def integrate_rope_embeddings():
     import transformers.models.llama.modeling_llama
     from unsloth.kernels.rope_embedding import fast_rope_embedding
 
-    def apply_rotary_pos_emb(  # pylint: disable=unused-argument
-        q,  # pylint: disable=invalid-name
-        k,  # pylint: disable=invalid-name
+    def apply_rotary_pos_emb(
+        q,
+        k,
         cos,
         sin,
         position_ids=None,
diff --git a/src/axolotl/monkeypatch/xformers_/__init__.py b/src/axolotl/monkeypatch/xformers_/__init__.py
index a052ea49e..6f5b43f77 100644
--- a/src/axolotl/monkeypatch/xformers_/__init__.py
+++ b/src/axolotl/monkeypatch/xformers_/__init__.py
@@ -36,7 +36,7 @@ class FusedMLP(torch.nn.Module):
         self.swiglu.w3.weight.data = down_proj.weight.data
 
     def _post_training(self, model, name):
-        w1, w2 = torch.split(  # pylint: disable=invalid-name
+        w1, w2 = torch.split(
             self.swiglu.w12.weight.data, self.config.intermediate_size, dim=0
         )
 
@@ -48,5 +48,5 @@ class FusedMLP(torch.nn.Module):
 
         set_module_name(model, name, new_mlp)
 
-    def forward(self, x: torch.Tensor) -> torch.Tensor:  # pylint: disable=invalid-name
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         return self.swiglu(x)
diff --git a/src/axolotl/processing_strategies.py b/src/axolotl/processing_strategies.py
index 31597d5a6..4b06eb4c8 100644
--- a/src/axolotl/processing_strategies.py
+++ b/src/axolotl/processing_strategies.py
@@ -156,9 +156,9 @@ class ProcessingStrategy:
                 image_value = load_image(image_value)
 
                 if self.image_size is not None:
-                    assert hasattr(
-                        image_value, "resize"
-                    ), "Image does not have a resize method"
+                    assert hasattr(image_value, "resize"), (
+                        "Image does not have a resize method"
+                    )
 
                     if isinstance(self.image_size, tuple):
                         image_value = image_value.resize(
diff --git a/src/axolotl/prompt_strategies/__init__.py b/src/axolotl/prompt_strategies/__init__.py
index cf936481e..d9936b9ae 100644
--- a/src/axolotl/prompt_strategies/__init__.py
+++ b/src/axolotl/prompt_strategies/__init__.py
@@ -48,6 +48,6 @@ def load(strategy, tokenizer, cfg, ds_cfg, processor=None):
         return func(tokenizer, cfg, **load_kwargs)
     except ModuleNotFoundError:
         return None
-    except Exception as exc:  # pylint: disable=broad-exception-caught
+    except Exception as exc:
         LOG.error(f"Failed to load prompt strategy `{strategy}`: {str(exc)}")
         raise exc
diff --git a/src/axolotl/prompt_strategies/alpaca_chat.py b/src/axolotl/prompt_strategies/alpaca_chat.py
index 975fee889..391ba6072 100644
--- a/src/axolotl/prompt_strategies/alpaca_chat.py
+++ b/src/axolotl/prompt_strategies/alpaca_chat.py
@@ -39,7 +39,7 @@ class AlpacaChatPrompter(AlpacaPrompter):
     system_prompt = "Below is an instruction from a USER that describes a task, paired with an input that provides further context. The ASSISTANT writes a response that concisely and appropriately completes the request.\n\n"
     system_no_input_prompt = "Below is an instruction from a USER that describes a task. The ASSISTANT writes a response that appropriately and concisely completes the request.\n\n"
 
-    def __init__(self):  # pylint: disable=super-init-not-called
+    def __init__(self):
         self.prompt_style = PromptStyle.CHAT.value
         self.match_prompt_style()
 
@@ -54,7 +54,7 @@ class NoSystemPrompter(AlpacaPrompter):
     turn_format = "{instruction} {input} "
     turn_no_input_format = "{instruction} "
 
-    def __init__(self):  # pylint: disable=super-init-not-called
+    def __init__(self):
         pass
 
 
diff --git a/src/axolotl/prompt_strategies/alpaca_w_system.py b/src/axolotl/prompt_strategies/alpaca_w_system.py
index 6873c8e08..808ba517e 100644
--- a/src/axolotl/prompt_strategies/alpaca_w_system.py
+++ b/src/axolotl/prompt_strategies/alpaca_w_system.py
@@ -22,10 +22,9 @@ class InstructionWSystemPromptTokenizingStrategy(PromptTokenizingStrategy):
         )
 
     def tokenize_prompt(self, prompt):
-        # pylint: disable=duplicate-code
         (
             instruction,
-            input,  # pylint: disable=redefined-builtin
+            input,
             response,
             system,
         ) = self.parse_instruction_fields(prompt)
@@ -64,7 +63,7 @@ class SystemDataPrompter(AlpacaPrompter):
         self,
         system: str,
         instruction: str,
-        input: Union[None, str] = None,  # pylint: disable=redefined-builtin
+        input: Union[None, str] = None,
         output: Union[None, str] = None,
     ) -> Generator[str, None, None]:
         # returns the full prompt from instruction and optional input
@@ -93,7 +92,6 @@ class OpenOrcaSystemDataPrompter(SystemDataPrompter):
     """
 
     def match_prompt_style(self):
-        # pylint: disable=duplicate-code
         if self.prompt_style == PromptStyle.INSTRUCT.value:
             self.turn_format = "### Human:\n{instruction}\n### Additional Context:\n{input}\n### Assistant:\n"
             self.turn_no_input_format = "### Human:\n{instruction}\n### Assistant:\n"
diff --git a/src/axolotl/prompt_strategies/base.py b/src/axolotl/prompt_strategies/base.py
index 370a51a95..45a3ffda9 100644
--- a/src/axolotl/prompt_strategies/base.py
+++ b/src/axolotl/prompt_strategies/base.py
@@ -29,6 +29,6 @@ def load(strategy, cfg, module_base=None, **kwargs):
         mod = importlib.import_module(strategy, module_base)
         func = getattr(mod, load_fn)
         return func(cfg, **kwargs)
-    except Exception:  # pylint: disable=broad-exception-caught
+    except Exception:
         LOG.warning(f"unable to load strategy {strategy}")
         return None
diff --git a/src/axolotl/prompt_strategies/bradley_terry/__init__.py b/src/axolotl/prompt_strategies/bradley_terry/__init__.py
index 7530aee19..7336edc71 100644
--- a/src/axolotl/prompt_strategies/bradley_terry/__init__.py
+++ b/src/axolotl/prompt_strategies/bradley_terry/__init__.py
@@ -10,7 +10,6 @@ LOG = get_logger(__name__)
 
 
 def load(strategy, tokenizer, cfg, ds_cfg):
-    # pylint: disable=duplicate-code
     try:
         load_fn = "load"
         if strategy.split(".")[-1].startswith("load_"):
@@ -30,6 +29,6 @@ def load(strategy, tokenizer, cfg, ds_cfg):
         return func(tokenizer, cfg, **load_kwargs)
     except ModuleNotFoundError:
         return None
-    except Exception as exc:  # pylint: disable=broad-exception-caught
+    except Exception as exc:
         LOG.error(f"Failed to load prompt strategy `{strategy}`: {str(exc)}")
         return None
diff --git a/src/axolotl/prompt_strategies/bradley_terry/chat_template.py b/src/axolotl/prompt_strategies/bradley_terry/chat_template.py
index e655f85a1..fd0d76f51 100644
--- a/src/axolotl/prompt_strategies/bradley_terry/chat_template.py
+++ b/src/axolotl/prompt_strategies/bradley_terry/chat_template.py
@@ -34,7 +34,6 @@ class BTChatTemplateStrategy(ChatTemplateStrategy):
 
         max_length = self.prompter.max_length
 
-        # pylint: disable=duplicate-code
         prompt["messages"] = []
         if prompt["system"]:
             prompt["messages"].append({"role": "system", "content": prompt["system"]})
@@ -52,7 +51,6 @@ class BTChatTemplateStrategy(ChatTemplateStrategy):
                 :max_length
             ]
 
-        # pylint: disable=duplicate-code
         prompt["messages"] = []
         if prompt["system"]:
             prompt["messages"].append({"role": "system", "content": prompt["system"]})
diff --git a/src/axolotl/prompt_strategies/bradley_terry/llama3.py b/src/axolotl/prompt_strategies/bradley_terry/llama3.py
index 1d586fd5f..5548d882e 100644
--- a/src/axolotl/prompt_strategies/bradley_terry/llama3.py
+++ b/src/axolotl/prompt_strategies/bradley_terry/llama3.py
@@ -6,7 +6,7 @@ chatml transforms for datasets with system, input, chosen, rejected to match lla
 def icr(
     cfg,
     **kwargs,
-):  # pylint: disable=possibly-unused-variable,unused-argument
+):
     """
     chatml transforms for datasets with system, input, chosen, rejected
     ex. https://huggingface.co/datasets/argilla/distilabel-intel-orca-dpo-pairs
diff --git a/src/axolotl/prompt_strategies/chat_template.py b/src/axolotl/prompt_strategies/chat_template.py
index f927b7fcb..cb3e3dfb1 100644
--- a/src/axolotl/prompt_strategies/chat_template.py
+++ b/src/axolotl/prompt_strategies/chat_template.py
@@ -2,8 +2,6 @@
 HF Chat Templates prompt strategy
 """
 
-# pylint: disable=too-many-lines
-
 from collections import defaultdict
 from typing import TYPE_CHECKING, Any, Dict, List, Set, Union
 
@@ -402,9 +400,9 @@ class ChatTemplateStrategy(PromptTokenizingStrategy):
         feature_names = list(prompt.keys())
 
         # Process each prompt individually
-        for row in zip(*prompt.values()):
+        for row in zip(*prompt.values(), strict=False):
             tokenized_prompt = self._tokenize_single_prompt(
-                dict(zip(feature_names, row))
+                dict(zip(feature_names, row, strict=False))
             )
             for key, val in tokenized_prompt.items():
                 res[key].append(val)
@@ -431,9 +429,7 @@ class ChatTemplateStrategy(PromptTokenizingStrategy):
                 add_generation_prompt=True,
                 images=images,
             )
-            tokenized_res = self.prompter.build_prompt(
-                turns, images=images
-            )  # type: ignore
+            tokenized_res = self.prompter.build_prompt(turns, images=images)  # type: ignore
             tokenized_prompt = {}
             if isinstance(tokenized_res, list):
                 input_ids = prompt_ids + tokenized_res[len(prompt_ids) :]
@@ -613,7 +609,6 @@ class ChatTemplateStrategy(PromptTokenizingStrategy):
         """
         Locate the starting and ending indices of the specified turn in a conversation.
         """
-        # pylint: disable=too-many-return-statements
 
         if turn_idx >= len(turns):
             raise ValueError(f"Turn index {turn_idx} out of range")
@@ -850,7 +845,7 @@ class MistralStrategy(ChatTemplateStrategy):
         split_thinking: bool | None = False,
     ):
         # Call the parent's parent __init__ (PromptTokenizingStrategy) to skip ChatTemplateStrategy's validation
-        # pylint: disable=non-parent-init-called,super-init-not-called
+
         PromptTokenizingStrategy.__init__(
             self, prompter, tokenizer, train_on_inputs, sequence_len
         )
diff --git a/src/axolotl/prompt_strategies/completion.py b/src/axolotl/prompt_strategies/completion.py
index 62a4b90b2..f43f25793 100644
--- a/src/axolotl/prompt_strategies/completion.py
+++ b/src/axolotl/prompt_strategies/completion.py
@@ -42,8 +42,8 @@ class CompletionPromptTokenizingStrategy(InstructionPromptTokenizingStrategy):
     def tokenize_prompt(self, prompt):
         res = defaultdict(lambda: [])
         feature_names = list(prompt.keys())
-        for row in zip(*prompt.values()):
-            prompt_row = dict(zip(feature_names, row))
+        for row in zip(*prompt.values(), strict=False):
+            prompt_row = dict(zip(feature_names, row, strict=False))
             (
                 instruction,
                 _,
@@ -59,9 +59,7 @@ class CompletionPromptTokenizingStrategy(InstructionPromptTokenizingStrategy):
 
         return dict(res)
 
-    def _build_full_prompt(
-        self, instruction, input, response
-    ):  # pylint: disable=redefined-builtin
+    def _build_full_prompt(self, instruction, input, response):
         return next(iter(self.prompter.build_prompt(instruction, input, response)))
 
 
@@ -73,8 +71,8 @@ class CompletionPrompter:
     def build_prompt(
         self,
         instruction: str,
-        input=None,  # pylint: disable=redefined-builtin, unused-argument
-        output=None,  # pylint: disable=unused-argument
+        input=None,
+        output=None,
     ) -> Generator[str, None, None]:
         yield instruction
 
diff --git a/src/axolotl/prompt_strategies/context_qa.py b/src/axolotl/prompt_strategies/context_qa.py
index aac44e0b2..09e96d26e 100644
--- a/src/axolotl/prompt_strategies/context_qa.py
+++ b/src/axolotl/prompt_strategies/context_qa.py
@@ -86,7 +86,6 @@ class ContextV2Prompter(AlpacaPrompter):
     system_no_input_prompt = ""
 
     def match_prompt_style(self):
-        # pylint: disable=duplicate-code
         self.turn_format = "{instruction}\n{input}"
         self.turn_no_input_format = "{instruction}"
         self.system_format = "{system}"
diff --git a/src/axolotl/prompt_strategies/creative_acr.py b/src/axolotl/prompt_strategies/creative_acr.py
index ea67034b3..3e016e30e 100644
--- a/src/axolotl/prompt_strategies/creative_acr.py
+++ b/src/axolotl/prompt_strategies/creative_acr.py
@@ -134,9 +134,7 @@ class CreativePrompterBase:
     def build_prompt(
         self,
         instruction: str,
-        input: Union[  # pylint: disable=redefined-builtin, unused-argument
-            None, str
-        ] = None,
+        input: Union[None, str] = None,
         output: Union[None, str] = None,
     ) -> Generator[str, None, None]:
         if self.system_prompt:
diff --git a/src/axolotl/prompt_strategies/dpo/chat_template.py b/src/axolotl/prompt_strategies/dpo/chat_template.py
index 786770885..85c4d2182 100644
--- a/src/axolotl/prompt_strategies/dpo/chat_template.py
+++ b/src/axolotl/prompt_strategies/dpo/chat_template.py
@@ -6,9 +6,7 @@ from axolotl.utils.chat_templates import extract_chat_template_args, get_chat_te
 from axolotl.utils.schemas.utils import handle_legacy_message_fields_logic
 
 
-def default(
-    cfg, dataset_idx=0, **kwargs
-):  # pylint: disable=possibly-unused-variable,unused-argument
+def default(cfg, dataset_idx=0, **kwargs):
     ds_cfg = cfg["datasets"][dataset_idx]
     ds_cfg = handle_legacy_message_fields_logic(ds_cfg)
 
diff --git a/src/axolotl/prompt_strategies/dpo/chatml.py b/src/axolotl/prompt_strategies/dpo/chatml.py
index 34a54aaa0..8614708eb 100644
--- a/src/axolotl/prompt_strategies/dpo/chatml.py
+++ b/src/axolotl/prompt_strategies/dpo/chatml.py
@@ -6,7 +6,7 @@ DPO strategies for chatml
 def default(
     cfg,
     **kwargs,
-):  # pylint: disable=possibly-unused-variable,unused-argument
+):
     def transform_fn(sample):
         if "prompt" in sample.keys():
             prompt_key = "prompt"
@@ -46,7 +46,7 @@ def default(
 def argilla_chat(
     cfg,
     **kwargs,
-):  # pylint: disable=possibly-unused-variable,unused-argument
+):
     """
     for argilla/dpo-mix-7k conversations
     """
@@ -65,7 +65,7 @@ def argilla_chat(
 def icr(
     cfg,
     **kwargs,
-):  # pylint: disable=possibly-unused-variable,unused-argument
+):
     """
     chatml transforms for datasets with system, input, chosen, rejected
     ex. https://huggingface.co/datasets/argilla/distilabel-intel-orca-dpo-pairs
@@ -88,7 +88,7 @@ def icr(
     return transform_fn
 
 
-def intel(cfg, **kwargs):  # pylint: disable=possibly-unused-variable,unused-argument
+def intel(cfg, **kwargs):
     """
     For Intel Orca DPO Pairs
     """
@@ -110,9 +110,7 @@ def intel(cfg, **kwargs):  # pylint: disable=possibly-unused-variable,unused-arg
     return transform_fn
 
 
-def prompt_pairs(
-    cfg, **kwargs
-):  # pylint: disable=possibly-unused-variable,unused-argument
+def prompt_pairs(cfg, **kwargs):
     def transform_fn(sample):
         if "system" in sample and sample["system"]:
             sample["prompt"] = (
@@ -130,7 +128,7 @@ def prompt_pairs(
     return transform_fn
 
 
-def ultra(cfg, **kwargs):  # pylint: disable=possibly-unused-variable,unused-argument
+def ultra(cfg, **kwargs):
     """
     for ultrafeedback binarized conversations
     """
diff --git a/src/axolotl/prompt_strategies/dpo/llama3.py b/src/axolotl/prompt_strategies/dpo/llama3.py
index eed420017..c13ff55e4 100644
--- a/src/axolotl/prompt_strategies/dpo/llama3.py
+++ b/src/axolotl/prompt_strategies/dpo/llama3.py
@@ -6,9 +6,8 @@ DPO strategies for llama-3 chat template
 def default(
     cfg,
     **kwargs,
-):  # pylint: disable=possibly-unused-variable,unused-argument
+):
     def transform_fn(sample):
-        # pylint: disable=duplicate-code
         if "prompt" in sample.keys():
             prompt_key = "prompt"
         elif "input" in sample.keys():
@@ -47,7 +46,7 @@ def default(
 def argilla_chat(
     cfg,
     **kwargs,
-):  # pylint: disable=possibly-unused-variable,unused-argument
+):
     """
     for argilla/dpo-mix-7k conversations
     """
@@ -66,7 +65,7 @@ def argilla_chat(
 def icr(
     cfg,
     **kwargs,
-):  # pylint: disable=possibly-unused-variable,unused-argument
+):
     """
     chatml transforms for datasets with system, input, chosen, rejected
     ex. https://huggingface.co/datasets/argilla/distilabel-intel-orca-dpo-pairs
@@ -89,7 +88,7 @@ def icr(
     return transform_fn
 
 
-def intel(cfg, **kwargs):  # pylint: disable=possibly-unused-variable,unused-argument
+def intel(cfg, **kwargs):
     """
     For Intel Orca DPO Pairs
     """
@@ -111,9 +110,7 @@ def intel(cfg, **kwargs):  # pylint: disable=possibly-unused-variable,unused-arg
     return transform_fn
 
 
-def prompt_pairs(
-    cfg, **kwargs
-):  # pylint: disable=possibly-unused-variable,unused-argument
+def prompt_pairs(cfg, **kwargs):
     def transform_fn(sample):
         if "system" in sample and sample["system"]:
             sample["prompt"] = (
@@ -131,7 +128,7 @@ def prompt_pairs(
     return transform_fn
 
 
-def ultra(cfg, **kwargs):  # pylint: disable=possibly-unused-variable,unused-argument
+def ultra(cfg, **kwargs):
     """
     for ultrafeedback binarized conversations
     """
diff --git a/src/axolotl/prompt_strategies/dpo/passthrough.py b/src/axolotl/prompt_strategies/dpo/passthrough.py
index 1fcb838db..52b5ceac1 100644
--- a/src/axolotl/prompt_strategies/dpo/passthrough.py
+++ b/src/axolotl/prompt_strategies/dpo/passthrough.py
@@ -3,12 +3,8 @@ DPO prompt strategies passthrough/zero-processing strategy
 """
 
 
-def default(
-    cfg, dataset_idx=0, **kwargs
-):  # pylint: disable=possibly-unused-variable,unused-argument
-    def transform_fn(
-        sample, tokenizer=None
-    ):  # pylint: disable=possibly-unused-variable,unused-argument
+def default(cfg, dataset_idx=0, **kwargs):
+    def transform_fn(sample, tokenizer=None):
         return sample
 
     return transform_fn
diff --git a/src/axolotl/prompt_strategies/dpo/user_defined.py b/src/axolotl/prompt_strategies/dpo/user_defined.py
index cdd9b8c9c..0bcb1d94c 100644
--- a/src/axolotl/prompt_strategies/dpo/user_defined.py
+++ b/src/axolotl/prompt_strategies/dpo/user_defined.py
@@ -3,7 +3,7 @@ User-defined DPO strategies
 """
 
 
-def default(cfg, dataset_idx=0, **kwargs):  # pylint: disable=unused-argument
+def default(cfg, dataset_idx=0, **kwargs):
     ds_cfg = cfg["datasets"][dataset_idx]["type"]
     if not isinstance(ds_cfg, dict):
         raise ValueError(
diff --git a/src/axolotl/prompt_strategies/dpo/zephyr.py b/src/axolotl/prompt_strategies/dpo/zephyr.py
index 9eb895009..781227181 100644
--- a/src/axolotl/prompt_strategies/dpo/zephyr.py
+++ b/src/axolotl/prompt_strategies/dpo/zephyr.py
@@ -3,14 +3,11 @@ DPO strategies for zephyr
 """
 
 
-def nectar(cfg, **kwargs):  # pylint: disable=possibly-unused-variable,unused-argument
+def nectar(cfg, **kwargs):
     def transform_fn(sample):
         data = {}
         data["prompt"] = (
-            "<|system|>\n</s>\n"
-            "<|user|>\n"
-            f"{sample['prompt']}</s>\n"
-            "<|assistant|>\n"
+            f"<|system|>\n</s>\n<|user|>\n{sample['prompt']}</s>\n<|assistant|>\n"
         )
         answers = sorted(sample["answers"], key=lambda x: x["rank"])
         data["chosen"] = answers[-1]["answer"]
diff --git a/src/axolotl/prompt_strategies/input_output.py b/src/axolotl/prompt_strategies/input_output.py
index 8be745b20..c84eecffc 100644
--- a/src/axolotl/prompt_strategies/input_output.py
+++ b/src/axolotl/prompt_strategies/input_output.py
@@ -16,7 +16,6 @@ class RawInputOutputStrategy(PromptTokenizingStrategy):
             self.eos_token = self.tokenizer.eos_token
 
     def tokenize_prompt(self, prompt):
-        # pylint: disable=duplicate-code
         input_ids = []
         labels = []
         for label, text in self.prompter.build_prompt(prompt["segments"]):
diff --git a/src/axolotl/prompt_strategies/kto/chatml.py b/src/axolotl/prompt_strategies/kto/chatml.py
index 97ae59ed5..945940f3f 100644
--- a/src/axolotl/prompt_strategies/kto/chatml.py
+++ b/src/axolotl/prompt_strategies/kto/chatml.py
@@ -2,13 +2,11 @@
 KTO strategies for chatml
 """
 
-# pylint: disable=duplicate-code
-
 
 def argilla(
     cfg,
     **kwargs,
-):  # pylint: disable=possibly-unused-variable,unused-argument
+):
     def transform_fn(sample):
         if "system" in sample and sample["system"]:
             sample["prompt"] = (
@@ -28,7 +26,7 @@ def argilla(
 def argilla_chat(
     cfg,
     **kwargs,
-):  # pylint: disable=possibly-unused-variable,unused-argument
+):
     """
     for argilla/kto-mix-15k conversations
     """
@@ -43,7 +41,7 @@ def argilla_chat(
     return transform_fn
 
 
-def intel(cfg, **kwargs):  # pylint: disable=possibly-unused-variable,unused-argument
+def intel(cfg, **kwargs):
     """
     For Intel Orca KTO
     ex: argilla/distilabel-intel-orca-kto
@@ -65,9 +63,7 @@ def intel(cfg, **kwargs):  # pylint: disable=possibly-unused-variable,unused-arg
     return transform_fn
 
 
-def prompt_pairs(
-    cfg, **kwargs
-):  # pylint: disable=possibly-unused-variable,unused-argument
+def prompt_pairs(cfg, **kwargs):
     def transform_fn(sample):
         if "system" in sample and sample["system"]:
             sample["prompt"] = (
@@ -84,7 +80,7 @@ def prompt_pairs(
     return transform_fn
 
 
-def ultra(cfg, **kwargs):  # pylint: disable=possibly-unused-variable,unused-argument
+def ultra(cfg, **kwargs):
     """
     for ultrafeedback binarized conversations
     ex: argilla/ultrafeedback-binarized-preferences-cleaned-kto
diff --git a/src/axolotl/prompt_strategies/kto/llama3.py b/src/axolotl/prompt_strategies/kto/llama3.py
index fde3c2ed4..9061f6f5e 100644
--- a/src/axolotl/prompt_strategies/kto/llama3.py
+++ b/src/axolotl/prompt_strategies/kto/llama3.py
@@ -2,13 +2,11 @@
 KTO strategies for llama-3 chat template
 """
 
-# pylint: disable=duplicate-code
-
 
 def argilla(
     cfg,
     **kwargs,
-):  # pylint: disable=possibly-unused-variable,unused-argument
+):
     def transform_fn(sample):
         if "system" in sample and sample["system"]:
             sample["prompt"] = (
@@ -28,7 +26,7 @@ def argilla(
 def argilla_chat(
     cfg,
     **kwargs,
-):  # pylint: disable=possibly-unused-variable,unused-argument
+):
     """
     for argilla/kto-mix-15k conversations
     """
@@ -43,7 +41,7 @@ def argilla_chat(
     return transform_fn
 
 
-def intel(cfg, **kwargs):  # pylint: disable=possibly-unused-variable,unused-argument
+def intel(cfg, **kwargs):
     """
     For Intel Orca KTO
     ex: argilla/distilabel-intel-orca-kto
@@ -65,9 +63,7 @@ def intel(cfg, **kwargs):  # pylint: disable=possibly-unused-variable,unused-arg
     return transform_fn
 
 
-def prompt_pairs(
-    cfg, **kwargs
-):  # pylint: disable=possibly-unused-variable,unused-argument
+def prompt_pairs(cfg, **kwargs):
     def transform_fn(sample):
         if "system" in sample and sample["system"]:
             sample["prompt"] = (
@@ -84,7 +80,7 @@ def prompt_pairs(
     return transform_fn
 
 
-def ultra(cfg, **kwargs):  # pylint: disable=possibly-unused-variable,unused-argument
+def ultra(cfg, **kwargs):
     """
     for ultrafeedback binarized conversations
     ex: argilla/ultrafeedback-binarized-preferences-cleaned-kto
diff --git a/src/axolotl/prompt_strategies/kto/user_defined.py b/src/axolotl/prompt_strategies/kto/user_defined.py
index 7c68a3000..e26683cde 100644
--- a/src/axolotl/prompt_strategies/kto/user_defined.py
+++ b/src/axolotl/prompt_strategies/kto/user_defined.py
@@ -2,10 +2,8 @@
 User-defined KTO strategies
 """
 
-# pylint: disable=duplicate-code
 
-
-def default(cfg, dataset_idx=0, **kwargs):  # pylint: disable=unused-argument
+def default(cfg, dataset_idx=0, **kwargs):
     ds_cfg = cfg["datasets"][dataset_idx]["type"]
     if not isinstance(ds_cfg, dict):
         raise ValueError(
diff --git a/src/axolotl/prompt_strategies/llama2_chat.py b/src/axolotl/prompt_strategies/llama2_chat.py
index eef2e1d4d..9eff062ec 100644
--- a/src/axolotl/prompt_strategies/llama2_chat.py
+++ b/src/axolotl/prompt_strategies/llama2_chat.py
@@ -153,7 +153,7 @@ class LLama2ChatTokenizingStrategy(PromptTokenizingStrategy):
         }
 
 
-class Llama2ChatPrompter:  # pylint: disable=too-few-public-methods
+class Llama2ChatPrompter:
     """
     A prompter that generates prompts for Llama2 models.
     """
@@ -190,7 +190,7 @@ class Llama2ChatPrompter:  # pylint: disable=too-few-public-methods
             # Skip the first one if it is not from human
             source = source[1:]
 
-        conv.messages = []  # pylint: disable=R0801
+        conv.messages = []
         for j, sentence in enumerate(source):
             role = roles[sentence["from"]]
             assert role == conv.roles[j % 2], ALTERNATING_ASSERTION_FAILED_ROLE
diff --git a/src/axolotl/prompt_strategies/messages/__init__.py b/src/axolotl/prompt_strategies/messages/__init__.py
index 6eae9dfd8..2c920a568 100644
--- a/src/axolotl/prompt_strategies/messages/__init__.py
+++ b/src/axolotl/prompt_strategies/messages/__init__.py
@@ -11,7 +11,7 @@ LOG = get_logger(__name__)
 def load(tokenizer, cfg, ds_cfg, processor=None):
     try:
         strategy = ds_cfg.get("input_transform", "chat")
-        # pylint: disable=duplicate-code
+
         load_fn = "load"
         if strategy.split(".")[-1].startswith("load_"):
             load_fn = strategy.split(".")[-1]
@@ -29,6 +29,6 @@ def load(tokenizer, cfg, ds_cfg, processor=None):
         return func(tokenizer, cfg, **load_kwargs)
     except ModuleNotFoundError:
         return None
-    except Exception as exc:  # pylint: disable=broad-exception-caught
+    except Exception as exc:
         LOG.error(f"Failed to load prompt strategy `{strategy}`: {str(exc)}")
         raise exc
diff --git a/src/axolotl/prompt_strategies/messages/chat.py b/src/axolotl/prompt_strategies/messages/chat.py
index eaed2396a..854d25e42 100644
--- a/src/axolotl/prompt_strategies/messages/chat.py
+++ b/src/axolotl/prompt_strategies/messages/chat.py
@@ -19,7 +19,7 @@ class ChatMessageDatasetWrappingStrategy(DatasetWrappingStrategy):
         processor,
         message_transform=None,
         formatter=None,
-        **kwargs,  # pylint: disable=unused-argument
+        **kwargs,
     ):
         """
         :param processor: tokenizer or image processor
@@ -35,7 +35,7 @@ class ChatMessageDatasetWrappingStrategy(DatasetWrappingStrategy):
         dataset,
         process_count: Optional[int] = None,
         keep_in_memory: Optional[bool] = False,
-        **kwargs,  # pylint: disable=unused-argument
+        **kwargs,
     ):
         self.dataset = TokenizedChatDataset(
             dataset,
@@ -72,9 +72,10 @@ def load(tokenizer, cfg, ds_cfg: Optional[Dict[str, Any]] = None):
         builder_kwargs["message_field_training"] = message_field_training
 
     chat_template = ds_cfg.get("chat_template", cfg.get("chat_template", "chatml"))
-    format_message = (
-        lambda x: x  # noqa E731  # pylint: disable=unnecessary-lambda-assignment
-    )
+
+    def format_message(x):
+        return x
+
     if chat_template == "chatml":
         from axolotl.core.chat.format.chatml import format_message  # noqa F811
     if chat_template.startswith("llama3"):
diff --git a/src/axolotl/prompt_strategies/metharme.py b/src/axolotl/prompt_strategies/metharme.py
index 66da72389..35f1ef3b3 100644
--- a/src/axolotl/prompt_strategies/metharme.py
+++ b/src/axolotl/prompt_strategies/metharme.py
@@ -10,8 +10,6 @@ LOG = get_logger(__name__)
 
 IGNORE_TOKEN_ID = -100
 
-# pylint: disable=duplicate-code
-
 
 class MetharmePromptTokenizingStrategy(InstructionPromptTokenizingStrategy):
     """
@@ -66,7 +64,7 @@ class MetharmePrompter(AlpacaPrompter):
     turn_format = "{instruction}"
     turn_no_input_format = "{instruction}"
 
-    def __init__(self, *args, **kwargs):  # pylint: disable=super-init-not-called
+    def __init__(self, *args, **kwargs):
         pass
 
 
diff --git a/src/axolotl/prompt_strategies/orpo/chat_template.py b/src/axolotl/prompt_strategies/orpo/chat_template.py
index fdee28ea1..b655bc970 100644
--- a/src/axolotl/prompt_strategies/orpo/chat_template.py
+++ b/src/axolotl/prompt_strategies/orpo/chat_template.py
@@ -23,9 +23,7 @@ class MessageList(BaseModel):
     messages: List[Message]
 
 
-def load(
-    tokenizer, cfg, ds_cfg: Optional[Dict[str, Any]] = None, **kwargs
-):  # pylint: disable=possibly-unused-variable,unused-argument
+def load(tokenizer, cfg, ds_cfg: Optional[Dict[str, Any]] = None, **kwargs):
     """
     chatml transforms for datasets with system, input, chosen, rejected
     """
@@ -219,29 +217,38 @@ class ORPOPrompter(Prompter):
         for message in message_list.messages:
             conversation.append(message.model_dump())
             if message.role == "system":
-                yield self.tokenizer.apply_chat_template(
-                    conversation,
-                    add_generation_prompt=False,
-                    chat_template=self.chat_template,
-                    tokenize=False,
-                ), False
+                yield (
+                    self.tokenizer.apply_chat_template(
+                        conversation,
+                        add_generation_prompt=False,
+                        chat_template=self.chat_template,
+                        tokenize=False,
+                    ),
+                    False,
+                )
             if message.role == "user":
-                yield self.tokenizer.apply_chat_template(
-                    conversation,
-                    add_generation_prompt=True,
-                    chat_template=self.chat_template,
-                    tokenize=False,
-                ), False
+                yield (
+                    self.tokenizer.apply_chat_template(
+                        conversation,
+                        add_generation_prompt=True,
+                        chat_template=self.chat_template,
+                        tokenize=False,
+                    ),
+                    False,
+                )
             if message.role == "assistant":
-                yield self.tokenizer.apply_chat_template(
-                    conversation,
-                    add_generation_prompt=False,
-                    chat_template=self.chat_template,
-                    tokenize=False,
-                ), True
+                yield (
+                    self.tokenizer.apply_chat_template(
+                        conversation,
+                        add_generation_prompt=False,
+                        chat_template=self.chat_template,
+                        tokenize=False,
+                    ),
+                    True,
+                )
 
 
-def argilla(cfg, **kwargs):  # pylint: disable=possibly-unused-variable,unused-argument
+def argilla(cfg, **kwargs):
     dataset_parser = ORPODatasetParsingStrategy()
 
     def transform_fn(sample, tokenizer=None):
diff --git a/src/axolotl/prompt_strategies/pygmalion.py b/src/axolotl/prompt_strategies/pygmalion.py
index 51f92f397..8c53a5f27 100644
--- a/src/axolotl/prompt_strategies/pygmalion.py
+++ b/src/axolotl/prompt_strategies/pygmalion.py
@@ -69,7 +69,6 @@ class PygmalionPromptTokenizingStrategy(PromptTokenizingStrategy):
                 LOG.warning(f"unknown role in conversation: {role}")
                 res = defaultdict(lambda: [])
 
-            # pylint: disable=duplicate-code
             result, current_len = parse_tokenized_to_result(
                 result,
                 current_len,
@@ -89,7 +88,10 @@ class PygmalionPrompter:
         pass
 
     def build_prompt(
-        self, source, *args, **kwargs  # pylint: disable=unused-argument
+        self,
+        source,
+        *args,
+        **kwargs,
     ) -> Generator[Tuple[str, str], None, None]:
         for msg in source:
             yield msg["role"], msg["value"]
diff --git a/src/axolotl/prompt_strategies/stepwise_supervised.py b/src/axolotl/prompt_strategies/stepwise_supervised.py
index 8be7c35e3..9175126e7 100644
--- a/src/axolotl/prompt_strategies/stepwise_supervised.py
+++ b/src/axolotl/prompt_strategies/stepwise_supervised.py
@@ -66,7 +66,7 @@ class StepwiseSupervisedPromptTokenizingStrategy:
         # Create step-wise labels
         labels = [
             [IGNORE_INDEX] * (len(completion) - 1) + [label]  # type: ignore
-            for completion, label in zip(completions_ids, labels)
+            for completion, label in zip(completions_ids, labels, strict=False)
         ]
 
         # Join all steps
diff --git a/src/axolotl/prompt_strategies/user_defined.py b/src/axolotl/prompt_strategies/user_defined.py
index e20e80c3a..0bff514e7 100644
--- a/src/axolotl/prompt_strategies/user_defined.py
+++ b/src/axolotl/prompt_strategies/user_defined.py
@@ -83,16 +83,12 @@ def load(tokenizer, cfg, ds_cfg: Optional[UserDefinedDatasetConfig] = None):
         cfg.sequence_len,
     )
 
-    setattr(
-        strat,
-        "parse_instruction_fields",
-        partial(
-            parse_instruction_fields,
-            ds_cfg.field_instruction,
-            ds_cfg.field_input,
-            ds_cfg.field_output,
-            ds_cfg.field_system,
-            system_prompt,
-        ),
+    strat.parse_instruction_fields = partial(  # type: ignore[method-assign]
+        parse_instruction_fields,
+        ds_cfg.field_instruction,
+        ds_cfg.field_input,
+        ds_cfg.field_output,
+        ds_cfg.field_system,
+        system_prompt,
     )
     return strat
diff --git a/src/axolotl/prompt_tokenizers.py b/src/axolotl/prompt_tokenizers.py
index 9ca645de3..2bf9ec763 100644
--- a/src/axolotl/prompt_tokenizers.py
+++ b/src/axolotl/prompt_tokenizers.py
@@ -118,7 +118,7 @@ class InstructionPromptTokenizingStrategy(PromptTokenizingStrategy):
     def tokenize_prompt(self, prompt):
         (
             instruction,
-            input,  # pylint: disable=redefined-builtin
+            input,
             response,
         ) = self.parse_instruction_fields(prompt)
         user_prompt = next(
@@ -144,7 +144,10 @@ class InstructionPromptTokenizingStrategy(PromptTokenizingStrategy):
         return tokenized_prompt
 
     def _build_full_prompt(
-        self, instruction, input, response  # pylint: disable=redefined-builtin
+        self,
+        instruction,
+        input,
+        response,
     ):
         return next(
             iter(
@@ -257,10 +260,9 @@ class ReflectionPromptTokenizingStrategy(PromptTokenizingStrategy):
         raise NotImplementedError
 
     def tokenize_prompt(self, prompt):
-        # pylint: disable=duplicate-code
         (
             instruction,
-            input,  # pylint: disable=redefined-builtin
+            input,
             output,
             reflection,
             corrected,
@@ -287,9 +289,7 @@ class ReflectionPromptTokenizingStrategy(PromptTokenizingStrategy):
 
         return tokenized_full_prompt
 
-    def _build_full_prompt(
-        self, instruction, input, output, reflection, corrected
-    ):  # pylint: disable=redefined-builtin
+    def _build_full_prompt(self, instruction, input, output, reflection, corrected):
         return next(
             iter(
                 self.prompter.build_prompt(
diff --git a/src/axolotl/prompters.py b/src/axolotl/prompters.py
index d29da075e..9543996f7 100644
--- a/src/axolotl/prompters.py
+++ b/src/axolotl/prompters.py
@@ -46,7 +46,6 @@ class AlpacaPrompter(Prompter):
         self.match_prompt_style()
 
     def match_prompt_style(self):
-        # pylint: disable=duplicate-code
         if self.prompt_style == PromptStyle.INSTRUCT.value:
             self.turn_format = "### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n"
             self.turn_no_input_format = (
@@ -93,7 +92,7 @@ class AlpacaPrompter(Prompter):
     def build_prompt(
         self,
         instruction: str,
-        input: Union[None, str] = None,  # pylint: disable=redefined-builtin
+        input: Union[None, str] = None,
         output: Union[None, str] = None,
     ) -> Generator[str, None, None]:
         yield self._build_result(instruction, input, output)
@@ -218,7 +217,7 @@ class ReflectAlpacaPrompter(Prompter):
     def _build_result(
         self,
         instruction: str,
-        input: Union[None, str] = None,  # pylint: disable=redefined-builtin
+        input: Union[None, str] = None,
         output: Union[None, str] = None,
         reflection: Union[None, str] = None,
         corrected: Union[None, str] = None,
@@ -242,12 +241,11 @@ class ReflectAlpacaPrompter(Prompter):
     def build_prompt(
         self,
         instruction: str,
-        input: Union[None, str] = None,  # pylint: disable=redefined-builtin
+        input: Union[None, str] = None,
         output: Union[None, str] = None,
         reflection: Union[None, str] = None,
         corrected: Union[None, str] = None,
     ) -> Generator[str, None, None]:
-        # pylint: disable=duplicate-code
         yield self._build_result(
             instruction,
             input,
diff --git a/src/axolotl/train.py b/src/axolotl/train.py
index dd39cc228..e409d4a11 100644
--- a/src/axolotl/train.py
+++ b/src/axolotl/train.py
@@ -230,7 +230,7 @@ def save_trained_model(
     # Post training module hooks
     for name, module in model.named_modules():
         if hasattr(module, "_post_training"):
-            module._post_training(model, name)  # pylint: disable=protected-access
+            module._post_training(model, name)
 
     # handle QAT
     if cfg.qat:
@@ -253,9 +253,7 @@ def save_trained_model(
             # final model weights have already been saved by `ReLoRACallback.on_train_end`
             return
 
-    if (  # pylint: disable=too-many-nested-blocks
-        trainer.is_fsdp_enabled or cfg.fsdp_config
-    ):
+    if trainer.is_fsdp_enabled or cfg.fsdp_config:
         if cfg.fsdp_config or cfg.fsdp:
             if cfg.fsdp_config.final_state_dict_type:
                 state_dict_type = cfg.fsdp_config.final_state_dict_type
@@ -438,7 +436,7 @@ def setup_model_card(cfg: DictDefault):
     badge_markdown = """[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)"""
     transformers.modelcard.AUTOGENERATED_TRAINER_COMMENT += f"\n{badge_markdown}"
 
-    if getattr(cfg, "axolotl_config_path"):
+    if cfg.axolotl_config_path:
         raw_axolotl_cfg = Path(cfg.axolotl_config_path)
         version = importlib.metadata.version("axolotl")
         if raw_axolotl_cfg.is_file():
@@ -489,7 +487,9 @@ def handle_untrained_tokens_fix(
         )
 
 
-def setup_model_and_trainer(cfg: DictDefault, dataset_meta: TrainDatasetMeta) -> tuple[
+def setup_model_and_trainer(
+    cfg: DictDefault, dataset_meta: TrainDatasetMeta
+) -> tuple[
     "HFRLTrainerBuilder" | "HFCausalTrainerBuilder",
     PeftModel | PreTrainedModel,
     PreTrainedTokenizer,
diff --git a/src/axolotl/utils/__init__.py b/src/axolotl/utils/__init__.py
index e669413f8..e5050116a 100644
--- a/src/axolotl/utils/__init__.py
+++ b/src/axolotl/utils/__init__.py
@@ -17,7 +17,6 @@ def is_comet_available():
     return importlib.util.find_spec("comet_ml") is not None
 
 
-# pylint: disable=duplicate-code
 def get_pytorch_version() -> tuple[int, int, int]:
     """
     Get Pytorch version as a tuple of (major, minor, patch).
diff --git a/src/axolotl/utils/callbacks/__init__.py b/src/axolotl/utils/callbacks/__init__.py
index d3f3126b5..6c5512223 100644
--- a/src/axolotl/utils/callbacks/__init__.py
+++ b/src/axolotl/utils/callbacks/__init__.py
@@ -56,9 +56,7 @@ IGNORE_INDEX = -100
 LOG = get_logger(__name__)
 
 
-class SaveBetterTransformerModelCallback(
-    TrainerCallback
-):  # pylint: disable=too-few-public-methods
+class SaveBetterTransformerModelCallback(TrainerCallback):
     """Callback to save the BetterTransformer wrapped model"""
 
     def on_step_end(
@@ -103,7 +101,7 @@ class LossWatchDogCallback(TrainerCallback):
 
     def on_step_end(
         self,
-        args: TrainingArguments,  # pylint: disable=unused-argument
+        args: TrainingArguments,
         state: TrainerState,
         control: TrainerControl,
         **_kwargs,
@@ -126,7 +124,7 @@ class SaveModelOnFirstStepCallback(TrainerCallback):
 
     def on_step_end(
         self,
-        args: TrainingArguments,  # pylint: disable=unused-argument
+        args: TrainingArguments,
         state: TrainerState,
         control: TrainerControl,
         **_kwargs,
@@ -239,10 +237,10 @@ def bench_eval_callback_factory(trainer, tokenizer):
         def on_evaluate(
             self,
             args: AxolotlTrainingArguments,
-            state: TrainerState,  # pylint: disable=unused-argument
-            control: TrainerControl,  # pylint: disable=unused-argument
-            metrics: Dict[str, float],  # pylint: disable=unused-argument
-            **kwargs,  # pylint: disable=unused-argument
+            state: TrainerState,
+            control: TrainerControl,
+            metrics: Dict[str, float],
+            **kwargs,
         ):
             data_loader = trainer.get_bench_dataloader(
                 bench_dataset.remove_columns(["input", "subject", "output", "name"])
@@ -272,7 +270,7 @@ def bench_eval_callback_factory(trainer, tokenizer):
             # Extract results by subject.
             bench_name = bench_dataset["name"]
             bench_names: dict = {s: {"refs": [], "preds": []} for s in set(bench_name)}
-            for s, p, r in zip(bench_name, preds, refs):  # pylint: disable=invalid-name
+            for s, p, r in zip(bench_name, preds, refs, strict=False):
                 bench_names[s]["preds"].append(p)
                 bench_names[s]["refs"].append(r)
             barrier()
@@ -310,9 +308,7 @@ def bench_eval_callback_factory(trainer, tokenizer):
                 bench_scores = []
                 bench_refs = []
                 bench_preds = []
-                for (
-                    bench_name
-                ) in combined_bench_names:  # pylint: disable=consider-using-dict-items
+                for bench_name in combined_bench_names:
                     bench_score = accuracy.compute(
                         references=combined_bench_names[bench_name]["refs"],
                         predictions=combined_bench_names[bench_name]["preds"],
@@ -361,18 +357,18 @@ def causal_lm_bench_eval_callback_factory(trainer: Trainer, tokenizer):
                 else:
                     try:
                         metrics[metric] = evaluate.load(metric)
-                    except Exception as exc:  # pylint: disable=broad-exception-caught
+                    except Exception as exc:
                         LOG.warning(f"{metric}: {exc.args}")
             return metrics
 
         def on_evaluate(
             self,
-            args: AxolotlTrainingArguments,  # pylint: disable=unused-argument
+            args: AxolotlTrainingArguments,
             state: TrainerState,
             control: TrainerControl,
-            train_dataloader,  # pylint: disable=unused-argument
+            train_dataloader,
             eval_dataloader,
-            **kwargs,  # pylint: disable=unused-argument
+            **kwargs,
         ):
             trainer.model_wrapped.eval()
 
@@ -380,7 +376,6 @@ def causal_lm_bench_eval_callback_factory(trainer: Trainer, tokenizer):
                 self.cfg.device
             )  # Use this instead of trainer.model_wrapped.device as it may return cpu if fsdp offloaded
 
-            # pylint: disable=duplicate-code
             generation_config = GenerationConfig(
                 max_new_tokens=self.cfg.eval_max_new_tokens,
                 bos_token_id=tokenizer.bos_token_id,
@@ -411,9 +406,7 @@ def causal_lm_bench_eval_callback_factory(trainer: Trainer, tokenizer):
                 try:
                     # Only pass the kwargs that are in the metric's feature list
                     metric_kwargs = {
-                        k: kwargs[k]
-                        for k in metric._feature_names()  # pylint: disable=protected-access
-                        if k in kwargs
+                        k: kwargs[k] for k in metric._feature_names() if k in kwargs
                     }
 
                     if isinstance(metric, Perplexity):
@@ -425,7 +418,7 @@ def causal_lm_bench_eval_callback_factory(trainer: Trainer, tokenizer):
                         if "score" in metric_score
                         else metric_score["mean_score"]
                     )
-                except Exception:  # pylint: disable=broad-exception-caught
+                except Exception:
                     traceback.print_exc()
                     LOG.debug(
                         f"Failed to compute metric {metric.name} with kwargs {kwargs.keys()}"
@@ -473,6 +466,7 @@ def causal_lm_bench_eval_callback_factory(trainer: Trainer, tokenizer):
                             batch_input_ids,
                             batch_labels,
                             batch_pos_ids,
+                            strict=False,
                         ):
                             if pos_ids is None:
                                 pos_ranges = [(0, len(input_ids_all) - 1)]
@@ -523,7 +517,7 @@ def causal_lm_bench_eval_callback_factory(trainer: Trainer, tokenizer):
                         prediction_all_tokens = predictions["sequences"].cpu().tolist()
                         prediction_without_prompt_tokens_list = []
                         for prompt_token_ids, prediction_tokens in zip(
-                            prompt_token_ids_list, prediction_all_tokens
+                            prompt_token_ids_list, prediction_all_tokens, strict=False
                         ):
                             prediction_without_prompt_tokens = prediction_tokens[
                                 len(prompt_token_ids) :
@@ -561,12 +555,12 @@ def log_prediction_callback_factory(trainer: Trainer, tokenizer, logger: str):
 
         def on_evaluate(
             self,
-            args: AxolotlTrainingArguments,  # pylint: disable=unused-argument
+            args: AxolotlTrainingArguments,
             state: TrainerState,
             control: TrainerControl,
-            train_dataloader,  # pylint: disable=unused-argument
+            train_dataloader,
             eval_dataloader,
-            **kwargs,  # pylint: disable=unused-argument
+            **kwargs,
         ):
             eval_table_size = self.cfg.eval_table_size
 
@@ -576,7 +570,6 @@ def log_prediction_callback_factory(trainer: Trainer, tokenizer, logger: str):
             trainer.model.eval()
             device = torch.device(self.cfg.device)
 
-            # pylint: disable=duplicate-code
             generation_config = GenerationConfig(
                 max_new_tokens=self.cfg.eval_max_new_tokens,
                 bos_token_id=tokenizer.bos_token_id,
@@ -644,6 +637,7 @@ def log_prediction_callback_factory(trainer: Trainer, tokenizer, logger: str):
                         batch_labels,
                         batch_pos_ids,
                         batch_logits,
+                        strict=False,
                     ):
                         if pos_ids is None:
                             pos_ranges = [(0, len(input_ids_all) - 1)]
@@ -697,7 +691,7 @@ def log_prediction_callback_factory(trainer: Trainer, tokenizer, logger: str):
                     prediction_all_tokens = predictions["sequences"].cpu().tolist()
                     prediction_without_prompt_tokens_list = []
                     for prompt_token_ids, prediction_tokens in zip(
-                        prompt_token_ids_list, prediction_all_tokens
+                        prompt_token_ids_list, prediction_all_tokens, strict=False
                     ):
                         prediction_without_prompt_tokens = prediction_tokens[
                             len(prompt_token_ids) :
@@ -716,7 +710,11 @@ def log_prediction_callback_factory(trainer: Trainer, tokenizer, logger: str):
                         prediction_text,
                         pred_step_text,
                     ) in zip(
-                        prompt_texts, completion_texts, predicted_texts, pred_step_texts
+                        prompt_texts,
+                        completion_texts,
+                        predicted_texts,
+                        pred_step_texts,
+                        strict=False,
                     ):
                         table_data["id"].append(row_index)
                         table_data["Prompt"].append(prompt_text)
@@ -774,10 +772,10 @@ class SaveAxolotlConfigtoWandBCallback(TrainerCallback):
 
     def on_train_begin(
         self,
-        args: AxolotlTrainingArguments,  # pylint: disable=unused-argument
-        state: TrainerState,  # pylint: disable=unused-argument
+        args: AxolotlTrainingArguments,
+        state: TrainerState,
         control: TrainerControl,
-        **kwargs,  # pylint: disable=unused-argument
+        **kwargs,
     ):
         if state.is_world_process_zero:
             try:
@@ -845,19 +843,30 @@ class GCCallback(TrainerCallback):
         gc.collect()
 
     def on_train_begin(
-        self, args, state, control, **kwargs  # pylint: disable=unused-argument
+        self,
+        args,
+        state,
+        control,
+        **kwargs,
     ):
         self._gc()
 
     def on_step_begin(
-        self, args, state, control, **kwargs  # pylint: disable=unused-argument
+        self,
+        args,
+        state,
+        control,
+        **kwargs,
     ):
-        # pylint: disable=consider-using-in
         if self.next_gc_on_begin_step == state.global_step or state.global_step == 0:
             self._gc()
 
     def on_step_end(
-        self, args, state, control, **kwargs  # pylint: disable=unused-argument
+        self,
+        args,
+        state,
+        control,
+        **kwargs,
     ):
         if control.should_evaluate:
             # automatically GC before evals so the eval memory spike from the CEL doesn't OOM the trainer
@@ -879,7 +888,11 @@ class GCCallback(TrainerCallback):
                 self._gc()
 
     def on_epoch_end(
-        self, args, state, control, **kwargs  # pylint: disable=unused-argument
+        self,
+        args,
+        state,
+        control,
+        **kwargs,
     ):
         self._gc()
 
@@ -892,16 +905,12 @@ def colab_inference_post_train_callback(trainer: Trainer):
             self.gpu_name = torch.cuda.get_device_name(0)
             self.cfg = cfg
 
-        def on_train_end(
-            self, args, state, control, **kwargs
-        ):  # pylint: disable=unused-argument
+        def on_train_end(self, args, state, control, **kwargs):
             """
             handle T4 gpu, we need to convert attention to eager for inference
             """
             if "Tesla T4" in self.gpu_name and self.cfg.xformers_attention:
-                trainer.model.config._attn_implementation = (  # pylint: disable=protected-access
-                    "eager"
-                )
+                trainer.model.config._attn_implementation = "eager"
             trainer.model.gradient_checkpointing_disable()
             trainer.model.config.use_cache = True
             trainer.model.eval()
diff --git a/src/axolotl/utils/callbacks/comet_.py b/src/axolotl/utils/callbacks/comet_.py
index 7dce95145..cd3bcf70e 100644
--- a/src/axolotl/utils/callbacks/comet_.py
+++ b/src/axolotl/utils/callbacks/comet_.py
@@ -22,10 +22,10 @@ class SaveAxolotlConfigtoCometCallback(TrainerCallback):
 
     def on_train_begin(
         self,
-        args: "AxolotlTrainingArguments",  # pylint: disable=unused-argument
-        state: TrainerState,  # pylint: disable=unused-argument
+        args: "AxolotlTrainingArguments",
+        state: TrainerState,
         control: TrainerControl,
-        **kwargs,  # pylint: disable=unused-argument
+        **kwargs,
     ):
         if is_main_process():
             try:
diff --git a/src/axolotl/utils/callbacks/lisa.py b/src/axolotl/utils/callbacks/lisa.py
index 348cdf2da..03f189d80 100644
--- a/src/axolotl/utils/callbacks/lisa.py
+++ b/src/axolotl/utils/callbacks/lisa.py
@@ -55,9 +55,7 @@ def lisa_callback_factory(trainer: "AxolotlTrainer"):
                 for param in layer.parameters():
                     param.requires_grad = False
 
-        def on_step_begin(
-            self, args, state, control, **kwargs
-        ):  # pylint: disable=unused-argument
+        def on_step_begin(self, args, state, control, **kwargs):
             # Check if it's time to switch active layers, including at step 0
             if state.global_step % self.step_interval == 0 or state.global_step == 1:
                 self.switch_active_layers()
diff --git a/src/axolotl/utils/callbacks/mlflow_.py b/src/axolotl/utils/callbacks/mlflow_.py
index ac72f5e6d..30120a87d 100644
--- a/src/axolotl/utils/callbacks/mlflow_.py
+++ b/src/axolotl/utils/callbacks/mlflow_.py
@@ -23,7 +23,6 @@ def should_log_artifacts() -> bool:
 
 
 class SaveAxolotlConfigtoMlflowCallback(TrainerCallback):
-    # pylint: disable=duplicate-code
     """Callback to save axolotl config to mlflow"""
 
     def __init__(self, axolotl_config_path):
@@ -31,10 +30,10 @@ class SaveAxolotlConfigtoMlflowCallback(TrainerCallback):
 
     def on_train_begin(
         self,
-        args: "AxolotlTrainingArguments",  # pylint: disable=unused-argument
-        state: TrainerState,  # pylint: disable=unused-argument
+        args: "AxolotlTrainingArguments",
+        state: TrainerState,
         control: TrainerControl,
-        **kwargs,  # pylint: disable=unused-argument
+        **kwargs,
     ):
         if is_main_process():
             try:
diff --git a/src/axolotl/utils/callbacks/profiler.py b/src/axolotl/utils/callbacks/profiler.py
index d26b7f9dd..2cf5e0f4f 100644
--- a/src/axolotl/utils/callbacks/profiler.py
+++ b/src/axolotl/utils/callbacks/profiler.py
@@ -26,58 +26,50 @@ class PytorchProfilerCallback(TrainerCallback):
         if profiler_steps_start == 0:
             # start recording memory allocations before everything is allocated, because if we start
             # at the beginning of step 0, we won't have any memory allocations in the traces
-            torch.cuda.memory._record_memory_history(  # pylint: disable=protected-access
-                enabled="all"
-            )
+            torch.cuda.memory._record_memory_history(enabled="all")
             profiler_steps_start = -1
         self.profiler_steps_start = profiler_steps_start
 
-    def on_step_begin(  # pylint: disable=unused-argument
+    def on_step_begin(
         self,
-        args: TrainingArguments,  # pylint: disable=unused-argument
+        args: TrainingArguments,
         state: TrainerState,
-        control: TrainerControl,  # pylint: disable=unused-argument
-        **kwargs,  # pylint: disable=unused-argument
+        control: TrainerControl,
+        **kwargs,
     ):
         if state.global_step == self.profiler_steps_start:
-            torch.cuda.memory._record_memory_history(  # pylint: disable=protected-access
-                enabled="all"
-            )
+            torch.cuda.memory._record_memory_history(enabled="all")
 
-    def on_step_end(  # pylint: disable=unused-argument
+    def on_step_end(
         self,
-        args: TrainingArguments,  # pylint: disable=unused-argument
+        args: TrainingArguments,
         state: TrainerState,
-        control: TrainerControl,  # pylint: disable=unused-argument
-        **kwargs,  # pylint: disable=unused-argument
+        control: TrainerControl,
+        **kwargs,
     ):
         if state.global_step == self.profiler_steps_end:
-            snapshot = torch.cuda.memory._snapshot()  # pylint: disable=protected-access
+            snapshot = torch.cuda.memory._snapshot()
             with open(Path(args.output_dir) / "snapshot.pickle", "wb") as fout:
                 dump(snapshot, fout)
 
             # tell CUDA to stop recording memory allocations now
-            torch.cuda.memory._record_memory_history(  # pylint: disable=protected-access
-                enabled=None
-            )
+            torch.cuda.memory._record_memory_history(enabled=None)
 
-    def on_train_end(  # pylint: disable=unused-argument
+    def on_train_end(
         self,
-        args: TrainingArguments,  # pylint: disable=unused-argument
+        args: TrainingArguments,
         state: TrainerState,
-        control: TrainerControl,  # pylint: disable=unused-argument
-        **kwargs,  # pylint: disable=unused-argument
+        control: TrainerControl,
+        **kwargs,
     ):
         # make sure to record if we happen to have more steps than steps to profile
         if (
             state.global_step >= self.profiler_steps_start
             and state.global_step < self.profiler_steps_end
         ):
-            snapshot = torch.cuda.memory._snapshot()  # pylint: disable=protected-access
+            snapshot = torch.cuda.memory._snapshot()
             with open(Path(args.output_dir) / "snapshot.pickle", "wb") as fout:
                 dump(snapshot, fout)
 
             # tell CUDA to stop recording memory allocations now
-            torch.cuda.memory._record_memory_history(  # pylint: disable=protected-access
-                enabled=None
-            )
+            torch.cuda.memory._record_memory_history(enabled=None)
diff --git a/src/axolotl/utils/callbacks/qat.py b/src/axolotl/utils/callbacks/qat.py
index cf4d9a937..70746d6be 100644
--- a/src/axolotl/utils/callbacks/qat.py
+++ b/src/axolotl/utils/callbacks/qat.py
@@ -38,9 +38,7 @@ class QATCallback(TrainerCallback):
     def __init__(self, cfg: QATConfig):
         self.cfg = cfg
 
-    def on_step_begin(
-        self, args, state, control, model, **kwargs
-    ):  # pylint: disable=unused-argument
+    def on_step_begin(self, args, state, control, model, **kwargs):
         if self.cfg.fake_quant_after_n_steps is not None:
             if state.global_step == 0:
                 LOG.info(f"Disabling fake quantization at step {state.global_step}")
diff --git a/src/axolotl/utils/config/__init__.py b/src/axolotl/utils/config/__init__.py
index c9613c39b..534d7c4a4 100644
--- a/src/axolotl/utils/config/__init__.py
+++ b/src/axolotl/utils/config/__init__.py
@@ -37,7 +37,7 @@ def choose_device(cfg):
                 return f"npu:{cfg.local_rank}"
 
             raise SystemError("No CUDA/mps/npu device found")
-        except Exception:  # pylint: disable=broad-exception-caught
+        except Exception:
             return "cpu"
 
     cfg.device = get_device()
@@ -266,8 +266,8 @@ def validate_config(
 
     if cfg.plugins:
         (
-            AxolotlConfigWCapabilities,  # pylint: disable=invalid-name
-            AxolotlInputConfig,  # pylint: disable=invalid-name
+            AxolotlConfigWCapabilities,
+            AxolotlInputConfig,
         ) = merge_input_args()
 
     # Convert datasets to proper format if needed
diff --git a/src/axolotl/utils/ctx_managers/__init__.py b/src/axolotl/utils/ctx_managers/__init__.py
index e544621b5..6ffda9e55 100644
--- a/src/axolotl/utils/ctx_managers/__init__.py
+++ b/src/axolotl/utils/ctx_managers/__init__.py
@@ -1,6 +1,5 @@
 """Init for context manager submodule"""
 
-# pylint: disable=unused-import
 # flake8: noqa
 
 from .sequence_parallel import SequenceParallelContextManager
diff --git a/src/axolotl/utils/ctx_managers/sequence_parallel.py b/src/axolotl/utils/ctx_managers/sequence_parallel.py
index 029d991dd..1ec91ae2a 100644
--- a/src/axolotl/utils/ctx_managers/sequence_parallel.py
+++ b/src/axolotl/utils/ctx_managers/sequence_parallel.py
@@ -26,7 +26,7 @@ def apply_sequence_parallelism(
     local_rank: int,
     local_world_size: int,
     gradient_accumulation_steps: int,
-    ring_attn_func: RingAttnFunc,  # pylint: disable=unused-argument
+    ring_attn_func: RingAttnFunc,
 ) -> tuple[dict[str, torch.Tensor], int, int]:
     """
     Apply sequence parallelism slicing to a batch.
diff --git a/src/axolotl/utils/data/pretraining.py b/src/axolotl/utils/data/pretraining.py
index f3422f990..72c5536e9 100644
--- a/src/axolotl/utils/data/pretraining.py
+++ b/src/axolotl/utils/data/pretraining.py
@@ -67,7 +67,7 @@ def encode_pretraining(
     buffer_labels = torch.tensor([], dtype=torch.long)
     buffer_attention_mask = torch.tensor([], dtype=torch.long)
 
-    for ids, labels, mask in zip(input_ids, targets, attention_mask):
+    for ids, labels, mask in zip(input_ids, targets, attention_mask, strict=False):
         if buffer_input_ids.numel() == max_tokens:
             new_input_ids.append(buffer_input_ids)
             new_labels.append(buffer_labels)
@@ -247,7 +247,6 @@ def encode_packed_pretraining(
     batch_size: int = 4,
     multipack_attn: Optional[bool] = True,
 ) -> Dict[str, List]:
-    # pylint: disable=duplicate-code
     # tokenize all the examples
     # rows get split with stride (overlap)
     train_dataset = ds_wrapper(dataset=Dataset.from_dict(examples))[0]
diff --git a/src/axolotl/utils/data/rl.py b/src/axolotl/utils/data/rl.py
index 6fd539758..d371c9acb 100644
--- a/src/axolotl/utils/data/rl.py
+++ b/src/axolotl/utils/data/rl.py
@@ -255,7 +255,6 @@ def _load_split(cfg: DictDefault, split: Literal["train", "test"]) -> Dataset:
     return dataset
 
 
-# pylint: disable=duplicate-code
 def _load_or_create_dataset_split(
     cfg: DictDefault, tokenizer: PreTrainedTokenizer, split: Literal["train", "test"]
 ) -> Dataset:
diff --git a/src/axolotl/utils/data/shared.py b/src/axolotl/utils/data/shared.py
index 21c8e472b..1d7d37f15 100644
--- a/src/axolotl/utils/data/shared.py
+++ b/src/axolotl/utils/data/shared.py
@@ -337,7 +337,7 @@ def generate_split_fingerprints(
     dataset: Dataset, val_set_size: int | float, seed: int
 ) -> tuple[str, str]:
     """Generate consistent fingerprints for train/test splits."""
-    fingerprint = dataset._fingerprint  # pylint: disable=protected-access
+    fingerprint = dataset._fingerprint
 
     train_hash_input = f"{fingerprint}|{val_set_size}|train|{seed}"
     test_hash_input = f"{fingerprint}|{val_set_size}|test|{seed}"
@@ -497,7 +497,7 @@ def try_load_from_hub(
             token=cfg.hf_use_auth_token,
         )
         return dataset[split]
-    except Exception:  # pylint: disable=broad-except # nosec
+    except Exception:
         LOG.info("Unable to find prepared dataset in HuggingFace Hub")
         return None
 
diff --git a/src/axolotl/utils/data/utils.py b/src/axolotl/utils/data/utils.py
index 856a609c7..4868576a0 100644
--- a/src/axolotl/utils/data/utils.py
+++ b/src/axolotl/utils/data/utils.py
@@ -44,7 +44,7 @@ def retry_on_request_exceptions(
 
     def decorator(func):
         @functools.wraps(func)
-        def wrapper(*args, **kwargs):  # pylint: disable=inconsistent-return-statements
+        def wrapper(*args, **kwargs):
             for attempt in range(max_retries):
                 try:
                     return func(*args, **kwargs)
diff --git a/src/axolotl/utils/data/wrappers.py b/src/axolotl/utils/data/wrappers.py
index b6dc42c71..cb9e2c6b4 100644
--- a/src/axolotl/utils/data/wrappers.py
+++ b/src/axolotl/utils/data/wrappers.py
@@ -54,7 +54,6 @@ def handle_unknown_dataset_strategy(dataset_config: DictDefault) -> NoReturn:
     raise ValueError(error_message)
 
 
-# pylint: disable=too-many-return-statements
 def get_dataset_wrapper(
     dataset_config: DictDefault,
     tokenizer: PreTrainedTokenizer,
@@ -62,7 +61,7 @@ def get_dataset_wrapper(
     dataset_base_type: str | None,
     dataset: Dataset | IterableDataset,
     dataset_prompt_style: str | None = None,
-    processor: ProcessorMixin | None = None,  # pylint: disable=unused-argument
+    processor: ProcessorMixin | None = None,
 ) -> tuple[Dataset | IterableDataset, Prompter | None]:
     """Create an appropriate dataset wrapper and prompter based on dataset
     configuration.
diff --git a/src/axolotl/utils/dict.py b/src/axolotl/utils/dict.py
index c2670dfeb..7d146c7a9 100644
--- a/src/axolotl/utils/dict.py
+++ b/src/axolotl/utils/dict.py
@@ -17,15 +17,15 @@ class DictDefault(Dict):
     def __setitem__(self, name, value):
         # workaround for pickle/unpickle issues and __frozen not being available
         try:
-            isFrozen = hasattr(  # pylint: disable=invalid-name
+            isFrozen = hasattr(self, "__frozen") and object.__getattribute__(
                 self, "__frozen"
-            ) and object.__getattribute__(self, "__frozen")
+            )
         except AttributeError:
-            isFrozen = False  # pylint: disable=invalid-name
+            isFrozen = False
 
         if isFrozen and name not in super().keys():
             raise KeyError(name)
-        super(Dict, self).__setitem__(name, value)  # pylint: disable=bad-super-call
+        super(Dict, self).__setitem__(name, value)
         try:
             p = object.__getattribute__(self, "__parent")
             key = object.__getattribute__(self, "__key")
diff --git a/src/axolotl/utils/distributed.py b/src/axolotl/utils/distributed.py
index 48771fd97..840772d91 100644
--- a/src/axolotl/utils/distributed.py
+++ b/src/axolotl/utils/distributed.py
@@ -15,7 +15,7 @@ from transformers.utils.import_utils import (
     is_torch_npu_available,
 )
 
-distributed_state = None  # pylint: disable=invalid-name
+distributed_state = None
 
 
 def get_device_type() -> torch.device:
@@ -48,7 +48,7 @@ def get_current_device() -> int:
 
 
 def init_distributed_state():
-    global distributed_state  # pylint: disable=global-statement
+    global distributed_state
     if distributed_state is None:
         timeout = int(os.environ.get("AXOLOTL_NCCL_TIMEOUT", 1800))
         try:
@@ -137,7 +137,7 @@ def zero_first(is_main: bool):
         barrier()
 
 
-def gather_scalar_from_all_ranks(fn, world_size=1):  # pylint: disable=invalid-name
+def gather_scalar_from_all_ranks(fn, world_size=1):
     """
     Run a callable 'fn' on all ranks and gather the results on the specified rank.
 
@@ -201,7 +201,7 @@ def broadcast_dict(vals: dict):
     return vals
 
 
-def compute_and_broadcast(fn):  # pylint: disable=invalid-name
+def compute_and_broadcast(fn):
     """
     Compute a value using the function 'fn' only on the specified rank (default is 0).
     The value is then broadcasted to all other ranks.
@@ -234,7 +234,7 @@ def compute_and_broadcast(fn):  # pylint: disable=invalid-name
     return float(value_tensor.item())
 
 
-def gather_from_all_ranks(fn, world_size=1):  # pylint: disable=invalid-name
+def gather_from_all_ranks(fn, world_size=1):
     """
     Run a callable 'fn' on all ranks and gather the results on the specified rank.
 
diff --git a/src/axolotl/utils/environment.py b/src/axolotl/utils/environment.py
index 3c83c87cb..751f7e253 100644
--- a/src/axolotl/utils/environment.py
+++ b/src/axolotl/utils/environment.py
@@ -26,7 +26,7 @@ def check_cuda_p2p_ib_support():
                 for unsupported_device in unsupported_devices
             ):
                 return False
-    except Exception:  # pylint: disable=broad-except # nosec
+    except Exception:  # nosec B110
         pass
     return True
 
diff --git a/src/axolotl/utils/lora.py b/src/axolotl/utils/lora.py
index 759c17ac2..6ae481b6b 100644
--- a/src/axolotl/utils/lora.py
+++ b/src/axolotl/utils/lora.py
@@ -15,6 +15,7 @@
 """
 module to get the state dict of a merged lora model
 """
+
 import torch
 from peft.tuners.tuners_utils import onload_layer
 from peft.utils import ModulesToSaveWrapper, _get_submodules
diff --git a/src/axolotl/utils/mistral/mistral_tokenizer.py b/src/axolotl/utils/mistral/mistral_tokenizer.py
index 61cbdc5b0..0414ece78 100644
--- a/src/axolotl/utils/mistral/mistral_tokenizer.py
+++ b/src/axolotl/utils/mistral/mistral_tokenizer.py
@@ -53,7 +53,7 @@ class HFMistralTokenizer(MistralCommonTokenizer):
         """
         # Check if MistralRequestValidator has a _mode attribute.
         # This is a private API and may change in the future.
-        # pylint: disable=protected-access
+
         from mistral_common.protocol.instruct.validator import MistralRequestValidator
 
         if not (
@@ -74,7 +74,7 @@ class HFMistralTokenizer(MistralCommonTokenizer):
     def apply_chat_template(  # type: ignore
         self,
         conversation: list[dict] | list[list[dict]],
-        chat_template: str | None = None,  # pylint: disable=unused-argument
+        chat_template: str | None = None,
         add_generation_prompt: bool = False,
         **kwargs,
     ) -> str | list[int]:
diff --git a/src/axolotl/utils/model_shard_quant.py b/src/axolotl/utils/model_shard_quant.py
index 5c5006eda..f20a9625e 100644
--- a/src/axolotl/utils/model_shard_quant.py
+++ b/src/axolotl/utils/model_shard_quant.py
@@ -46,13 +46,11 @@ def _replace_linear(
 
         if isinstance(module, torch.nn.Linear) and name not in skip_modules:
             if issubclass(linear_replacement, Linear4bit):
-                model._modules[name] = (  # pylint: disable=protected-access
-                    linear_replacement(
-                        module.in_features,
-                        module.out_features,
-                        module.bias is not None,
-                        **kwargs,
-                    )
+                model._modules[name] = linear_replacement(
+                    module.in_features,
+                    module.out_features,
+                    module.bias is not None,
+                    **kwargs,
                 )
             else:
                 raise ValueError(
@@ -151,7 +149,7 @@ def load_sharded_model(
             model_name,
             use_cache=False,
             torch_dtype=torch.float32,
-            _attn_implementation=model_config._attn_implementation,  # pylint: disable=protected-access
+            _attn_implementation=model_config._attn_implementation,
             trust_remote_code=cfg.trust_remote_code,
         )
         dtype = torch_dtype if not cfg.float32 else None
diff --git a/src/axolotl/utils/optimizers/adopt.py b/src/axolotl/utils/optimizers/adopt.py
index 6f064abbf..20ddfa7ec 100644
--- a/src/axolotl/utils/optimizers/adopt.py
+++ b/src/axolotl/utils/optimizers/adopt.py
@@ -6,7 +6,6 @@ Taniguchi, Shohei and Harada, Keno and Minegishi, Gouki and Oshima, Yuta and Jeo
 """
 
 # mypy: ignore-errors
-# pylint: skip-file
 # flake8: noqa
 # mypy: allow-untyped-decorators
 # mypy: allow-untyped-defs
@@ -288,7 +287,9 @@ def _single_tensor_adopt(
             assert (
                 param.device.type == step_t.device.type
                 and param.device.type in capturable_supported_devices
-            ), f"If capturable=True, params and state_steps must be on supported devices: {capturable_supported_devices}."
+            ), (
+                f"If capturable=True, params and state_steps must be on supported devices: {capturable_supported_devices}."
+            )
 
         step = step_t if capturable or differentiable else _get_value(step_t)
 
@@ -365,7 +366,9 @@ def _multi_tensor_adopt(
             p.device.type == step.device.type
             and p.device.type in capturable_supported_devices
             for p, step in zip(params, state_steps)
-        ), f"If capturable=True, params and state_steps must be on supported devices: {capturable_supported_devices}."
+        ), (
+            f"If capturable=True, params and state_steps must be on supported devices: {capturable_supported_devices}."
+        )
 
     assert grad_scale is None and found_inf is None
 
diff --git a/src/axolotl/utils/samplers/multipack.py b/src/axolotl/utils/samplers/multipack.py
index af62c0a4f..d07988613 100644
--- a/src/axolotl/utils/samplers/multipack.py
+++ b/src/axolotl/utils/samplers/multipack.py
@@ -268,7 +268,7 @@ class MultipackBatchSampler(BatchSampler):
         num_processes: int | None = None,  # Number of processes for parallel packing
         safe_mode: bool = True,  # Conservative packing to prevent training instability
         mp_start_method: str = "fork",
-        **kwargs,  # pylint: disable=unused-argument
+        **kwargs,
     ):
         super().__init__(sampler, batch_size, drop_last)
         self.batch_size = batch_size
@@ -317,9 +317,7 @@ class MultipackBatchSampler(BatchSampler):
             return self._batches
 
         # Get indices from the sampler
-        indices = [  # pylint: disable=unnecessary-comprehension
-            idx for idx in self.sampler
-        ]
+        indices = [idx for idx in self.sampler]
 
         # Get lengths of the selected sequences
         lengths = self.lengths[indices]
@@ -417,7 +415,7 @@ class MultipackBatchSampler(BatchSampler):
 
         # Gather efficiency from all ranks and apply the calculation function
         sample_packing_actual_eff_all = reduce_and_broadcast(
-            lambda: float(self.efficiency()),  # pylint: disable=unnecessary-lambda
+            lambda: float(self.efficiency()),
             calc_sample_packing_eff_est,
         )
 
diff --git a/src/axolotl/utils/schedulers.py b/src/axolotl/utils/schedulers.py
index cdaf92271..83a993089 100644
--- a/src/axolotl/utils/schedulers.py
+++ b/src/axolotl/utils/schedulers.py
@@ -107,9 +107,7 @@ class InterpolatingLogScheduler(LRScheduler):
         self.num_steps = num_steps
         self.min_lr = min_lr
         self.max_lr = max_lr
-        self.q = (max_lr / min_lr) ** (  # pylint: disable=invalid-name
-            1 / (num_steps - 1)
-        )
+        self.q = (max_lr / min_lr) ** (1 / (num_steps - 1))
         super().__init__(optimizer, last_epoch)
 
     def get_lr(self):
@@ -310,7 +308,6 @@ class JaggedLRRestartScheduler(LRScheduler):
         jagged_restart_anneal_steps: int = 1,
         min_lr_scale: float = 0.001,
     ) -> None:
-        # pylint: disable=duplicate-code
         self.inner_schedule = inner_schedule
         self.restarts_steps = jagged_restart_steps
         self.warmup_steps = jagged_restart_warmup_steps
diff --git a/src/axolotl/utils/schemas/config.py b/src/axolotl/utils/schemas/config.py
index a607b3dca..4d660d4b7 100644
--- a/src/axolotl/utils/schemas/config.py
+++ b/src/axolotl/utils/schemas/config.py
@@ -1,7 +1,5 @@
 """Module with Pydantic models for configuration."""
 
-# pylint: disable=too-many-lines
-
 from typing import Annotated, Any, Literal
 
 from annotated_types import MinLen
@@ -51,7 +49,6 @@ from axolotl.utils.schemas.vllm import VllmConfig
 LOG = get_logger(__name__)
 
 
-# pylint: disable=too-many-ancestors
 class AxolotlInputConfig(
     ModelInputConfig,
     ModelOutputConfig,
@@ -124,10 +121,10 @@ class AxolotlInputConfig(
         },
     )
     trl: TRLConfig | None = Field(
-        default_factory=lambda: TRLConfig(),  # pylint: disable=unnecessary-lambda
+        default_factory=lambda: TRLConfig(),
     )
     vllm: VllmConfig | None = Field(
-        default_factory=lambda: VllmConfig(),  # pylint: disable=unnecessary-lambda
+        default_factory=lambda: VllmConfig(),
     )
     qat: QATConfig | None = None
     quantization: PTQConfig | None = None
@@ -1035,7 +1032,6 @@ class AxolotlConfigWCapabilities(AxolotlInputConfig):
 
         return data
 
-    # pylint: disable=duplicate-code
     @model_validator(mode="before")
     @classmethod
     def check_multigpu_unsloth(cls, data):
@@ -1051,7 +1047,6 @@ class AxolotlConfigWCapabilities(AxolotlInputConfig):
                 )
         return data
 
-    # pylint: disable=duplicate-code
     @model_validator(mode="before")
     @classmethod
     def check_multigpu_lora_kernels(cls, data):
diff --git a/src/axolotl/utils/schemas/datasets.py b/src/axolotl/utils/schemas/datasets.py
index d9c8042d4..e32468706 100644
--- a/src/axolotl/utils/schemas/datasets.py
+++ b/src/axolotl/utils/schemas/datasets.py
@@ -203,7 +203,6 @@ class SFTDataset(BaseModel):
 
     @model_validator(mode="before")
     @classmethod
-    # pylint: disable=duplicate-code
     def check_chat_template_config(cls, data):
         if isinstance(data, BaseModel):
             data = data.model_dump()
diff --git a/src/axolotl/utils/schemas/enums.py b/src/axolotl/utils/schemas/enums.py
index cf2a8b484..8f4718aa9 100644
--- a/src/axolotl/utils/schemas/enums.py
+++ b/src/axolotl/utils/schemas/enums.py
@@ -1,7 +1,5 @@
 """Enums for Axolotl input config"""
 
-# pylint: disable=invalid-name
-
 from enum import Enum
 
 import torch
diff --git a/src/axolotl/utils/schemas/training.py b/src/axolotl/utils/schemas/training.py
index b1788dcaa..8e06e82cb 100644
--- a/src/axolotl/utils/schemas/training.py
+++ b/src/axolotl/utils/schemas/training.py
@@ -96,9 +96,9 @@ class HyperparametersConfig(BaseModel):
             "description": "Path to torch distx for optim 'adamw_anyprecision'"
         },
     )
-    lr_scheduler: (SchedulerType | Literal["one_cycle"] | Literal["rex"]) | None = (
-        SchedulerType.COSINE
-    )
+    lr_scheduler: (
+        SchedulerType | Literal["one_cycle"] | Literal["rex"]
+    ) | None = SchedulerType.COSINE
     lr_scheduler_kwargs: dict[str, Any] | None = Field(
         default=None,
         json_schema_extra={
diff --git a/src/axolotl/utils/schemas/validation.py b/src/axolotl/utils/schemas/validation.py
index 217244b01..791894990 100644
--- a/src/axolotl/utils/schemas/validation.py
+++ b/src/axolotl/utils/schemas/validation.py
@@ -1,7 +1,5 @@
 """Module with validation methods for config pydantic model."""
 
-# pylint: disable=too-many-boolean-expressions
-
 import json
 import sys
 import tempfile
@@ -16,7 +14,6 @@ from transformers.utils.import_utils import is_torch_npu_available
 from axolotl.utils.logging import get_logger
 from axolotl.utils.schemas.enums import ChatTemplate, RingAttnFunc, RLType
 
-# pylint: disable=too-many-lines
 
 LOG = get_logger(__name__)
 
@@ -346,7 +343,6 @@ class TrainingValidationMixin:
     @model_validator(mode="after")
     def check_fft_possible_bad_config(self):
         if (
-            # pylint: disable=too-many-boolean-expressions
             not (self.bf16 or self.bfloat16)
             and (self.fp16 or self.float16)
             and not self.adapter
@@ -460,12 +456,12 @@ class TrainingValidationMixin:
     @classmethod
     def check_mistral_common_import(cls, tokenizer_use_mistral_common):
         if tokenizer_use_mistral_common:
-            try:
-                import mistral_common  # noqa: F401 # pylint:disable=unused-import
-            except ImportError as exception:
+            import importlib.util
+
+            if importlib.util.find_spec("mistral_common") is None:
                 raise ImportError(
                     "mistral-common is required for mistral models. Please install it with `pip install axolotl` or `pip install -e .`."
-                ) from exception
+                )
 
         return tokenizer_use_mistral_common
 
@@ -685,7 +681,7 @@ class RLValidationMixin:
         # TODO: SalmanMohammadi
         # Distributed RL with QLoRA + gradient checkpointing
         # and use_reentrant = True is broken upstream in TRL
-        # pylint: disable=too-many-boolean-expressions
+
         if (
             data.get("rl")
             and data.get("gradient_checkpointing")
@@ -1252,26 +1248,19 @@ class ComplexValidationMixin:
                 import transformers.modeling_flash_attention_utils
                 from transformers.utils import is_flash_attn_greater_or_equal
 
-                # pylint: disable=protected-access
                 transformers.modeling_flash_attention_utils._flash_supports_window = (
                     True
                 )
-                setattr(
-                    sys.modules["transformers.modeling_flash_attention_utils"],
-                    "_flash_supports_window",
-                    True,
-                )
-                setattr(
-                    sys.modules["transformers.modeling_flash_attention_utils"],
-                    "_flash_supports_window_size",
-                    True,
-                )
-                setattr(
-                    sys.modules["transformers.modeling_flash_attention_utils"],
-                    "is_flash_attn_greater_or_equal",
-                    is_flash_attn_greater_or_equal,
-                )
-                import ring_flash_attn  # noqa: F401 # pylint:disable=unused-import
+                sys.modules[
+                    "transformers.modeling_flash_attention_utils"
+                ]._flash_supports_window = True
+                sys.modules[
+                    "transformers.modeling_flash_attention_utils"
+                ]._flash_supports_window_size = True
+                sys.modules[
+                    "transformers.modeling_flash_attention_utils"
+                ].is_flash_attn_greater_or_equal = is_flash_attn_greater_or_equal
+                import ring_flash_attn  # noqa: F401  # Required after monkey-patching
             except ImportError as exception:
                 raise ImportError(
                     "context_parallel_size > 1 but ring_flash_attn is not installed. "
@@ -1336,7 +1325,6 @@ class GRPOVllmValidationMixin:
         return self
 
 
-# pylint: disable=too-many-ancestors
 class ValidationMixin(
     DatasetValidationMixin,
     AttentionValidationMixin,
diff --git a/src/axolotl/utils/tokenization.py b/src/axolotl/utils/tokenization.py
index 3526bd5b5..3f44a3429 100644
--- a/src/axolotl/utils/tokenization.py
+++ b/src/axolotl/utils/tokenization.py
@@ -31,7 +31,7 @@ def check_example_labels(example, tokenizer, text_only=False):
     # You can compare the input_ids and labels element-wise
     # Remember to ignore positions with IGNORE_TOKEN_ID (if you use it) or attention_mask equal to 0
     colored_tokens = []
-    for _, (input_id, label_id) in enumerate(zip(input_ids, labels)):
+    for _, (input_id, label_id) in enumerate(zip(input_ids, labels, strict=False)):
         decoded_input_token = tokenizer.decode(input_id)
         # Choose the color based on whether the label has the ignore value or not
         color = "red" if label_id == -100 else ("yellow" if label_id == 0 else "green")
diff --git a/src/axolotl/utils/trainer.py b/src/axolotl/utils/trainer.py
index e424cb55a..08038cb18 100644
--- a/src/axolotl/utils/trainer.py
+++ b/src/axolotl/utils/trainer.py
@@ -496,7 +496,7 @@ def calculate_total_num_steps(cfg, train_dataset, update=True):
                 return max(estimates)
 
             sample_packing_actual_eff_all = reduce_and_broadcast(
-                lambda: sampler.efficiency(),  # pylint: disable=unnecessary-lambda
+                lambda: sampler.efficiency(),
                 calc_sample_packing_eff_est,
             )
             sample_packing_eff_est = (
diff --git a/src/setuptools_axolotl_dynamic_dependencies.py b/src/setuptools_axolotl_dynamic_dependencies.py
index 02a5b8083..ccd7c72d7 100644
--- a/src/setuptools_axolotl_dynamic_dependencies.py
+++ b/src/setuptools_axolotl_dynamic_dependencies.py
@@ -9,7 +9,6 @@ from importlib.metadata import PackageNotFoundError, version
 from setuptools.command.build_py import build_py as _build_py
 
 
-# pylint: disable=duplicate-code
 def parse_requirements():
     _install_requires = []
     _dependency_links = []
diff --git a/tests/cli/test_cli_evaluate.py b/tests/cli/test_cli_evaluate.py
index a191bf957..e8b88625a 100644
--- a/tests/cli/test_cli_evaluate.py
+++ b/tests/cli/test_cli_evaluate.py
@@ -1,7 +1,5 @@
 """Tests for evaluate CLI command."""
 
-# pylint: disable=duplicate-code
-
 from unittest.mock import patch
 
 from axolotl.cli.main import cli
@@ -31,7 +29,6 @@ class TestEvaluateCommand(BaseCliTest):
         config_path = tmp_path / "config.yml"
         config_path.write_text(valid_test_config)
 
-        # pylint: disable=duplicate-code
         with patch("axolotl.cli.evaluate.do_evaluate") as mock_evaluate:
             result = cli_runner.invoke(
                 cli,
diff --git a/tests/cli/test_cli_inference.py b/tests/cli/test_cli_inference.py
index 3394c189d..807dc7fa3 100644
--- a/tests/cli/test_cli_inference.py
+++ b/tests/cli/test_cli_inference.py
@@ -1,7 +1,5 @@
 """pytest tests for axolotl CLI inference command."""
 
-# pylint: disable=duplicate-code
-
 from unittest.mock import patch
 
 from axolotl.cli.main import cli
diff --git a/tests/cli/test_cli_merge_sharded_fsdp_weights.py b/tests/cli/test_cli_merge_sharded_fsdp_weights.py
index 4f6a973ea..de13b28ed 100644
--- a/tests/cli/test_cli_merge_sharded_fsdp_weights.py
+++ b/tests/cli/test_cli_merge_sharded_fsdp_weights.py
@@ -1,7 +1,5 @@
 """pytest tests for axolotl CLI merge_sharded_fsdp_weights command."""
 
-# pylint: disable=duplicate-code
-
 from unittest.mock import patch
 
 from axolotl.cli.main import cli
diff --git a/tests/cli/test_cli_train.py b/tests/cli/test_cli_train.py
index d4d90f57f..1251ab3c0 100644
--- a/tests/cli/test_cli_train.py
+++ b/tests/cli/test_cli_train.py
@@ -1,7 +1,5 @@
 """Tests for train CLI command."""
 
-# pylint: disable=duplicate-code
-
 from unittest.mock import MagicMock, patch
 
 from axolotl.cli.main import cli
diff --git a/tests/cli/test_utils.py b/tests/cli/test_utils.py
index a3e4e9887..431c35c3c 100644
--- a/tests/cli/test_utils.py
+++ b/tests/cli/test_utils.py
@@ -1,7 +1,5 @@
 """pytest tests for axolotl CLI utils."""
 
-# pylint: disable=redefined-outer-name
-
 import json
 from unittest.mock import Mock, patch
 
@@ -25,7 +23,7 @@ MOCK_TREE_RESPONSE = {
 def mock_responses():
     """Mock responses for API and file downloads"""
 
-    def mock_get(url, timeout=None):  # pylint: disable=unused-argument
+    def mock_get(url, timeout=None):
         response = Mock()
         if "api.github.com" in url:
             response.text = json.dumps(MOCK_TREE_RESPONSE)
@@ -93,21 +91,21 @@ def assert_launcher_args_in_command(
     called_cmd = mock_subprocess_call.call_args.args[0]
 
     # Verify launcher
-    assert (
-        called_cmd[0] == launcher
-    ), f"Expected launcher {launcher}, got {called_cmd[0]}"
+    assert called_cmd[0] == launcher, (
+        f"Expected launcher {launcher}, got {called_cmd[0]}"
+    )
 
     # Verify launcher args are present
     for arg in expected_launcher_args:
-        assert (
-            arg in called_cmd
-        ), f"Expected launcher arg '{arg}' not found in command: {called_cmd}"
+        assert arg in called_cmd, (
+            f"Expected launcher arg '{arg}' not found in command: {called_cmd}"
+        )
 
     # Verify module is present
     assert "-m" in called_cmd, "Expected -m flag for module execution"
-    assert (
-        command_module in called_cmd
-    ), f"Expected module {command_module} not found in command: {called_cmd}"
+    assert command_module in called_cmd, (
+        f"Expected module {command_module} not found in command: {called_cmd}"
+    )
 
 
 def assert_no_launcher_args_contamination(mock_subprocess_call, launcher: str):
@@ -126,17 +124,17 @@ def assert_no_launcher_args_contamination(mock_subprocess_call, launcher: str):
         launch_idx = called_cmd.index("launch")
         m_idx = called_cmd.index("-m")
         launcher_section = called_cmd[launch_idx + 1 : m_idx]
-        assert (
-            len(launcher_section) == 0
-        ), f"Unexpected launcher args found: {launcher_section}"
+        assert len(launcher_section) == 0, (
+            f"Unexpected launcher args found: {launcher_section}"
+        )
     elif launcher == "torchrun":
         # For torchrun, launcher args should be between 'torchrun' and '-m'
         torchrun_idx = called_cmd.index("torchrun")
         m_idx = called_cmd.index("-m")
         launcher_section = called_cmd[torchrun_idx + 1 : m_idx]
-        assert (
-            len(launcher_section) == 0
-        ), f"Unexpected launcher args found: {launcher_section}"
+        assert len(launcher_section) == 0, (
+            f"Unexpected launcher args found: {launcher_section}"
+        )
 
 
 @pytest.fixture
diff --git a/tests/conftest.py b/tests/conftest.py
index 9e1af318d..98847ebad 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -33,10 +33,9 @@ logging.getLogger("filelock").setLevel(logging.CRITICAL)
 
 
 def retry_on_request_exceptions(max_retries=3, delay=1):
-    # pylint: disable=duplicate-code
     def decorator(func):
         @functools.wraps(func)
-        def wrapper(*args, **kwargs):  # pylint: disable=inconsistent-return-statements
+        def wrapper(*args, **kwargs):
             for attempt in range(max_retries):
                 try:
                     return func(*args, **kwargs)
@@ -171,7 +170,7 @@ def download_argilla_distilabel_intel_orca_dpo_dataset():
 # @disable_hf_offline
 # def dataset_fozzie_alpaca_dpo_dataset(
 #     download_fozzie_alpaca_dpo_dataset,
-# ):  # pylint: disable=unused-argument,redefined-outer-name
+# ):
 #     return load_dataset("fozziethebeat/alpaca_messages_2k_dpo_test", split="train")
 #
 #
@@ -179,7 +178,7 @@ def download_argilla_distilabel_intel_orca_dpo_dataset():
 # @disable_hf_offline
 # def dataset_fozzie_alpaca_dpo_dataset_rev_ea82cff(
 #     download_fozzie_alpaca_dpo_dataset,
-# ):  # pylint: disable=unused-argument,redefined-outer-name
+# ):
 #     return load_dataset(
 #         "fozziethebeat/alpaca_messages_2k_dpo_test", split="train", revision="ea82cff"
 #     )
@@ -359,7 +358,7 @@ def download_llama32_1b_model_fixture():
 @enable_hf_offline
 def tokenizer_huggyllama(
     download_huggyllama_model_fixture,
-):  # pylint: disable=unused-argument,redefined-outer-name
+):
     tokenizer = AutoTokenizer.from_pretrained("huggyllama/llama-7b")
     tokenizer.pad_token = "</s>"
 
@@ -370,7 +369,7 @@ def tokenizer_huggyllama(
 @enable_hf_offline
 def tokenizer_huggyllama_w_special_tokens(
     tokenizer_huggyllama,
-):  # pylint: disable=redefined-outer-name
+):
     tokenizer_huggyllama.add_special_tokens(
         {
             "bos_token": "<s>",
@@ -386,7 +385,7 @@ def tokenizer_huggyllama_w_special_tokens(
 @enable_hf_offline
 def tokenizer_llama2_7b(
     download_llama2_model_fixture,
-):  # pylint: disable=unused-argument,redefined-outer-name
+):
     tokenizer = AutoTokenizer.from_pretrained("NousResearch/Llama-2-7b-hf")
 
     return tokenizer
@@ -396,7 +395,7 @@ def tokenizer_llama2_7b(
 @enable_hf_offline
 def tokenizer_mistral_7b_instruct(
     download_mlx_mistral_7b_model_fixture,
-):  # pylint: disable=unused-argument,redefined-outer-name
+):
     return AutoTokenizer.from_pretrained("casperhansen/mistral-7b-instruct-v0.1-awq")
 
 
@@ -442,9 +441,7 @@ def cleanup_monkeypatches():
     # original_fa2_forward = LlamaFlashAttention2.forward
     original_llama_attn_forward = LlamaAttention.forward
     original_llama_forward = LlamaForCausalLM.forward
-    original_trainer_inner_training_loop = (
-        Trainer._inner_training_loop  # pylint: disable=protected-access
-    )
+    original_trainer_inner_training_loop = Trainer._inner_training_loop
     original_trainer_training_step = Trainer.training_step
     # monkey patches can happen inside the tests
     yield
@@ -452,9 +449,7 @@ def cleanup_monkeypatches():
     # LlamaFlashAttention2.forward = original_fa2_forward
     LlamaAttention.forward = original_llama_attn_forward
     LlamaForCausalLM.forward = original_llama_forward
-    Trainer._inner_training_loop = (  # pylint: disable=protected-access
-        original_trainer_inner_training_loop
-    )
+    Trainer._inner_training_loop = original_trainer_inner_training_loop
     Trainer.training_step = original_trainer_training_step
 
     # Reset other known monkeypatches
@@ -490,7 +485,7 @@ def cleanup_monkeypatches():
 @pytest.fixture
 def dataset_winglian_tiny_shakespeare(
     download_ds_fixture_bundle: Path,
-):  # pylint: disable=redefined-outer-name
+):
     ds_path = download_ds_fixture_bundle / "winglian__tiny-shakespeare"
     return datasets.load_from_disk(ds_path)
 
@@ -498,7 +493,7 @@ def dataset_winglian_tiny_shakespeare(
 @pytest.fixture
 def dataset_tatsu_lab_alpaca(
     download_ds_fixture_bundle: Path,
-):  # pylint: disable=redefined-outer-name
+):
     ds_path = download_ds_fixture_bundle / "tatsu-lab__alpaca"
     return datasets.load_from_disk(ds_path)["train"]
 
@@ -506,7 +501,7 @@ def dataset_tatsu_lab_alpaca(
 @pytest.fixture
 def dataset_mhenrichsen_alpaca_2k_test(
     download_ds_fixture_bundle: Path,
-):  # pylint: disable=redefined-outer-name
+):
     ds_path = download_ds_fixture_bundle / "mhenrichsen__alpaca_2k_test"
     return datasets.load_from_disk(ds_path)["train"]
 
@@ -514,7 +509,7 @@ def dataset_mhenrichsen_alpaca_2k_test(
 @pytest.fixture
 def dataset_argilla_ultrafeedback_binarized_preferences_cleaned(
     download_ds_fixture_bundle: Path,
-):  # pylint: disable=redefined-outer-name
+):
     ds_path = (
         download_ds_fixture_bundle
         / "argilla__ultrafeedback-binarized-preferences-cleaned"
@@ -525,7 +520,7 @@ def dataset_argilla_ultrafeedback_binarized_preferences_cleaned(
 @pytest.fixture
 def dataset_fozziethebeat_alpaca_messages_2k_dpo_test(
     download_ds_fixture_bundle: Path,
-):  # pylint: disable=redefined-outer-name
+):
     ds_path = download_ds_fixture_bundle / "fozziethebeat__alpaca_messages_2k_dpo_test"
     return datasets.load_from_disk(ds_path)["train"]
 
@@ -533,7 +528,7 @@ def dataset_fozziethebeat_alpaca_messages_2k_dpo_test(
 @pytest.fixture
 def dataset_fozziethebeat_alpaca_messages_2k_dpo_test_rev_ea82cff(
     download_ds_fixture_bundle: Path,
-):  # pylint: disable=redefined-outer-name
+):
     ds_path = (
         download_ds_fixture_bundle
         / "fozziethebeat__alpaca_messages_2k_dpo_test__rev_ea82cff"
@@ -557,7 +552,7 @@ def fixture_min_base_cfg():
     )
 
 
-# # pylint: disable=redefined-outer-name,unused-argument
+# Runs only during CI cache preload (AXOLOTL_IS_CI_CACHE_PRELOAD=1); skipped otherwise.
 @pytest.mark.skipif(
     os.environ.get("AXOLOTL_IS_CI_CACHE_PRELOAD", "-1") != "1",
     reason="Not running in CI cache preload",
diff --git a/tests/constants.py b/tests/constants.py
index e024e6920..cd75bd339 100644
--- a/tests/constants.py
+++ b/tests/constants.py
@@ -3,6 +3,7 @@
 This module contains constants and configuration dictionaries used for
 datasets and other utilities in the Axolotl project, specifically for testing.
 """
+
 # Configuration for Alpaca Messages Dataset
 ALPACA_MESSAGES_CONFIG_OG = {
     "path": "fozziethebeat/alpaca_messages_2k_dpo_test",
diff --git a/tests/core/test_builders.py b/tests/core/test_builders.py
index fab01a644..6428aa977 100644
--- a/tests/core/test_builders.py
+++ b/tests/core/test_builders.py
@@ -1,7 +1,5 @@
 """Unit tests for axolotl.core.builders"""
 
-# pylint: disable=protected-access
-
 import sys
 from pathlib import Path
 from unittest.mock import patch
@@ -330,7 +328,6 @@ def rand_reward_func(prompts, completions) -> list[float]:
         )
 
     def test_grpo_training_arguments(self, grpo_cfg, model, tokenizer, tmp_path):
-
         rewards_dir = tmp_path / "rewards_test"
         self._write_rewards_file(rewards_dir)
 
@@ -477,7 +474,7 @@ def rand_reward_func(prompts, completions) -> list[float]:
 
             assert trainer.optimizer_cls_and_kwargs is not None
 
-            from axolotl.contribs.mit.muon import (  # pylint: disable=no-name-in-module
+            from axolotl.contribs.mit.muon import (
                 Muon,
                 MuonOptimizerFactory,
             )
@@ -559,7 +556,7 @@ class TestHFCausalTrainerBuilder:
 
         assert trainer.optimizer_cls_and_kwargs is not None
 
-        from axolotl.contribs.mit.muon import (  # pylint: disable=no-name-in-module
+        from axolotl.contribs.mit.muon import (
             Muon,
             MuonOptimizerFactory,
         )
@@ -599,6 +596,6 @@ class TestTrainerClsPlugin:
         except TypeError as e:
             # Error raised if trainer_cls is None
             assert "'tuple' object has no attribute 'config'" not in str(e)
-        except Exception:  # pylint: disable=broad-exception-caught
+        except Exception:
             # Another error happens, so we passed trainer_cls to builder
             pass
diff --git a/tests/e2e/integrations/test_cut_cross_entropy.py b/tests/e2e/integrations/test_cut_cross_entropy.py
index 34e6c9644..1ba05077c 100644
--- a/tests/e2e/integrations/test_cut_cross_entropy.py
+++ b/tests/e2e/integrations/test_cut_cross_entropy.py
@@ -12,8 +12,6 @@ from axolotl.utils.dict import DictDefault
 
 from ..utils import check_model_output_exists
 
-# pylint: disable=duplicate-code
-
 
 @pytest.fixture()
 def min_cfg(temp_dir):
@@ -53,7 +51,6 @@ class TestCutCrossEntropyIntegration:
     e2e tests for cut_cross_entropy integration with Axolotl
     """
 
-    # pylint: disable=redefined-outer-name
     def test_llama_w_cce(self, min_cfg, temp_dir):
         cfg = DictDefault(min_cfg)
         cfg = validate_config(cfg)
@@ -69,7 +66,6 @@ class TestCutCrossEntropyIntegration:
             train(cfg=cfg, dataset_meta=dataset_meta)
             check_model_output_exists(temp_dir, cfg)
 
-    # pylint: disable=redefined-outer-name
     def test_qwen2_w_cce(self, temp_dir):
         cfg = DictDefault(
             {
diff --git a/tests/e2e/integrations/test_fp8.py b/tests/e2e/integrations/test_fp8.py
index 0302b7e35..7db63cc4d 100644
--- a/tests/e2e/integrations/test_fp8.py
+++ b/tests/e2e/integrations/test_fp8.py
@@ -18,7 +18,7 @@ class FP8IntegrationTestCase:
     @require_torch_2_7_0
     def test_fp8_single_gpu_smoke(self, temp_dir):
         """Smoke test for single GPU FP8 + torch.compile training"""
-        # pylint: disable=duplicate-code
+
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -53,7 +53,6 @@ class FP8IntegrationTestCase:
             }
         )
 
-        # pylint: disable=duplicate-code
         cfg = validate_config(cfg)
         normalize_config(cfg)
         dataset_meta = load_datasets(cfg=cfg)
diff --git a/tests/e2e/integrations/test_hooks.py b/tests/e2e/integrations/test_hooks.py
index 8743efb98..b85505caa 100644
--- a/tests/e2e/integrations/test_hooks.py
+++ b/tests/e2e/integrations/test_hooks.py
@@ -28,85 +28,81 @@ class LogHooksPlugin(BasePlugin):
         except FileNotFoundError:
             pass
 
-    def post_trainer_create(self, cfg, trainer):  # pylint: disable=unused-argument
+    def post_trainer_create(self, cfg, trainer):
         with open(
             self.base_dir.joinpath("plugin_hooks.log"), "a", encoding="utf-8"
         ) as f:
             f.write("post_trainer_create\n")
 
-    def pre_model_load(self, cfg):  # pylint: disable=unused-argument
+    def pre_model_load(self, cfg):
         with open(
             self.base_dir.joinpath("plugin_hooks.log"), "a", encoding="utf-8"
         ) as f:
             f.write("pre_model_load\n")
 
-    def post_model_build(self, cfg, model):  # pylint: disable=unused-argument
+    def post_model_build(self, cfg, model):
         with open(
             self.base_dir.joinpath("plugin_hooks.log"), "a", encoding="utf-8"
         ) as f:
             f.write("post_model_build\n")
 
-    def pre_lora_load(self, cfg, model):  # pylint: disable=unused-argument
+    def pre_lora_load(self, cfg, model):
         with open(
             self.base_dir.joinpath("plugin_hooks.log"), "a", encoding="utf-8"
         ) as f:
             f.write("pre_lora_load\n")
 
-    def post_lora_load(self, cfg, model):  # pylint: disable=unused-argument
+    def post_lora_load(self, cfg, model):
         with open(
             self.base_dir.joinpath("plugin_hooks.log"), "a", encoding="utf-8"
         ) as f:
             f.write("post_lora_load\n")
 
-    def post_model_load(self, cfg, model):  # pylint: disable=unused-argument
+    def post_model_load(self, cfg, model):
         with open(
             self.base_dir.joinpath("plugin_hooks.log"), "a", encoding="utf-8"
         ) as f:
             f.write("post_model_load\n")
 
-    def create_optimizer(self, cfg, trainer):  # pylint: disable=unused-argument
+    def create_optimizer(self, cfg, trainer):
         with open(
             self.base_dir.joinpath("plugin_hooks.log"), "a", encoding="utf-8"
         ) as f:
             f.write("create_optimizer\n")
 
-    def get_trainer_cls(self, cfg):  # pylint: disable=unused-argument
+    def get_trainer_cls(self, cfg):
         with open(
             self.base_dir.joinpath("plugin_hooks.log"), "a", encoding="utf-8"
         ) as f:
             f.write("get_trainer_cls\n")
 
-    def create_lr_scheduler(
-        self, cfg, trainer, optimizer, num_training_steps
-    ):  # pylint: disable=unused-argument
+    def create_lr_scheduler(self, cfg, trainer, optimizer, num_training_steps):
         with open(
             self.base_dir.joinpath("plugin_hooks.log"), "a", encoding="utf-8"
         ) as f:
             f.write("create_lr_scheduler\n")
 
-    def add_callbacks_pre_trainer(self, cfg, model):  # pylint: disable=unused-argument
+    def add_callbacks_pre_trainer(self, cfg, model):
         with open(
             self.base_dir.joinpath("plugin_hooks.log"), "a", encoding="utf-8"
         ) as f:
             f.write("add_callbacks_pre_trainer\n")
         return []
 
-    def add_callbacks_post_trainer(
-        self, cfg, trainer
-    ):  # pylint: disable=unused-argument
+    def add_callbacks_post_trainer(self, cfg, trainer):
         with open(
             self.base_dir.joinpath("plugin_hooks.log"), "a", encoding="utf-8"
         ) as f:
             f.write("add_callbacks_post_trainer\n")
         return []
 
-    def post_train(self, cfg, model):  # pylint: disable=unused-argument
+    def post_train(self, cfg, model):
         with open(
             self.base_dir.joinpath("plugin_hooks.log"), "a", encoding="utf-8"
         ) as f:
             f.write("post_train\n")
 
-    def post_train_unload(self, cfg):  # pylint: disable=unused-argument
+    def post_train_unload(self, cfg):
         with open(
             self.base_dir.joinpath("plugin_hooks.log"), "a", encoding="utf-8"
         ) as f:
@@ -119,7 +115,6 @@ class TestPluginHooks:
     """
 
     def test_plugin_hooks(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
diff --git a/tests/e2e/integrations/test_kd.py b/tests/e2e/integrations/test_kd.py
index 1ac3b537e..98383614b 100644
--- a/tests/e2e/integrations/test_kd.py
+++ b/tests/e2e/integrations/test_kd.py
@@ -81,7 +81,7 @@ class TestKnowledgeDistillation:
     @require_torch_2_5_1
     def test_llama_kd(self, temp_dir, kd_min_cfg):
         cfg = DictDefault(kd_min_cfg)
-        # pylint: disable=duplicate-code
+
         # write cfg to yaml file
         Path(temp_dir).mkdir(parents=True, exist_ok=True)
         with open(Path(temp_dir) / "config.yaml", "w", encoding="utf-8") as fout:
@@ -123,7 +123,7 @@ class TestKnowledgeDistillation:
             }
             | kd_min_cfg
         )
-        # pylint: disable=duplicate-code
+
         # write cfg to yaml file
         Path(temp_dir).mkdir(parents=True, exist_ok=True)
         with open(Path(temp_dir) / "config.yaml", "w", encoding="utf-8") as fout:
diff --git a/tests/e2e/integrations/test_liger.py b/tests/e2e/integrations/test_liger.py
index b1f5befdd..285969963 100644
--- a/tests/e2e/integrations/test_liger.py
+++ b/tests/e2e/integrations/test_liger.py
@@ -17,7 +17,6 @@ class LigerIntegrationTestCase:
 
     @require_torch_2_4_1
     def test_llama_wo_flce(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -53,7 +52,7 @@ class LigerIntegrationTestCase:
                 "save_first_step": False,
             }
         )
-        # pylint: disable=duplicate-code
+
         cfg = validate_config(cfg)
         prepare_plugins(cfg)
         normalize_config(cfg)
@@ -64,7 +63,6 @@ class LigerIntegrationTestCase:
 
     @require_torch_2_4_1
     def test_llama_w_flce(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -100,7 +98,7 @@ class LigerIntegrationTestCase:
                 "save_first_step": False,
             }
         )
-        # pylint: disable=duplicate-code
+
         cfg = validate_config(cfg)
         prepare_plugins(cfg)
         normalize_config(cfg)
diff --git a/tests/e2e/kernels/test_geglu.py b/tests/e2e/kernels/test_geglu.py
index 4094a8ce7..78ba74c0e 100644
--- a/tests/e2e/kernels/test_geglu.py
+++ b/tests/e2e/kernels/test_geglu.py
@@ -85,6 +85,6 @@ def test_geglu_inplace_preservation():
 
     assert not torch.equal(gate, gate_copy), "Gate should be modified in-place"
     assert not torch.equal(up, up_copy), "Up should be modified in-place"
-    assert not torch.equal(
-        grad_output, grad_copy
-    ), "Grad output should be modified in-place"
+    assert not torch.equal(grad_output, grad_copy), (
+        "Grad output should be modified in-place"
+    )
diff --git a/tests/e2e/kernels/test_lora.py b/tests/e2e/kernels/test_lora.py
index cd6131ff1..9baceb668 100644
--- a/tests/e2e/kernels/test_lora.py
+++ b/tests/e2e/kernels/test_lora.py
@@ -1,7 +1,5 @@
 """Tests for LoRA custom autograd."""
 
-# pylint: disable=invalid-name,redefined-outer-name
-
 import pytest
 import torch
 from bitsandbytes.functional import QuantState
@@ -333,7 +331,7 @@ def test_lora_qkv(sample_tensors):
     X.requires_grad = True
 
     # Test without LoRA adapters
-    # pylint: disable=duplicate-code
+
     Q1, K1, V1 = LoRA_QKV.apply(
         X,
         q_weight,
diff --git a/tests/e2e/kernels/test_quantize.py b/tests/e2e/kernels/test_quantize.py
index ea91407ef..60396584c 100644
--- a/tests/e2e/kernels/test_quantize.py
+++ b/tests/e2e/kernels/test_quantize.py
@@ -1,7 +1,5 @@
 """Tests for quantization utility functions."""
 
-# pylint: disable=invalid-name
-
 import torch
 from bitsandbytes.functional import QuantState
 
diff --git a/tests/e2e/kernels/test_swiglu.py b/tests/e2e/kernels/test_swiglu.py
index 60fdafb79..58d5e04a7 100644
--- a/tests/e2e/kernels/test_swiglu.py
+++ b/tests/e2e/kernels/test_swiglu.py
@@ -1,7 +1,5 @@
 """Tests for SwiGLU activation function Triton kernels."""
 
-# pylint: disable=duplicate-code
-
 import torch
 import torch.nn.functional as F
 
@@ -74,6 +72,6 @@ def test_swiglu_inplace_preservation():
 
     assert not torch.equal(gate, gate_copy), "Gate should be modified in-place"
     assert not torch.equal(up, up_copy), "Up should be modified in-place"
-    assert not torch.equal(
-        grad_output, grad_copy
-    ), "Grad output should be modified in-place"
+    assert not torch.equal(grad_output, grad_copy), (
+        "Grad output should be modified in-place"
+    )
diff --git a/tests/e2e/multigpu/solo/test_flex.py b/tests/e2e/multigpu/solo/test_flex.py
index cbdf8de96..881d75c25 100644
--- a/tests/e2e/multigpu/solo/test_flex.py
+++ b/tests/e2e/multigpu/solo/test_flex.py
@@ -31,7 +31,6 @@ class TestPackedFlex:
 
     @require_torch_2_6_0
     def test_loss_llama(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
diff --git a/tests/e2e/multigpu/solo/test_grpo.py b/tests/e2e/multigpu/solo/test_grpo.py
index 92e0f7040..b48eb30e1 100644
--- a/tests/e2e/multigpu/solo/test_grpo.py
+++ b/tests/e2e/multigpu/solo/test_grpo.py
@@ -80,7 +80,7 @@ def start_vllm(
     cmd_env = env.copy()
     cmd_env.update({"VLLM_LOGGING_CONFIG_PATH": vllm_logging_json})
     # start `trl vllm-serve` command in the background and capture the process id
-    process = subprocess.Popen(  # pylint: disable=consider-using-with
+    process = subprocess.Popen(
         cmd,
         env=cmd_env,
         stdout=subprocess.DEVNULL if quiet else subprocess.PIPE,
diff --git a/tests/e2e/multigpu/test_eval.py b/tests/e2e/multigpu/test_eval.py
index 4f86278ff..504659a3a 100644
--- a/tests/e2e/multigpu/test_eval.py
+++ b/tests/e2e/multigpu/test_eval.py
@@ -21,7 +21,6 @@ class TestMultiGPUEval:
     """
 
     def test_eval_sample_packing(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -93,7 +92,6 @@ class TestMultiGPUEval:
         check_tensorboard(temp_dir + "/runs", "eval/loss", 2.5, "Eval Loss is too high")
 
     def test_eval(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
diff --git a/tests/e2e/multigpu/test_fp8_fsdp2.py b/tests/e2e/multigpu/test_fp8_fsdp2.py
index f7fa29a31..dc369f3de 100644
--- a/tests/e2e/multigpu/test_fp8_fsdp2.py
+++ b/tests/e2e/multigpu/test_fp8_fsdp2.py
@@ -1,7 +1,5 @@
 """Test module for FP8 mixed precision with FSDP2 multi-GPU functionality."""
 
-# pylint: disable=duplicate-code
-
 import os
 from pathlib import Path
 
@@ -28,9 +26,9 @@ def verify_fp8_training_success(temp_dir):
     assert len(model_files) > 0, "No model files found - training may have failed"
 
     checkpoint_files = list(output_path.glob("checkpoint-*"))
-    assert (
-        len(checkpoint_files) > 0
-    ), "No checkpoint files found - training may have failed"
+    assert len(checkpoint_files) > 0, (
+        "No checkpoint files found - training may have failed"
+    )
 
     tb_log_path = most_recent_subdir(temp_dir + "/runs")
     if tb_log_path:
@@ -42,9 +40,9 @@ def verify_fp8_training_success(temp_dir):
             train_loss_df = df[df.tag == "train/train_loss"]
             if len(train_loss_df) > 0:
                 final_loss = train_loss_df.value.values[-1]
-                assert not torch.isnan(
-                    torch.tensor(final_loss)
-                ), f"Training loss is NaN: {final_loss}"
+                assert not torch.isnan(torch.tensor(final_loss)), (
+                    f"Training loss is NaN: {final_loss}"
+                )
 
 
 class TestFP8FSDP2:
diff --git a/tests/e2e/multigpu/test_fsdp1.py b/tests/e2e/multigpu/test_fsdp1.py
index fe0badbe2..cb92c80b5 100644
--- a/tests/e2e/multigpu/test_fsdp1.py
+++ b/tests/e2e/multigpu/test_fsdp1.py
@@ -1,7 +1,5 @@
 """Test module for FSDP1 multi-GPU functionality."""
 
-# pylint: disable=duplicate-code
-
 import os
 from pathlib import Path
 
@@ -29,9 +27,9 @@ def verify_training_success(temp_dir):
     assert len(model_files) > 0, "No model files found - training may have failed"
 
     checkpoint_files = list(output_path.glob("checkpoint-*"))
-    assert (
-        len(checkpoint_files) > 0
-    ), "No checkpoint files found - training may have failed"
+    assert len(checkpoint_files) > 0, (
+        "No checkpoint files found - training may have failed"
+    )
 
     tb_log_path = most_recent_subdir(temp_dir + "/runs")
     if tb_log_path:
@@ -43,9 +41,9 @@ def verify_training_success(temp_dir):
             train_loss_df = df[df.tag == "train/train_loss"]
             if len(train_loss_df) > 0:
                 final_loss = train_loss_df.value.values[-1]
-                assert not torch.isnan(
-                    torch.tensor(final_loss)
-                ), f"Training loss is NaN: {final_loss}"
+                assert not torch.isnan(torch.tensor(final_loss)), (
+                    f"Training loss is NaN: {final_loss}"
+                )
 
 
 class TestFSDP1:
diff --git a/tests/e2e/multigpu/test_fsdp2.py b/tests/e2e/multigpu/test_fsdp2.py
index 0bb255266..8b7ee710e 100644
--- a/tests/e2e/multigpu/test_fsdp2.py
+++ b/tests/e2e/multigpu/test_fsdp2.py
@@ -1,7 +1,5 @@
 """Test module for FSDP2 multi-GPU functionality."""
 
-# pylint: disable=duplicate-code
-
 import os
 from pathlib import Path
 
@@ -29,9 +27,9 @@ def verify_training_success(temp_dir):
     assert len(model_files) > 0, "No model files found - training may have failed"
 
     checkpoint_files = list(output_path.glob("checkpoint-*"))
-    assert (
-        len(checkpoint_files) > 0
-    ), "No checkpoint files found - training may have failed"
+    assert len(checkpoint_files) > 0, (
+        "No checkpoint files found - training may have failed"
+    )
 
     tb_log_path = most_recent_subdir(temp_dir + "/runs")
     if tb_log_path:
@@ -43,9 +41,9 @@ def verify_training_success(temp_dir):
             train_loss_df = df[df.tag == "train/train_loss"]
             if len(train_loss_df) > 0:
                 final_loss = train_loss_df.value.values[-1]
-                assert not torch.isnan(
-                    torch.tensor(final_loss)
-                ), f"Training loss is NaN: {final_loss}"
+                assert not torch.isnan(torch.tensor(final_loss)), (
+                    f"Training loss is NaN: {final_loss}"
+                )
 
 
 class TestFSDP2:
diff --git a/tests/e2e/multigpu/test_gemma3.py b/tests/e2e/multigpu/test_gemma3.py
index 4a7b101a8..51ec68b11 100644
--- a/tests/e2e/multigpu/test_gemma3.py
+++ b/tests/e2e/multigpu/test_gemma3.py
@@ -29,7 +29,6 @@ class TestMultiGPUGemma3:
     """
 
     def test_lora_ddp_packed(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "axolotl-mirrors/gemma-3-4b-pt",
diff --git a/tests/e2e/multigpu/test_llama.py b/tests/e2e/multigpu/test_llama.py
index aab14dcc4..ad15d628b 100644
--- a/tests/e2e/multigpu/test_llama.py
+++ b/tests/e2e/multigpu/test_llama.py
@@ -35,7 +35,6 @@ class TestMultiGPULlama:
     """
 
     def test_lora_ddp(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -99,7 +98,6 @@ class TestMultiGPULlama:
         [1, 2],
     )
     def test_lora_ddp_packed(self, temp_dir, gradient_accumulation_steps):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -162,7 +160,6 @@ class TestMultiGPULlama:
         )
 
     def test_dpo_lora_ddp(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -242,7 +239,6 @@ class TestMultiGPULlama:
         )
 
     def test_dpo_qlora_ddp(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -326,7 +322,6 @@ class TestMultiGPULlama:
         [1, 2],
     )
     def test_fsdp(self, temp_dir, gradient_accumulation_steps):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -402,7 +397,6 @@ class TestMultiGPULlama:
         ],
     )
     def test_fsdp_packed(self, temp_dir, fsdp_state_dict_type):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -484,7 +478,6 @@ class TestMultiGPULlama:
     def test_fsdp2_packed(
         self, temp_dir, attention_backend, fsdp_reshard_after_forward
     ):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -556,7 +549,6 @@ class TestMultiGPULlama:
         )
 
     def test_fsdp_qlora_prequant_packed(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "axolotl-ai-co/SmolLM2-135M-bnb-nf4-bf16",
@@ -656,7 +648,6 @@ class TestMultiGPULlama:
     def test_ds_zero3_packed(
         self, temp_dir, gradient_accumulation_steps, deepspeed, qlora
     ):
-        # pylint: disable=duplicate-code
         if qlora:
             adapter = {
                 "adapter": "qlora",
@@ -732,7 +723,6 @@ class TestMultiGPULlama:
         [True, False],
     )
     def test_ds_zero2_packed(self, temp_dir, gradient_accumulation_steps, qlora):
-        # pylint: disable=duplicate-code
         if qlora:
             adapter = {
                 "adapter": "qlora",
@@ -809,7 +799,6 @@ class TestMultiGPULlama:
         [True, False],
     )
     def test_ds_zero1_packed(self, temp_dir, gradient_accumulation_steps, qlora):
-        # pylint: disable=duplicate-code
         if qlora:
             adapter = {
                 "adapter": "qlora",
@@ -880,7 +869,6 @@ class TestMultiGPULlama:
         reason="fix untrained tokens brittle with lots of edge cases in latest transformers"
     )
     def test_fix_untrained_tokens(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
diff --git a/tests/e2e/multigpu/test_ray.py b/tests/e2e/multigpu/test_ray.py
index 7f1278abf..7c6ea8a1f 100644
--- a/tests/e2e/multigpu/test_ray.py
+++ b/tests/e2e/multigpu/test_ray.py
@@ -26,7 +26,6 @@ class TestMultiGPURay:
 
     @require_torch_lt_2_6_0
     def test_lora_ddp(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -90,7 +89,6 @@ class TestMultiGPURay:
         [1, 2],
     )
     def test_ds_zero2_packed(self, temp_dir, gradient_accumulation_steps):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -150,7 +148,6 @@ class TestMultiGPURay:
         [1, 2],
     )
     def test_sft_fsdp2_packed(self, temp_dir, gradient_accumulation_steps):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
diff --git a/tests/e2e/multigpu/test_tp.py b/tests/e2e/multigpu/test_tp.py
index 87a1c6339..9891a0906 100644
--- a/tests/e2e/multigpu/test_tp.py
+++ b/tests/e2e/multigpu/test_tp.py
@@ -19,7 +19,6 @@ class TestTensorParallel:
     )
     @require_torch_2_7_0
     def test_fft_sft(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "Qwen/Qwen2.5-0.5B",
diff --git a/tests/e2e/patched/lora_kernels/test_lora_kernel_patching.py b/tests/e2e/patched/lora_kernels/test_lora_kernel_patching.py
index b4dc5de54..2180eb99d 100644
--- a/tests/e2e/patched/lora_kernels/test_lora_kernel_patching.py
+++ b/tests/e2e/patched/lora_kernels/test_lora_kernel_patching.py
@@ -1,7 +1,5 @@
 """Integration tests for LoRA activation and attention kernels."""
 
-# pylint: disable=redefined-outer-name
-
 from pathlib import Path
 
 import pytest
@@ -88,7 +86,7 @@ def test_attention_patching_integration(model_name, attention_cls):
     cfg = DictDefault({"base_model": model_name})
 
     # Store the original implementation
-    original_forward = getattr(attention_cls, "forward")
+    original_forward = attention_cls.forward
 
     # Apply patch
     patch_self_attn_lora(cfg)
@@ -104,7 +102,7 @@ def test_attention_patching_integration(model_name, attention_cls):
     assert hasattr(attention_cls, "_original_forward")
 
     # Clean up
-    setattr(attention_cls, "forward", original_forward)
+    attention_cls.forward = original_forward
     delattr(attention_cls, "_original_forward")
 
 
@@ -379,9 +377,9 @@ def test_model_architecture(model_config):
 
     # Verify correct activation function
     layer = patched_model.model.model.layers[0]
-    assert (
-        layer.mlp.forward.__func__ is model_config["expected_activation"]
-    ), f"Wrong activation for {model_config['name']}"
+    assert layer.mlp.forward.__func__ is model_config["expected_activation"], (
+        f"Wrong activation for {model_config['name']}"
+    )
 
     # Test forward pass
     inputs = get_test_inputs(model)
@@ -390,12 +388,11 @@ def test_model_architecture(model_config):
         patched_output = patched_model(inputs).logits
 
     # Check outputs match
-    assert torch.allclose(
-        original_output, patched_output, rtol=1e-4
-    ), f"Outputs don't match for {model_config['name']}"
+    assert torch.allclose(original_output, patched_output, rtol=1e-4), (
+        f"Outputs don't match for {model_config['name']}"
+    )
 
 
-# pylint: disable=duplicate-code
 def test_kernel_training_integration(temp_dir):
     """Test model loading with kernel patches enabled."""
     from axolotl.cli.utils import load_model_and_tokenizer
@@ -563,15 +560,13 @@ def test_kernel_training_integration_dropout_non_zero(temp_dir):
     model_loader = ModelLoader(cfg, tokenizer)
 
     # Apply patch
-    model_loader.patch_manager._apply_self_attention_lora_patch()  # pylint: disable=protected-access
+    model_loader.patch_manager._apply_self_attention_lora_patch()
 
     # Verify patch was not applied
     assert attention_cls.forward == original_forward_method
 
     # Apply apply_lora_kernel_patches
-    model_loader.patch_manager._apply_lora_kernel_patch(  # pylint: disable=protected-access
-        model
-    )
+    model_loader.patch_manager._apply_lora_kernel_patch(model)
 
     # Verify patch was not applied
     layers = get_layers(model)
diff --git a/tests/e2e/patched/test_4d_multipack_llama.py b/tests/e2e/patched/test_4d_multipack_llama.py
index 1824443e7..ef28cc406 100644
--- a/tests/e2e/patched/test_4d_multipack_llama.py
+++ b/tests/e2e/patched/test_4d_multipack_llama.py
@@ -19,7 +19,6 @@ class Test4dMultipackLlama(unittest.TestCase):
 
     @with_temp_dir
     def test_sdp_lora_packing(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -67,7 +66,6 @@ class Test4dMultipackLlama(unittest.TestCase):
 
     @with_temp_dir
     def test_torch_lora_packing(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
diff --git a/tests/e2e/patched/test_activation_checkpointing.py b/tests/e2e/patched/test_activation_checkpointing.py
index 06e3de274..ddace8ef1 100644
--- a/tests/e2e/patched/test_activation_checkpointing.py
+++ b/tests/e2e/patched/test_activation_checkpointing.py
@@ -32,10 +32,9 @@ class TestActivationCheckpointing:
     def test_activation_checkpointing_offload(
         self,
         temp_dir,
-        fix_checkpoint_after_test,  # pylint: disable=unused-argument,redefined-outer-name
+        fix_checkpoint_after_test,
         gradient_checkpointing,
     ):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
diff --git a/tests/e2e/patched/test_cli_integrations.py b/tests/e2e/patched/test_cli_integrations.py
index 6c908faf1..6eba92689 100644
--- a/tests/e2e/patched/test_cli_integrations.py
+++ b/tests/e2e/patched/test_cli_integrations.py
@@ -10,7 +10,6 @@ from axolotl.cli.config import load_cfg
 from axolotl.utils.dict import DictDefault
 
 
-# pylint: disable=duplicate-code
 class TestPluginArgs:
     """
     test class for plugin args loaded from the config file
diff --git a/tests/e2e/patched/test_fa_xentropy.py b/tests/e2e/patched/test_fa_xentropy.py
index 38099b220..9f4699854 100644
--- a/tests/e2e/patched/test_fa_xentropy.py
+++ b/tests/e2e/patched/test_fa_xentropy.py
@@ -23,7 +23,6 @@ class TestFAXentropyLlama:
         [1, 4],
     )
     def test_lora_packing_fa_cross_entropy(self, temp_dir, gradient_accumulation_steps):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
diff --git a/tests/e2e/patched/test_falcon_samplepack.py b/tests/e2e/patched/test_falcon_samplepack.py
index ef31b11c7..cc5091403 100644
--- a/tests/e2e/patched/test_falcon_samplepack.py
+++ b/tests/e2e/patched/test_falcon_samplepack.py
@@ -22,7 +22,6 @@ class TestFalconPatched(unittest.TestCase):
     @pytest.mark.skip(reason="no tiny models for testing with safetensors")
     @with_temp_dir
     def test_qlora(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "illuin/tiny-random-FalconForCausalLM",
@@ -71,7 +70,6 @@ class TestFalconPatched(unittest.TestCase):
     @pytest.mark.skip(reason="no tiny models for testing with safetensors")
     @with_temp_dir
     def test_ft(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "illuin/tiny-random-FalconForCausalLM",
diff --git a/tests/e2e/patched/test_flattening.py b/tests/e2e/patched/test_flattening.py
index fdaab558d..2c247d406 100644
--- a/tests/e2e/patched/test_flattening.py
+++ b/tests/e2e/patched/test_flattening.py
@@ -23,7 +23,6 @@ class TestFAFlattening:
         [1, 4],
     )
     def test_lora_packing_flattening(self, temp_dir, gradient_accumulation_steps):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
diff --git a/tests/e2e/patched/test_fsdp2_qlora.py b/tests/e2e/patched/test_fsdp2_qlora.py
index ca17b81d1..de9c929e1 100644
--- a/tests/e2e/patched/test_fsdp2_qlora.py
+++ b/tests/e2e/patched/test_fsdp2_qlora.py
@@ -15,7 +15,6 @@ class TestFSDPPatchIntegration:
             apply_init_unsharded_param_patch,
         )
 
-        # pylint: disable=protected-access
         original_init_sharded = FSDPParam._init_sharded_param
         original_init_unsharded = FSDPParam.init_unsharded_param
 
@@ -23,11 +22,9 @@ class TestFSDPPatchIntegration:
         apply_init_sharded_param_patch()
         apply_init_unsharded_param_patch()
 
-        assert (
-            # pylint: disable=protected-access
-            FSDPParam._init_sharded_param
-            != original_init_sharded
-        ), "_init_sharded_param was not patched"
-        assert (
-            FSDPParam.init_unsharded_param != original_init_unsharded
-        ), "init_unsharded_param was not patched"
+        assert FSDPParam._init_sharded_param != original_init_sharded, (
+            "_init_sharded_param was not patched"
+        )
+        assert FSDPParam.init_unsharded_param != original_init_unsharded, (
+            "init_unsharded_param was not patched"
+        )
diff --git a/tests/e2e/patched/test_fused_llama.py b/tests/e2e/patched/test_fused_llama.py
index f0c4f155f..f0c5df18a 100644
--- a/tests/e2e/patched/test_fused_llama.py
+++ b/tests/e2e/patched/test_fused_llama.py
@@ -23,7 +23,6 @@ class TestFusedLlama(unittest.TestCase):
 
     @with_temp_dir
     def test_fft_packing(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
diff --git a/tests/e2e/patched/test_llama_s2_attention.py b/tests/e2e/patched/test_llama_s2_attention.py
index ba5556a59..0dd748945 100644
--- a/tests/e2e/patched/test_llama_s2_attention.py
+++ b/tests/e2e/patched/test_llama_s2_attention.py
@@ -22,7 +22,6 @@ class TestLlamaShiftedSparseAttention(unittest.TestCase):
 
     @with_temp_dir
     def test_lora_s2_attn(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -71,7 +70,6 @@ class TestLlamaShiftedSparseAttention(unittest.TestCase):
 
     @with_temp_dir
     def test_fft_s2_attn(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
diff --git a/tests/e2e/patched/test_lora_llama_multipack.py b/tests/e2e/patched/test_lora_llama_multipack.py
index fdf6adbc6..1833c750b 100644
--- a/tests/e2e/patched/test_lora_llama_multipack.py
+++ b/tests/e2e/patched/test_lora_llama_multipack.py
@@ -22,7 +22,6 @@ class TestLoraLlama(unittest.TestCase):
 
     @with_temp_dir
     def test_lora_packing(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -73,7 +72,6 @@ class TestLoraLlama(unittest.TestCase):
     @pytest.mark.skipif(not is_auto_gptq_available(), reason="auto-gptq not available")
     @with_temp_dir
     def test_lora_gptq_packed(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "lilmeaty/SmolLM2-135M-Instruct-GPTQ",
diff --git a/tests/e2e/patched/test_mistral_samplepack.py b/tests/e2e/patched/test_mistral_samplepack.py
index bea0f9c68..e03941b07 100644
--- a/tests/e2e/patched/test_mistral_samplepack.py
+++ b/tests/e2e/patched/test_mistral_samplepack.py
@@ -20,7 +20,6 @@ class TestMistral(unittest.TestCase):
     @require_torch_2_6_0
     @with_temp_dir
     def test_lora_packing(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "trl-internal-testing/tiny-MistralForCausalLM-0.2",
@@ -68,7 +67,6 @@ class TestMistral(unittest.TestCase):
 
     @with_temp_dir
     def test_ft_packing(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "trl-internal-testing/tiny-MistralForCausalLM-0.2",
diff --git a/tests/e2e/patched/test_mixtral_samplepack.py b/tests/e2e/patched/test_mixtral_samplepack.py
index 09e427abd..3517ff3db 100644
--- a/tests/e2e/patched/test_mixtral_samplepack.py
+++ b/tests/e2e/patched/test_mixtral_samplepack.py
@@ -19,7 +19,6 @@ class TestMixtral(unittest.TestCase):
 
     @with_temp_dir
     def test_qlora(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "hf-internal-testing/Mixtral-tiny",
@@ -64,7 +63,6 @@ class TestMixtral(unittest.TestCase):
 
     @with_temp_dir
     def test_ft(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "hf-internal-testing/Mixtral-tiny",
diff --git a/tests/e2e/patched/test_model_patches.py b/tests/e2e/patched/test_model_patches.py
index b90be23e4..aaaaf5fe2 100644
--- a/tests/e2e/patched/test_model_patches.py
+++ b/tests/e2e/patched/test_model_patches.py
@@ -89,5 +89,5 @@ class TestModelPatches(unittest.TestCase):
 
         assert (
             "torch.jit"
-            in transformers.modeling_flash_attention_utils._get_unpad_data.__module__  # pylint: disable=protected-access
+            in transformers.modeling_flash_attention_utils._get_unpad_data.__module__
         )
diff --git a/tests/e2e/patched/test_peft_embeddings.py b/tests/e2e/patched/test_peft_embeddings.py
index 4769319ae..374ef97d8 100644
--- a/tests/e2e/patched/test_peft_embeddings.py
+++ b/tests/e2e/patched/test_peft_embeddings.py
@@ -15,7 +15,6 @@ class TestLlamaPeftEmbeddings:
     """
 
     def test_peft_embeddings_upcast(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
diff --git a/tests/e2e/patched/test_phi_multipack.py b/tests/e2e/patched/test_phi_multipack.py
index 1f0ddd630..77b2d99e5 100644
--- a/tests/e2e/patched/test_phi_multipack.py
+++ b/tests/e2e/patched/test_phi_multipack.py
@@ -19,7 +19,6 @@ class TestPhiMultipack(unittest.TestCase):
 
     @with_temp_dir
     def test_ft_packed(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "microsoft/phi-1_5",
@@ -67,7 +66,6 @@ class TestPhiMultipack(unittest.TestCase):
 
     @with_temp_dir
     def test_qlora_packed(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "microsoft/phi-1_5",
diff --git a/tests/e2e/patched/test_resume.py b/tests/e2e/patched/test_resume.py
index 54b8245ee..747b79dc7 100644
--- a/tests/e2e/patched/test_resume.py
+++ b/tests/e2e/patched/test_resume.py
@@ -22,7 +22,6 @@ class TestResumeLlama:
 
     @require_torch_2_6_0
     def test_resume_lora_packed(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
diff --git a/tests/e2e/patched/test_unsloth_qlora.py b/tests/e2e/patched/test_unsloth_qlora.py
index 2c8ee4eb0..bf00e8a5f 100644
--- a/tests/e2e/patched/test_unsloth_qlora.py
+++ b/tests/e2e/patched/test_unsloth_qlora.py
@@ -12,7 +12,6 @@ from axolotl.utils.dict import DictDefault
 from ..utils import check_model_output_exists, check_tensorboard
 
 
-# pylint: disable=duplicate-code
 @pytest.mark.skip(
     reason="Unsloth integration will be broken going into latest transformers"
 )
diff --git a/tests/e2e/solo/test_flex.py b/tests/e2e/solo/test_flex.py
index 76364fc0e..abe8fb69a 100644
--- a/tests/e2e/solo/test_flex.py
+++ b/tests/e2e/solo/test_flex.py
@@ -22,7 +22,6 @@ class TestPackedFlex(unittest.TestCase):
     @require_torch_2_6_0
     @with_temp_dir
     def test_loss_llama(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
diff --git a/tests/e2e/solo/test_relora_llama.py b/tests/e2e/solo/test_relora_llama.py
index b399b4680..be77684ba 100644
--- a/tests/e2e/solo/test_relora_llama.py
+++ b/tests/e2e/solo/test_relora_llama.py
@@ -20,7 +20,6 @@ class TestReLoraLlama(unittest.TestCase):
 
     @with_temp_dir
     def test_relora(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -76,9 +75,9 @@ class TestReLoraLlama(unittest.TestCase):
 
         train(cfg=cfg, dataset_meta=dataset_meta)
         check_model_output_exists(Path(temp_dir) / "checkpoint-100/adapter", cfg)
-        assert (
-            Path(temp_dir) / "checkpoint-100/relora/model.safetensors"
-        ).exists(), "Relora model checkpoint not found"
+        assert (Path(temp_dir) / "checkpoint-100/relora/model.safetensors").exists(), (
+            "Relora model checkpoint not found"
+        )
 
         check_tensorboard(
             temp_dir + "/runs", "train/grad_norm", 0.2, "grad_norm is too high"
diff --git a/tests/e2e/test_activation_offloading.py b/tests/e2e/test_activation_offloading.py
index 06c5c0656..9df85ab31 100644
--- a/tests/e2e/test_activation_offloading.py
+++ b/tests/e2e/test_activation_offloading.py
@@ -11,8 +11,6 @@ from axolotl.utils.dict import DictDefault
 
 from .utils import check_model_output_exists
 
-# pylint: disable=duplicate-code
-
 
 class TestActivationOffloading:
     """
@@ -28,7 +26,6 @@ class TestActivationOffloading:
         temp_dir,
         adapter,
     ):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
diff --git a/tests/e2e/test_deepseekv3.py b/tests/e2e/test_deepseekv3.py
index e4a47fb0a..e11be8265 100644
--- a/tests/e2e/test_deepseekv3.py
+++ b/tests/e2e/test_deepseekv3.py
@@ -25,7 +25,6 @@ class TestDeepseekV3:
         [True, False],
     )
     def test_lora_deepseekv3(self, temp_dir, sample_packing):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "axolotl-ai-co/DeepSeek-V3-11M",
@@ -83,7 +82,6 @@ class TestDeepseekV3:
         [True, False],
     )
     def test_fft_deepseekv3(self, temp_dir, sample_packing):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "axolotl-ai-co/DeepSeek-V3-11M",
diff --git a/tests/e2e/test_dpo.py b/tests/e2e/test_dpo.py
index a1df69535..8f577ef47 100644
--- a/tests/e2e/test_dpo.py
+++ b/tests/e2e/test_dpo.py
@@ -21,7 +21,6 @@ class TestDPOLlamaLora(unittest.TestCase):
 
     @with_temp_dir
     def test_dpo_lora(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -70,7 +69,6 @@ class TestDPOLlamaLora(unittest.TestCase):
 
     @with_temp_dir
     def test_dpo_nll_lora(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -120,7 +118,6 @@ class TestDPOLlamaLora(unittest.TestCase):
 
     @with_temp_dir
     def test_dpo_use_weighting(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -171,7 +168,6 @@ class TestDPOLlamaLora(unittest.TestCase):
     @pytest.mark.skip("kto_pair no longer supported in trl")
     @with_temp_dir
     def test_kto_pair_lora(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -220,7 +216,6 @@ class TestDPOLlamaLora(unittest.TestCase):
 
     @with_temp_dir
     def test_ipo_lora(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -269,7 +264,6 @@ class TestDPOLlamaLora(unittest.TestCase):
 
     @with_temp_dir
     def test_orpo_lora(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -322,7 +316,6 @@ class TestDPOLlamaLora(unittest.TestCase):
     @pytest.mark.skip(reason="Fix the implementation")
     @with_temp_dir
     def test_kto_lora(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
diff --git a/tests/e2e/test_embeddings_lr.py b/tests/e2e/test_embeddings_lr.py
index e4a06ad14..633e449ef 100644
--- a/tests/e2e/test_embeddings_lr.py
+++ b/tests/e2e/test_embeddings_lr.py
@@ -19,7 +19,6 @@ class TestEmbeddingsLrScale(unittest.TestCase):
 
     @with_temp_dir
     def test_train_w_embedding_lr_scale(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -65,7 +64,6 @@ class TestEmbeddingsLrScale(unittest.TestCase):
 
     @with_temp_dir
     def test_train_w_embedding_lr(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
diff --git a/tests/e2e/test_evaluate.py b/tests/e2e/test_evaluate.py
index 977497e5e..3b0ab1450 100644
--- a/tests/e2e/test_evaluate.py
+++ b/tests/e2e/test_evaluate.py
@@ -13,7 +13,6 @@ class TestE2eEvaluate:
     """Test cases for evaluate CLI"""
 
     def test_evaluate(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
diff --git a/tests/e2e/test_falcon.py b/tests/e2e/test_falcon.py
index 5be6efcf6..1a363fe6a 100644
--- a/tests/e2e/test_falcon.py
+++ b/tests/e2e/test_falcon.py
@@ -22,7 +22,6 @@ class TestFalcon(unittest.TestCase):
     @pytest.mark.skip(reason="no tiny models for testing with safetensors")
     @with_temp_dir
     def test_lora(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "illuin/tiny-random-FalconForCausalLM",
@@ -74,7 +73,6 @@ class TestFalcon(unittest.TestCase):
     @pytest.mark.skip(reason="no tiny models for testing with safetensors")
     @with_temp_dir
     def test_lora_added_vocab(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "illuin/tiny-random-FalconForCausalLM",
@@ -130,7 +128,6 @@ class TestFalcon(unittest.TestCase):
     @pytest.mark.skip(reason="no tiny models for testing with safetensors")
     @with_temp_dir
     def test_ft(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "illuin/tiny-random-FalconForCausalLM",
diff --git a/tests/e2e/test_gemma2.py b/tests/e2e/test_gemma2.py
index c0eba72a7..9e9f1a9cc 100644
--- a/tests/e2e/test_gemma2.py
+++ b/tests/e2e/test_gemma2.py
@@ -22,7 +22,6 @@ class TestGemma2:
         [True, False],
     )
     def test_lora_gemma2(self, temp_dir, sample_packing):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "axolotl-ai-co/gemma-2-33M",
@@ -78,7 +77,6 @@ class TestGemma2:
         [True, False],
     )
     def test_fft_gemma2(self, temp_dir, sample_packing):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "axolotl-ai-co/gemma-2-33M",
diff --git a/tests/e2e/test_gemma3_text.py b/tests/e2e/test_gemma3_text.py
index ef38d028d..6cd999242 100644
--- a/tests/e2e/test_gemma3_text.py
+++ b/tests/e2e/test_gemma3_text.py
@@ -22,7 +22,6 @@ class TestGemma3Text:
         [True, False],
     )
     def test_lora_gemma3_text(self, temp_dir, sample_packing):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "axolotl-ai-co/gemma-3-34M",
@@ -78,7 +77,6 @@ class TestGemma3Text:
         [True, False],
     )
     def test_fft_gemma3_text(self, temp_dir, sample_packing):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "axolotl-ai-co/gemma-3-34M",
diff --git a/tests/e2e/test_imports.py b/tests/e2e/test_imports.py
index 050e4dfb3..4c01e50be 100644
--- a/tests/e2e/test_imports.py
+++ b/tests/e2e/test_imports.py
@@ -11,11 +11,9 @@ class TestImports(unittest.TestCase):
     """
 
     def test_import_causal_trainer(self):
-        from axolotl.core.builders import (  # pylint: disable=unused-import  # noqa: F401
-            HFCausalTrainerBuilder,
-        )
+        # The import itself is the assertion; keep noqa so `ruff --fix` does not strip it.
+        from axolotl.core.builders import HFCausalTrainerBuilder  # noqa: F401
 
     def test_import_rl_trainer(self):
-        from axolotl.core.builders import (  # pylint: disable=unused-import  # noqa: F401
-            HFRLTrainerBuilder,
-        )
+        # The import itself is the assertion; keep noqa so `ruff --fix` does not strip it.
+        from axolotl.core.builders import HFRLTrainerBuilder  # noqa: F401
diff --git a/tests/e2e/test_llama.py b/tests/e2e/test_llama.py
index 1e6df0be9..de085cbe2 100644
--- a/tests/e2e/test_llama.py
+++ b/tests/e2e/test_llama.py
@@ -16,7 +16,6 @@ class TestLlama:
     """
 
     def test_fft_trust_remote_code(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -57,7 +56,6 @@ class TestLlama:
         check_model_output_exists(temp_dir, cfg)
 
     def test_fix_untrained_tokens(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -105,7 +103,6 @@ class TestLlama:
         check_model_output_exists(temp_dir, cfg)
 
     def test_fix_untrained_tokens_already_trained(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -150,7 +147,6 @@ class TestLlama:
         check_model_output_exists(temp_dir, cfg)
 
     def test_batch_flattening(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
diff --git a/tests/e2e/test_llama_pretrain.py b/tests/e2e/test_llama_pretrain.py
index bd5502300..a041244e7 100644
--- a/tests/e2e/test_llama_pretrain.py
+++ b/tests/e2e/test_llama_pretrain.py
@@ -22,7 +22,6 @@ class TestPretrainLlama:
         ],
     )
     def test_pretrain(self, temp_dir, sample_packing, pretrain_multipack_attn):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
diff --git a/tests/e2e/test_llama_vision.py b/tests/e2e/test_llama_vision.py
index 760759bca..0cc927f76 100644
--- a/tests/e2e/test_llama_vision.py
+++ b/tests/e2e/test_llama_vision.py
@@ -19,7 +19,6 @@ class TestLlamaVision(unittest.TestCase):
 
     @with_temp_dir
     def test_lora_llama_vision_text_only_dataset(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "axolotl-ai-co/Llama-3.2-39M-Vision",
@@ -67,7 +66,6 @@ class TestLlamaVision(unittest.TestCase):
 
     @with_temp_dir
     def test_lora_llama_vision_multimodal_dataset(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "axolotl-ai-co/Llama-3.2-39M-Vision",
diff --git a/tests/e2e/test_load_model.py b/tests/e2e/test_load_model.py
index 8fcffeb11..7c5389a58 100644
--- a/tests/e2e/test_load_model.py
+++ b/tests/e2e/test_load_model.py
@@ -56,13 +56,11 @@ class TestLoadModelUtils:
                 "context_parallel_size": 1,
             }
         )
-        self.model_loader = (  # pylint: disable=attribute-defined-outside-init
-            ModelLoader(
-                cfg=self.cfg,
-                tokenizer="",
-                inference=False,
-                reference_model=True,
-            )
+        self.model_loader = ModelLoader(
+            cfg=self.cfg,
+            tokenizer="",
+            inference=False,
+            reference_model=True,
         )
 
     @pytest.mark.parametrize("embedding_modules", ["embed_tokens", "lm_head"])
@@ -74,7 +72,7 @@ class TestLoadModelUtils:
         self, temp_dir, embedding_modules, dist_dtype, before_kbit_train_or_finetune
     ):
         self.cfg.output_dir = temp_dir
-        self.model_loader.tokenizer = load_tokenizer(self.cfg)  # pylint: disable=all
+        self.model_loader.tokenizer = load_tokenizer(self.cfg)
         self.model_loader.load()
         self.model_loader._convert_embedding_modules_dtype(
             embedding_modules, dist_dtype, before_kbit_train_or_finetune
diff --git a/tests/e2e/test_lora_llama.py b/tests/e2e/test_lora_llama.py
index 7e0ff46cf..b6ee393df 100644
--- a/tests/e2e/test_lora_llama.py
+++ b/tests/e2e/test_lora_llama.py
@@ -19,7 +19,6 @@ class TestLoraLlama(unittest.TestCase):
 
     @with_temp_dir
     def test_lora(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
diff --git a/tests/e2e/test_mamba.py b/tests/e2e/test_mamba.py
index 73d3bdc26..67935377d 100644
--- a/tests/e2e/test_mamba.py
+++ b/tests/e2e/test_mamba.py
@@ -22,7 +22,6 @@ class TestMamba(unittest.TestCase):
 
     @with_temp_dir
     def test_fft(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "state-spaces/mamba-130m",
diff --git a/tests/e2e/test_mistral.py b/tests/e2e/test_mistral.py
index f47f794e0..08b3b05af 100644
--- a/tests/e2e/test_mistral.py
+++ b/tests/e2e/test_mistral.py
@@ -21,7 +21,6 @@ class TestMistral(unittest.TestCase):
 
     @with_temp_dir
     def test_lora(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "trl-internal-testing/tiny-MistralForCausalLM-0.2",
@@ -68,7 +67,6 @@ class TestMistral(unittest.TestCase):
 
     @with_temp_dir
     def test_ft(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "trl-internal-testing/tiny-MistralForCausalLM-0.2",
diff --git a/tests/e2e/test_mixtral.py b/tests/e2e/test_mixtral.py
index 3fe2bf70f..c46cf906d 100644
--- a/tests/e2e/test_mixtral.py
+++ b/tests/e2e/test_mixtral.py
@@ -22,7 +22,6 @@ class TestMixtral(unittest.TestCase):
 
     @with_temp_dir
     def test_qlora_w_fa2(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "hf-internal-testing/Mixtral-tiny",
@@ -78,7 +77,6 @@ class TestMixtral(unittest.TestCase):
 
     @with_temp_dir
     def test_qlora_wo_fa2(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "hf-internal-testing/Mixtral-tiny",
@@ -134,7 +132,6 @@ class TestMixtral(unittest.TestCase):
 
     @with_temp_dir
     def test_16bit_lora_w_fa2(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "hf-internal-testing/Mixtral-tiny",
@@ -193,7 +190,6 @@ class TestMixtral(unittest.TestCase):
 
     @with_temp_dir
     def test_16bit_lora_wo_fa2(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "hf-internal-testing/Mixtral-tiny",
@@ -252,7 +248,6 @@ class TestMixtral(unittest.TestCase):
 
     @with_temp_dir
     def test_ft(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "hf-internal-testing/Mixtral-tiny",
diff --git a/tests/e2e/test_optimizers.py b/tests/e2e/test_optimizers.py
index 987d86041..dbea92a5b 100644
--- a/tests/e2e/test_optimizers.py
+++ b/tests/e2e/test_optimizers.py
@@ -25,7 +25,6 @@ class TestCustomOptimizers(unittest.TestCase):
 
     @with_temp_dir
     def test_optimi_adamw(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -71,7 +70,6 @@ class TestCustomOptimizers(unittest.TestCase):
     @with_temp_dir
     @require_torch_2_5_1
     def test_adopt_adamw(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -117,7 +115,6 @@ class TestCustomOptimizers(unittest.TestCase):
     @with_temp_dir
     @require_torch_2_5_1
     def test_muon(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -164,7 +161,6 @@ class TestCustomOptimizers(unittest.TestCase):
     @with_temp_dir
     @require_torch_2_7_0
     def test_dion(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -206,7 +202,6 @@ class TestCustomOptimizers(unittest.TestCase):
 
     @with_temp_dir
     def test_fft_schedule_free_adamw(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -234,7 +229,6 @@ class TestCustomOptimizers(unittest.TestCase):
                 "save_first_step": False,
             }
         )
-        # pylint: disable=duplicate-code
 
         cfg = validate_config(cfg)
         normalize_config(cfg)
@@ -246,7 +240,6 @@ class TestCustomOptimizers(unittest.TestCase):
     @with_temp_dir
     @require_torch_2_6_0
     def test_came_pytorch(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "JackFram/llama-68m",
diff --git a/tests/e2e/test_packing_loss.py b/tests/e2e/test_packing_loss.py
index aec9d95f8..7cb979ce6 100644
--- a/tests/e2e/test_packing_loss.py
+++ b/tests/e2e/test_packing_loss.py
@@ -21,7 +21,6 @@ class TestPackedLlama(unittest.TestCase):
 
     @with_temp_dir
     def test_loss_packed(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
diff --git a/tests/e2e/test_phi.py b/tests/e2e/test_phi.py
index ab3a63674..ae2210249 100644
--- a/tests/e2e/test_phi.py
+++ b/tests/e2e/test_phi.py
@@ -19,7 +19,6 @@ class TestPhi(unittest.TestCase):
 
     @with_temp_dir
     def test_phi_ft(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "microsoft/phi-1_5",
@@ -65,7 +64,6 @@ class TestPhi(unittest.TestCase):
 
     @with_temp_dir
     def test_phi_qlora(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "microsoft/phi-1_5",
diff --git a/tests/e2e/test_preprocess.py b/tests/e2e/test_preprocess.py
index 25f42e832..4aa4cb6c2 100644
--- a/tests/e2e/test_preprocess.py
+++ b/tests/e2e/test_preprocess.py
@@ -15,7 +15,7 @@ class TestPreprocess:
 
     def test_w_deepspeed(self, temp_dir):
         """make sure preproces doesn't choke when using deepspeed in the config"""
-        # pylint: disable=duplicate-code
+
         cfg = DictDefault(
             {
                 "base_model": "Qwen/Qwen2.5-0.5B",
diff --git a/tests/e2e/test_process_reward_model_smollm2.py b/tests/e2e/test_process_reward_model_smollm2.py
index bd9eec48b..9d83aabbc 100644
--- a/tests/e2e/test_process_reward_model_smollm2.py
+++ b/tests/e2e/test_process_reward_model_smollm2.py
@@ -19,7 +19,6 @@ class TestProcessRewardSmolLM2(unittest.TestCase):
 
     @with_temp_dir
     def test_prm(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
diff --git a/tests/e2e/test_qat.py b/tests/e2e/test_qat.py
index 139ae155a..7d41dfb50 100644
--- a/tests/e2e/test_qat.py
+++ b/tests/e2e/test_qat.py
@@ -18,7 +18,6 @@ class TestQATLlama:
     """
 
     def test_qat(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -68,7 +67,6 @@ class TestQATLlama:
         check_model_output_exists(Path(temp_dir) / "checkpoint-5", cfg)
 
     def test_qat_dpo(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
diff --git a/tests/e2e/test_quantization.py b/tests/e2e/test_quantization.py
index 500b7e556..cfbdfec38 100644
--- a/tests/e2e/test_quantization.py
+++ b/tests/e2e/test_quantization.py
@@ -131,7 +131,7 @@ class TestQuantization:
     @require_torch_2_6_0
     def test_prepare_model_for_qat(
         self, model, weight_dtype, activation_dtype, group_size, quantize_embedding
-    ):  # pylint: disable=redefined-outer-name
+    ):
         prepare_model_for_qat(
             model, weight_dtype, group_size, activation_dtype, quantize_embedding
         )
@@ -175,7 +175,7 @@ class TestQuantization:
         group_size,
         quantize_embedding,
         expected_exception,
-    ):  # pylint: disable=redefined-outer-name
+    ):
         if expected_exception:
             with pytest.raises(expected_exception):
                 quantize_model_for_ptq(
@@ -198,11 +198,13 @@ class TestQuantization:
                     if activation_dtype:
                         assert isinstance(
                             child.weight, LinearActivationQuantizedTensor
-                        ), "Linear weight should be quantized with activation quantization"
+                        ), (
+                            "Linear weight should be quantized with activation quantization"
+                        )
                     else:
-                        assert isinstance(
-                            child.weight, AffineQuantizedTensor
-                        ), "Linear weight should be quantized without activation quantization"
+                        assert isinstance(child.weight, AffineQuantizedTensor), (
+                            "Linear weight should be quantized without activation quantization"
+                        )
 
 
 class TestQuantizationCallback:
@@ -217,9 +219,7 @@ class TestQuantizationCallback:
         )
 
     @require_torch_2_6_0
-    def test_qat_callback_fake_quant_after_n_steps(
-        self, model, trainer_state
-    ):  # pylint: disable=redefined-outer-name
+    def test_qat_callback_fake_quant_after_n_steps(self, model, trainer_state):
         cfg = QATConfig(
             weight_dtype="int8",
             activation_dtype="int8",
@@ -269,9 +269,7 @@ class TestQuantizationCallback:
         assert model.lm_head.weight_fake_quantizer.enabled
 
     @require_torch_2_6_0
-    def test_qat_callback_fake_quant_after_n_steps_is_none(
-        self, model, trainer_state
-    ):  # pylint: disable=redefined-outer-name
+    def test_qat_callback_fake_quant_after_n_steps_is_none(self, model, trainer_state):
         cfg = QATConfig(
             weight_dtype="int8",
             activation_dtype="int8",
@@ -314,9 +312,7 @@ class TestConvertQATModelForPTQ:
     """
 
     @require_torch_2_6_0
-    def test_convert_qat_model_for_ptq(
-        self, model
-    ):  # pylint: disable=redefined-outer-name
+    def test_convert_qat_model_for_ptq(self, model):
         config = QATConfig(
             weight_dtype="int8",
             activation_dtype="int8",
diff --git a/tests/e2e/test_qwen.py b/tests/e2e/test_qwen.py
index 59267d14d..1c75d817b 100644
--- a/tests/e2e/test_qwen.py
+++ b/tests/e2e/test_qwen.py
@@ -19,7 +19,6 @@ class TestE2eQwen:
 
     @pytest.mark.parametrize("base_model", ["Qwen/Qwen2-0.5B", "Qwen/Qwen2.5-0.5B"])
     def test_dpo(self, base_model, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": base_model,
diff --git a/tests/e2e/test_reward_model_smollm2.py b/tests/e2e/test_reward_model_smollm2.py
index 82513f99f..cc768b173 100644
--- a/tests/e2e/test_reward_model_smollm2.py
+++ b/tests/e2e/test_reward_model_smollm2.py
@@ -19,7 +19,6 @@ class TestRewardModelLoraSmolLM2(unittest.TestCase):
 
     @with_temp_dir
     def test_rm_lora(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
diff --git a/tests/e2e/test_save_first_step.py b/tests/e2e/test_save_first_step.py
index 5bbd2302b..ce2d3f145 100644
--- a/tests/e2e/test_save_first_step.py
+++ b/tests/e2e/test_save_first_step.py
@@ -20,7 +20,6 @@ class TestSaveFirstStepCallback(unittest.TestCase):
 
     @with_temp_dir
     def test_save_first_step(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -61,7 +60,6 @@ class TestSaveFirstStepCallback(unittest.TestCase):
 
     @with_temp_dir
     def test_no_save_first_step(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
diff --git a/tests/e2e/test_schedulers.py b/tests/e2e/test_schedulers.py
index 8f7a13aee..5b9c56288 100644
--- a/tests/e2e/test_schedulers.py
+++ b/tests/e2e/test_schedulers.py
@@ -19,7 +19,6 @@ class TestCustomSchedulers(unittest.TestCase):
 
     @with_temp_dir
     def test_rex_scheduler(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
diff --git a/tests/e2e/utils.py b/tests/e2e/utils.py
index 939ed5c1c..7db6cf74e 100644
--- a/tests/e2e/utils.py
+++ b/tests/e2e/utils.py
@@ -2,6 +2,7 @@
 helper utils for tests
 """
 
+import importlib.util
 import os
 import shutil
 import tempfile
@@ -107,12 +108,7 @@ def require_vllm(test_case):
     """
 
     def is_vllm_installed():
-        try:
-            import vllm  # pylint: disable=unused-import  # noqa: F401
-
-            return True
-        except ImportError:
-            return False
+        return importlib.util.find_spec("vllm") is not None
 
     return unittest.skipUnless(
         is_vllm_installed(), "test requires vllm to be installed"
@@ -125,12 +121,7 @@ def require_llmcompressor(test_case):
     """
 
     def is_llmcompressor_installed():
-        try:
-            import llmcompressor  # pylint: disable=unused-import  # noqa: F401
-
-            return True
-        except ImportError:
-            return False
+        return importlib.util.find_spec("llmcompressor") is not None
 
     return unittest.skipUnless(
         is_llmcompressor_installed(), "test requires llmcompressor to be installed"
@@ -159,8 +150,8 @@ def check_tensorboard(
     tb_log_path = most_recent_subdir(temp_run_dir)
     event_file = os.path.join(tb_log_path, sorted(os.listdir(tb_log_path))[0])
     reader = SummaryReader(event_file)
-    df = reader.scalars  # pylint: disable=invalid-name
-    df = df[(df.tag == tag)]  # pylint: disable=invalid-name
+    df = reader.scalars
+    df = df[(df.tag == tag)]
     lt_val = (1 + rtol) * lt_val
     if "%s" in assertion_err:
         assert df.value.values[-1] < lt_val, assertion_err % df.value.values[-1]
diff --git a/tests/hf_offline_utils.py b/tests/hf_offline_utils.py
index 385e61f18..0e4a2f067 100644
--- a/tests/hf_offline_utils.py
+++ b/tests/hf_offline_utils.py
@@ -20,7 +20,7 @@ def reload_modules(hf_hub_offline):
     importlib.reload(huggingface_hub.constants)
     huggingface_hub.constants.HF_HUB_OFFLINE = hf_hub_offline
     importlib.reload(datasets.config)
-    setattr(datasets.config, "HF_HUB_OFFLINE", hf_hub_offline)
+    datasets.config.HF_HUB_OFFLINE = hf_hub_offline
     reset_sessions()
 
 
diff --git a/tests/integrations/test_liger.py b/tests/integrations/test_liger.py
index 5c4bd1028..d7b171ec2 100644
--- a/tests/integrations/test_liger.py
+++ b/tests/integrations/test_liger.py
@@ -10,7 +10,6 @@ from axolotl.utils.config import prepare_plugins, validate_config
 from axolotl.utils.dict import DictDefault
 
 
-# pylint: disable=duplicate-code
 @pytest.fixture(name="minimal_liger_cfg")
 def fixture_cfg():
     return DictDefault(
@@ -30,7 +29,6 @@ def fixture_cfg():
     )
 
 
-# pylint: disable=too-many-public-methods
 class TestValidation:
     """
     Test the validation module for liger
diff --git a/tests/patched/test_validation.py b/tests/patched/test_validation.py
index 677512d3d..21299ed98 100644
--- a/tests/patched/test_validation.py
+++ b/tests/patched/test_validation.py
@@ -1,4 +1,3 @@
-# pylint: disable=too-many-lines
 """Module for testing the validation module"""
 
 import os
@@ -49,7 +48,6 @@ class BaseValidation:
         self._caplog = caplog
 
 
-# pylint: disable=too-many-public-methods
 class TestValidation(BaseValidation):
     """
     Test the validation module
@@ -241,7 +239,7 @@ class TestValidation(BaseValidation):
 
     def test_lr_as_float(self, minimal_cfg):
         cfg = (
-            DictDefault(  # pylint: disable=unsupported-binary-operation
+            DictDefault(
                 {
                     "learning_rate": "5e-5",
                 }
@@ -303,7 +301,7 @@ class TestValidation(BaseValidation):
         )
 
         cfg = (
-            DictDefault(  # pylint: disable=unsupported-binary-operation
+            DictDefault(
                 {
                     "load_in_8bit": True,
                 }
@@ -315,7 +313,7 @@ class TestValidation(BaseValidation):
             validate_config(cfg)
 
         cfg = (
-            DictDefault(  # pylint: disable=unsupported-binary-operation
+            DictDefault(
                 {
                     "gptq": True,
                 }
@@ -327,7 +325,7 @@ class TestValidation(BaseValidation):
             validate_config(cfg)
 
         cfg = (
-            DictDefault(  # pylint: disable=unsupported-binary-operation
+            DictDefault(
                 {
                     "load_in_4bit": False,
                 }
@@ -339,7 +337,7 @@ class TestValidation(BaseValidation):
             validate_config(cfg)
 
         cfg = (
-            DictDefault(  # pylint: disable=unsupported-binary-operation
+            DictDefault(
                 {
                     "load_in_4bit": True,
                 }
@@ -361,7 +359,7 @@ class TestValidation(BaseValidation):
         )
 
         cfg = (
-            DictDefault(  # pylint: disable=unsupported-binary-operation
+            DictDefault(
                 {
                     "load_in_8bit": True,
                 }
@@ -373,7 +371,7 @@ class TestValidation(BaseValidation):
             validate_config(cfg)
 
         cfg = (
-            DictDefault(  # pylint: disable=unsupported-binary-operation
+            DictDefault(
                 {
                     "gptq": True,
                 }
@@ -385,7 +383,7 @@ class TestValidation(BaseValidation):
             validate_config(cfg)
 
         cfg = (
-            DictDefault(  # pylint: disable=unsupported-binary-operation
+            DictDefault(
                 {
                     "load_in_4bit": True,
                 }
diff --git a/tests/prompt_strategies/conftest.py b/tests/prompt_strategies/conftest.py
index 7f942e0ef..12c4bcd93 100644
--- a/tests/prompt_strategies/conftest.py
+++ b/tests/prompt_strategies/conftest.py
@@ -30,7 +30,6 @@ def fixture_assistant_dataset():
 
 @pytest.fixture(name="sharegpt_dataset")
 def fixture_sharegpt_dataset():
-    # pylint: disable=duplicate-code
     return Dataset.from_list(
         [
             {
@@ -47,7 +46,6 @@ def fixture_sharegpt_dataset():
 
 @pytest.fixture(name="basic_dataset")
 def fixture_basic_dataset():
-    # pylint: disable=duplicate-code
     return Dataset.from_list(
         [
             {
@@ -65,7 +63,6 @@ def fixture_basic_dataset():
 
 @pytest.fixture(name="toolcalling_dataset")
 def fixture_toolcalling_dataset():
-    # pylint: disable=duplicate-code
     return Dataset.from_list(
         [
             {
@@ -112,7 +109,7 @@ def fixture_toolcalling_dataset():
 @enable_hf_offline
 def fixture_llama3_tokenizer(
     download_llama3_8b_instruct_model_fixture,
-):  # pylint: disable=unused-argument,redefined-outer-name
+):
     tokenizer = AutoTokenizer.from_pretrained("NousResearch/Meta-Llama-3-8B-Instruct")
 
     return tokenizer
@@ -129,7 +126,7 @@ def fixture_smollm2_tokenizer():
 @enable_hf_offline
 def fixture_mistralv03_tokenizer(
     download_mlx_mistral_7b_model_fixture,
-):  # pylint: disable=unused-argument,redefined-outer-name
+):
     tokenizer = AutoTokenizer.from_pretrained(
         "mlx-community/Mistral-7B-Instruct-v0.3-4bit"
     )
diff --git a/tests/prompt_strategies/messages/test_chat.py b/tests/prompt_strategies/messages/test_chat.py
index a4c2ae67f..f083232a8 100644
--- a/tests/prompt_strategies/messages/test_chat.py
+++ b/tests/prompt_strategies/messages/test_chat.py
@@ -2,7 +2,6 @@
 tests for chat_template prompt strategy
 """
 
-# pylint: disable=duplicate-code
 import unittest
 
 from axolotl.prompt_strategies.messages.chat import load
@@ -53,9 +52,9 @@ class TestMessagesChatLlama3:
         # fmt: on
         LOG.debug(f"Expected input_ids: {expected_input_ids}")
         LOG.debug(f"Actual input_ids: {input_ids}")
-        assert (
-            input_ids == expected_input_ids
-        ), f"Input IDs mismatch: {input_ids} != {expected_input_ids}"
+        assert input_ids == expected_input_ids, (
+            f"Input IDs mismatch: {input_ids} != {expected_input_ids}"
+        )
 
 
 if __name__ == "__main__":
diff --git a/tests/prompt_strategies/test_alpaca.py b/tests/prompt_strategies/test_alpaca.py
index 78f783747..b96ebce19 100644
--- a/tests/prompt_strategies/test_alpaca.py
+++ b/tests/prompt_strategies/test_alpaca.py
@@ -30,7 +30,6 @@ def fixture_alpaca_dataset():
 @pytest.fixture(name="tokenizer")
 @enable_hf_offline
 def fixture_tokenizer():
-    # pylint: disable=all
     tokenizer = AutoTokenizer.from_pretrained(
         "casperhansen/mistral-7b-instruct-v0.1-awq"
     )
diff --git a/tests/prompt_strategies/test_chat_template_ds_schema_unification.py b/tests/prompt_strategies/test_chat_template_ds_schema_unification.py
index 502efae4b..e8d35e974 100644
--- a/tests/prompt_strategies/test_chat_template_ds_schema_unification.py
+++ b/tests/prompt_strategies/test_chat_template_ds_schema_unification.py
@@ -18,9 +18,7 @@ def fixture_messages_w_tools():
 {"messages":[{"role":"user","content":"move to (0, 1)"},{"role":"assistant","content":"","tool_calls":[{"function":{"name":"move","arguments":{"x":0,"y":1}}}]}],"tools":[{"type":"function","function":{"name":"move","description":"Move to a given location measured in meters","parameters":{"type":"object","properties":{"x":{"type":"number","description":"The x coordinate of the location, negative values are to the left, positive values are to the right"},"y":{"type":"number","description":"The y coordinate of the location, negative values are backward, positive values are forward"}},"required":["x","y"]}}},{"type":"function","function":{"name":"turn","description":"Turn the robot to a given direction","parameters":{"type":"object","properties":{"theta":{"type":"integer","description":"The angle to turn to, in degrees, positive values are counter-clockwise, negative values are clockwise"}},"required":["theta"]}}},{"type":"function","function":{"name":"invalid_prompt","description":"call when the user's prompt is invalid","parameters":{"type":"object","properties":{"message":{"type":"string","description":"why the prompt is invalid"}},"required":["message"]}}}],"add_generation_prompt":false}
 {"messages":[{"role":"user","content":"turn 270 degree"},{"role":"assistant","content":"","tool_calls":[{"function":{"name":"turn","arguments":{"theta": 270}}}]}],"tools":[{"type":"function","function":{"name":"move","description":"Move to a given location measured in meters","parameters":{"type":"object","properties":{"x":{"type":"number","description":"The x coordinate of the location, negative values are to the left, positive values are to the right"},"y":{"type":"number","description":"The y coordinate of the location, negative values are backward, positive values are forward"}},"required":["x","y"]}}},{"type":"function","function":{"name":"turn","description":"Turn the robot to a given direction","parameters":{"type":"object","properties":{"theta":{"type":"integer","description":"The angle to turn to, in degrees, positive values are counter-clockwise, negative values are clockwise"}},"required":["theta"]}}},{"type":"function","function":{"name":"invalid_prompt","description":"call when the user's prompt is invalid","parameters":{"type":"object","properties":{"message":{"type":"string","description":"why the prompt is invalid"}},"required":["message"]}}}],"add_generation_prompt":false}
 {"messages":[{"role":"user","content":"jump high"},{"role":"assistant","content":"","tool_calls":[{"function":{"name":"invalid_prompt","arguments":{"message": "jump is not a valid action"}}}]}],"tools":[{"type":"function","function":{"name":"move","description":"Move to a given location measured in meters","parameters":{"type":"object","properties":{"x":{"type":"number","description":"The x coordinate of the location, negative values are to the left, positive values are to the right"},"y":{"type":"number","description":"The y coordinate of the location, negative values are backward, positive values are forward"}},"required":["x","y"]}}},{"type":"function","function":{"name":"turn","description":"Turn the robot to a given direction","parameters":{"type":"object","properties":{"theta":{"type":"integer","description":"The angle to turn to, in degrees, positive values are counter-clockwise, negative values are clockwise"}},"required":["theta"]}}},{"type":"function","function":{"name":"invalid_prompt","description":"call when the user's prompt is invalid","parameters":{"type":"object","properties":{"message":{"type":"string","description":"why the prompt is invalid"}},"required":["message"]}}}],"add_generation_prompt":false}
-    """.strip().split(
-        "\n"
-    )
+    """.strip().split("\n")
     rows = [json.loads(row) for row in jsons]
     return Dataset.from_list(rows)
 
@@ -28,7 +26,7 @@ def fixture_messages_w_tools():
 @pytest.fixture(name="qwen3_tokenizer")
 def qwen3_tokenizer_fixture(
     download_qwen3_half_billion_model,
-):  # pylint: disable=unused-argument
+):
     tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B")
 
     return tokenizer
diff --git a/tests/prompt_strategies/test_chat_templates.py b/tests/prompt_strategies/test_chat_templates.py
index 371ccf616..90e0e274b 100644
--- a/tests/prompt_strategies/test_chat_templates.py
+++ b/tests/prompt_strategies/test_chat_templates.py
@@ -67,9 +67,9 @@ class TestAssistantChatTemplateLlama3:
         # fmt: on
         LOG.debug(f"Expected input_ids: {expected_input_ids}")
         LOG.debug(f"Actual input_ids: {input_ids}")
-        assert (
-            input_ids == expected_input_ids
-        ), f"Input IDs mismatch: {input_ids} != {expected_input_ids}"
+        assert input_ids == expected_input_ids, (
+            f"Input IDs mismatch: {input_ids} != {expected_input_ids}"
+        )
 
     def test_llama3(self, llama3_tokenizer, assistant_dataset):
         LOG.info("Testing llama-3 with assistant dataset")
@@ -109,9 +109,9 @@ class TestAssistantChatTemplateLlama3:
         # fmt: on
         LOG.debug(f"Expected input_ids: {expected_input_ids}")
         LOG.debug(f"Actual input_ids: {input_ids}")
-        assert (
-            input_ids == expected_input_ids
-        ), f"Input IDs mismatch: {input_ids} != {expected_input_ids}"
+        assert input_ids == expected_input_ids, (
+            f"Input IDs mismatch: {input_ids} != {expected_input_ids}"
+        )
 
     def test_phi35(self, phi35_tokenizer, assistant_dataset):
         LOG.info("Testing phi-3.5 with assistant dataset")
@@ -161,15 +161,15 @@ class TestAssistantChatTemplateLlama3:
         # fmt: on
         LOG.debug(f"Expected input_ids: {expected_input_ids}")
         LOG.debug(f"Actual input_ids: {input_ids}")
-        assert (
-            input_ids == expected_input_ids
-        ), f"Input IDs mismatch: {input_ids} != {expected_input_ids}"
+        assert input_ids == expected_input_ids, (
+            f"Input IDs mismatch: {input_ids} != {expected_input_ids}"
+        )
 
         LOG.debug(f"Expected labels : {expected_labels}")
         LOG.debug(f"Actual labels : {labels}")
-        assert (
-            labels == expected_labels
-        ), f"Input IDs mismatch: {labels} != {expected_labels}"
+        assert labels == expected_labels, (
+            f"Input IDs mismatch: {labels} != {expected_labels}"
+        )
 
     def test_llama3_with_training_data(self, llama3_tokenizer, assistant_dataset):
         LOG.info("Testing llama-3 with assistant dataset including training data")
@@ -234,7 +234,7 @@ class TestSharegptChatTemplateLlama3:
 
     def test_llama3_assistant(self, llama3_tokenizer, sharegpt_dataset):
         LOG.info("Testing ShareGPT style datasets with llama-3 assistant prompts")
-        # pylint: disable=duplicate-code
+
         strategy = ChatTemplateStrategy(
             ChatTemplatePrompter(
                 llama3_tokenizer,
@@ -285,16 +285,16 @@ class TestSharegptChatTemplateLlama3:
         LOG.debug(f"Expected labels: {expected_labels}")
         LOG.debug(f"Actual labels: {labels}")
 
-        assert (
-            input_ids == expected_input_ids
-        ), f"Input IDs mismatch: {input_ids} != {expected_input_ids}"
-        assert (
-            labels == expected_labels
-        ), f"Labels mismatch: {labels} != {expected_labels}"
+        assert input_ids == expected_input_ids, (
+            f"Input IDs mismatch: {input_ids} != {expected_input_ids}"
+        )
+        assert labels == expected_labels, (
+            f"Labels mismatch: {labels} != {expected_labels}"
+        )
 
     def test_llama3_human(self, llama3_tokenizer, sharegpt_dataset):
         LOG.info("Testing ShareGPT style datasets with llama-3 human prompts")
-        # pylint: disable=duplicate-code
+
         strategy = ChatTemplateStrategy(
             ChatTemplatePrompter(
                 llama3_tokenizer,
@@ -345,16 +345,16 @@ class TestSharegptChatTemplateLlama3:
         LOG.debug(f"Expected labels: {expected_labels}")
         LOG.debug(f"Actual labels: {labels}")
 
-        assert (
-            input_ids == expected_input_ids
-        ), f"Input IDs mismatch: {input_ids} != {expected_input_ids}"
-        assert (
-            labels == expected_labels
-        ), f"Labels mismatch: {labels} != {expected_labels}"
+        assert input_ids == expected_input_ids, (
+            f"Input IDs mismatch: {input_ids} != {expected_input_ids}"
+        )
+        assert labels == expected_labels, (
+            f"Labels mismatch: {labels} != {expected_labels}"
+        )
 
     def test_llama3_system_human(self, llama3_tokenizer, basic_dataset):
         LOG.info("Testing ShareGPT style datasets with llama-3 system/human prompts")
-        # pylint: disable=duplicate-code
+
         strategy = ChatTemplateStrategy(
             ChatTemplatePrompter(
                 llama3_tokenizer,
@@ -409,12 +409,12 @@ class TestSharegptChatTemplateLlama3:
         LOG.debug(f"Expected labels: {expected_labels}")
         LOG.debug(f"Actual labels: {labels}")
 
-        assert (
-            input_ids == expected_input_ids
-        ), f"Input IDs mismatch: {input_ids} != {expected_input_ids}"
-        assert (
-            labels == expected_labels
-        ), f"Labels mismatch: {labels} != {expected_labels}"
+        assert input_ids == expected_input_ids, (
+            f"Input IDs mismatch: {input_ids} != {expected_input_ids}"
+        )
+        assert labels == expected_labels, (
+            f"Labels mismatch: {labels} != {expected_labels}"
+        )
 
 
 class TestAssistantToolCallingChatTemplateLlama32Vision:
@@ -481,13 +481,13 @@ class TestAssistantToolCallingChatTemplateLlama32Vision:
         ]
         # fmt: on
 
-        assert (
-            input_ids == expected_input_ids
-        ), f"Input IDs mismatch: {input_ids} != {expected_input_ids}"
+        assert input_ids == expected_input_ids, (
+            f"Input IDs mismatch: {input_ids} != {expected_input_ids}"
+        )
 
-        assert (
-            labels == expected_labels
-        ), f"Labels mismatch: {labels} != {expected_labels}"
+        assert labels == expected_labels, (
+            f"Labels mismatch: {labels} != {expected_labels}"
+        )
 
     def test_llama32vision_train_on_tools(
         self, llama3_tokenizer, toolcalling_dataset, llama3_2_vision_chat_template_jinja
@@ -495,7 +495,6 @@ class TestAssistantToolCallingChatTemplateLlama32Vision:
         LOG.info(
             "Testing assistant style datasets with tool_calling with llama-32 chat template, training on tools"
         )
-        # pylint: disable=duplicate-code
 
         strategy = ChatTemplateStrategy(
             ChatTemplatePrompter(
@@ -549,13 +548,13 @@ class TestAssistantToolCallingChatTemplateLlama32Vision:
         ]
         # fmt: on
 
-        assert (
-            input_ids == expected_input_ids
-        ), f"Input IDs mismatch: {input_ids} != {expected_input_ids}"
+        assert input_ids == expected_input_ids, (
+            f"Input IDs mismatch: {input_ids} != {expected_input_ids}"
+        )
 
-        assert (
-            labels == expected_labels
-        ), f"Labels mismatch: {labels} != {expected_labels}"
+        assert labels == expected_labels, (
+            f"Labels mismatch: {labels} != {expected_labels}"
+        )
 
 
 if __name__ == "__main__":
diff --git a/tests/prompt_strategies/test_chat_templates_advanced.py b/tests/prompt_strategies/test_chat_templates_advanced.py
index f847cab4a..fd39a4305 100644
--- a/tests/prompt_strategies/test_chat_templates_advanced.py
+++ b/tests/prompt_strategies/test_chat_templates_advanced.py
@@ -2,8 +2,6 @@
 tests for chat_template prompt strategy
 """
 
-# pylint: disable=too-many-lines
-
 from copy import deepcopy
 
 import pytest
@@ -96,9 +94,9 @@ class TestChatTemplateConfigurations:
             and turn.get("from") in ["system", "context"]
             and ("mistral" in tokenizer.name_or_path.lower())
         ):
-            assert (
-                start_idx == -1 and end_idx == -1
-            ), "Expected system message to be skipped"
+            assert start_idx == -1 and end_idx == -1, (
+                "Expected system message to be skipped"
+            )
             return True
         return False
 
@@ -155,7 +153,9 @@ class TestChatTemplateConfigurations:
 
             assert all(
                 label != IGNORE_TOKEN_ID for label in labels[start_idx:end_idx]
-            ), f"Expected labels for input '{response}' to be ignored, but got {labels[start_idx:end_idx]}"
+            ), (
+                f"Expected labels for input '{response}' to be ignored, but got {labels[start_idx:end_idx]}"
+            )
 
         LOG.debug("Full labels: %s", labels)
         LOG.debug("Full input_ids: %s", input_ids)
@@ -215,11 +215,15 @@ class TestChatTemplateConfigurations:
             if is_assistant:
                 assert all(
                     label != IGNORE_TOKEN_ID for label in labels[start_idx:end_idx]
-                ), f"Expected labels for assistant response '{response}' to be set, but got {labels[start_idx:end_idx]}"
+                ), (
+                    f"Expected labels for assistant response '{response}' to be set, but got {labels[start_idx:end_idx]}"
+                )
             else:
                 assert all(
                     label == IGNORE_TOKEN_ID for label in labels[start_idx:end_idx]
-                ), f"Expected labels for human input '{response}' to be IGNORE_TOKEN_ID, but got {labels[start_idx:end_idx]}"
+                ), (
+                    f"Expected labels for human input '{response}' to be IGNORE_TOKEN_ID, but got {labels[start_idx:end_idx]}"
+                )
 
     def test_roles_to_train_human_assistant_only(
         self,
@@ -276,11 +280,15 @@ class TestChatTemplateConfigurations:
             if should_be_labelled:
                 assert all(
                     label != IGNORE_TOKEN_ID for label in labels[start_idx:end_idx]
-                ), f"Expected labels for assistant response '{response}' to be set, but got {labels[start_idx:end_idx]}"
+                ), (
+                    f"Expected labels for assistant response '{response}' to be set, but got {labels[start_idx:end_idx]}"
+                )
             else:
                 assert all(
                     label == IGNORE_TOKEN_ID for label in labels[start_idx:end_idx]
-                ), f"Expected labels for human input '{response}' to be IGNORE_TOKEN_ID, but got {labels[start_idx:end_idx]}"
+                ), (
+                    f"Expected labels for human input '{response}' to be IGNORE_TOKEN_ID, but got {labels[start_idx:end_idx]}"
+                )
 
     def test_roles_to_train_all(
         self,
@@ -327,13 +335,15 @@ class TestChatTemplateConfigurations:
                 continue
 
             decoded_response = tokenizer.decode(input_ids[start_idx:end_idx])
-            assert (
-                response in decoded_response
-            ), f"Response {response} not found in index {start_idx}:{end_idx} decoded:{decoded_response}"
+            assert response in decoded_response, (
+                f"Response {response} not found in index {start_idx}:{end_idx} decoded:{decoded_response}"
+            )
 
             assert all(
                 label != IGNORE_TOKEN_ID for label in labels[start_idx:end_idx]
-            ), f"Expected labels for response '{response}' to be set, but got {labels[start_idx:end_idx]}"
+            ), (
+                f"Expected labels for response '{response}' to be set, but got {labels[start_idx:end_idx]}"
+            )
 
     def test_empty_roles_to_train(
         self,
@@ -371,9 +381,9 @@ class TestChatTemplateConfigurations:
 
         # Verify that no labels are set when roles_to_train is empty
         LOG.debug("Full labels: %s", labels)
-        assert all(
-            label == IGNORE_TOKEN_ID for label in labels
-        ), "Expected all labels to be IGNORE_TOKEN_ID when roles_to_train is empty"
+        assert all(label == IGNORE_TOKEN_ID for label in labels), (
+            "Expected all labels to be IGNORE_TOKEN_ID when roles_to_train is empty"
+        )
 
     def test_train_on_eos_all(
         self,
@@ -417,9 +427,9 @@ class TestChatTemplateConfigurations:
 
         assert len(eos_indices) > 0, "Expected at least one EOS token in the input"
         for eos_idx in eos_indices:
-            assert (
-                labels[eos_idx] != IGNORE_TOKEN_ID
-            ), f"Expected EOS token at index {eos_idx} to be labeled"
+            assert labels[eos_idx] != IGNORE_TOKEN_ID, (
+                f"Expected EOS token at index {eos_idx} to be labeled"
+            )
 
     def test_train_on_eos_turn(
         self,
@@ -477,9 +487,9 @@ class TestChatTemplateConfigurations:
             while eos_idx < len(input_ids) and input_ids[eos_idx] != eos_token_id:
                 eos_idx += 1
 
-            assert eos_idx < len(
-                input_ids
-            ), f"Could not find EOS token after '{response}'"
+            assert eos_idx < len(input_ids), (
+                f"Could not find EOS token after '{response}'"
+            )
 
             LOG.debug(
                 f"Turn {i}: role={turn['from']}, content='{turn['value']}', start_idx={start_idx}, end_idx={end_idx}, eos_idx={eos_idx}"
@@ -492,13 +502,13 @@ class TestChatTemplateConfigurations:
             # Verify EOS token labeling based on role
             is_assistant = turn["from"] == "assistant"
             if is_assistant:
-                assert (
-                    labels[eos_idx] != IGNORE_TOKEN_ID
-                ), f"Expected EOS token after assistant response '{response}' to be labeled"
+                assert labels[eos_idx] != IGNORE_TOKEN_ID, (
+                    f"Expected EOS token after assistant response '{response}' to be labeled"
+                )
             else:
-                assert (
-                    labels[eos_idx] == IGNORE_TOKEN_ID
-                ), f"Expected EOS token after non-assistant input '{response}' to not be labeled"
+                assert labels[eos_idx] == IGNORE_TOKEN_ID, (
+                    f"Expected EOS token after non-assistant input '{response}' to not be labeled"
+                )
 
     def test_train_on_eos_last(
         self,
@@ -545,12 +555,12 @@ class TestChatTemplateConfigurations:
 
         # Check that only the last EOS token is labeled
         for idx in eos_indices[:-1]:
-            assert (
-                labels[idx] == IGNORE_TOKEN_ID
-            ), f"Expected EOS token at index {idx} to not be labeled"
-        assert (
-            labels[last_eos_idx] != IGNORE_TOKEN_ID
-        ), f"Expected last EOS token at index {last_eos_idx} to be labeled"
+            assert labels[idx] == IGNORE_TOKEN_ID, (
+                f"Expected EOS token at index {idx} to not be labeled"
+            )
+        assert labels[last_eos_idx] != IGNORE_TOKEN_ID, (
+            f"Expected last EOS token at index {last_eos_idx} to be labeled"
+        )
 
     def test_train_on_eos_none(
         self,
@@ -594,9 +604,9 @@ class TestChatTemplateConfigurations:
 
         assert len(eos_indices) > 0, "Expected at least one EOS token in the input"
         for eos_idx in eos_indices:
-            assert (
-                labels[eos_idx] == IGNORE_TOKEN_ID
-            ), f"Expected EOS token at index {eos_idx} to not be labeled"
+            assert labels[eos_idx] == IGNORE_TOKEN_ID, (
+                f"Expected EOS token at index {eos_idx} to not be labeled"
+            )
 
     def test_drop_system_message(
         self,
@@ -634,9 +644,9 @@ class TestChatTemplateConfigurations:
         # Check if system message is not present in input_ids
         system_message = "You are an AI assistant."
         decoded_message = tokenizer.decode(input_ids)
-        assert (
-            system_message not in decoded_message
-        ), "Expected system message to be dropped"
+        assert system_message not in decoded_message, (
+            "Expected system message to be dropped"
+        )
 
     def test_custom_roles(
         self,
@@ -711,7 +721,9 @@ class TestChatTemplateConfigurations:
             else:
                 assert all(
                     label == IGNORE_TOKEN_ID for label in labels[start_idx:end_idx]
-                ), f"Expected labels for non-AI message '{response}' to be IGNORE_TOKEN_ID"
+                ), (
+                    f"Expected labels for non-AI message '{response}' to be IGNORE_TOKEN_ID"
+                )
 
     def test_message_field_training(
         self,
@@ -776,13 +788,13 @@ class TestChatTemplateConfigurations:
         def verify_labels(labels_span, should_train, context_message):
             """Helper to verify if a span of labels matches expected training state"""
             if should_train:
-                assert all(
-                    label != IGNORE_TOKEN_ID for label in labels_span
-                ), f"Expected all labels for {context_message} to be set, but got {labels_span}"
+                assert all(label != IGNORE_TOKEN_ID for label in labels_span), (
+                    f"Expected all labels for {context_message} to be set, but got {labels_span}"
+                )
             else:
-                assert all(
-                    label == IGNORE_TOKEN_ID for label in labels_span
-                ), f"Expected all labels for {context_message} to be {IGNORE_TOKEN_ID}, but got {labels_span}"
+                assert all(label == IGNORE_TOKEN_ID for label in labels_span), (
+                    f"Expected all labels for {context_message} to be {IGNORE_TOKEN_ID}, but got {labels_span}"
+                )
 
         # Process all turns and verify labeling
         for i, turn in enumerate(modified_dataset[0]["messages"]):
@@ -861,9 +873,9 @@ class TestChatTemplateConfigurations:
                 actual_labels = labels[
                     start_idx : start_idx + len(token_offsets_masked)
                 ]
-                assert (
-                    actual_labels == expected_labels
-                ), f"Labels mismatch for turn: {turn['value']}\nExpected: {expected_labels}\nActual: {actual_labels}"
+                assert actual_labels == expected_labels, (
+                    f"Labels mismatch for turn: {turn['value']}\nExpected: {expected_labels}\nActual: {actual_labels}"
+                )
 
                 # Verify each detail section
                 for detail in adjusted_train_details:
@@ -958,7 +970,7 @@ class TestChatTemplateConfigurations:
         chat_template,
         chat_template_jinja,
         eos_token,
-        basic_dataset,  # pylint: disable=unused-argument
+        basic_dataset,
         request,
     ):
         """Test that an error is raised when eot_tokens contains eos_token and train_on_eot/train_on_eos conflict"""
@@ -1005,7 +1017,7 @@ class TestChatTemplateConfigurations:
         chat_template,
         chat_template_jinja,
         eos_token,
-        basic_dataset,  # pylint: disable=unused-argument
+        basic_dataset,
         request,
     ):
         """Test that eot_tokens inherits from eos_token when not specified"""
@@ -1032,12 +1044,12 @@ class TestChatTemplateConfigurations:
         )
 
         # In backward compatibility mode, eot_tokens should be derived from eos_token
-        assert strategy.eot_tokens == [
-            tokenizer.eos_token
-        ], f"Expected eot_tokens to inherit from eos_token, got {strategy.eot_tokens}"
-        assert (
-            strategy.train_on_eot == "turn"
-        ), f"Expected train_on_eot to inherit from train_on_eos, got {strategy.train_on_eot}"
+        assert strategy.eot_tokens == [tokenizer.eos_token], (
+            f"Expected eot_tokens to inherit from eos_token, got {strategy.eot_tokens}"
+        )
+        assert strategy.train_on_eot == "turn", (
+            f"Expected train_on_eot to inherit from train_on_eos, got {strategy.train_on_eot}"
+        )
 
     def test_token_not_in_template(
         self,
@@ -1091,7 +1103,7 @@ class TestChatTemplateConfigurations:
         tokenizer,
         chat_template,
         chat_template_jinja,
-        eos_token,  # pylint: disable=unused-argument
+        eos_token,
         basic_dataset,
         request,
     ):
@@ -1157,13 +1169,13 @@ class TestChatTemplateConfigurations:
             )
 
             if is_after_assistant:
-                assert (
-                    labels[eot_idx] != IGNORE_TOKEN_ID
-                ), f"Expected EOT token after assistant turn at index {eot_idx} to be labeled"
+                assert labels[eot_idx] != IGNORE_TOKEN_ID, (
+                    f"Expected EOT token after assistant turn at index {eot_idx} to be labeled"
+                )
             else:
-                assert (
-                    labels[eot_idx] == IGNORE_TOKEN_ID
-                ), f"Expected EOT token not after assistant turn at index {eot_idx} to not be labeled"
+                assert labels[eot_idx] == IGNORE_TOKEN_ID, (
+                    f"Expected EOT token not after assistant turn at index {eot_idx} to not be labeled"
+                )
 
     def test_multiple_train_on_eot_settings(
         self,
@@ -1224,9 +1236,9 @@ class TestChatTemplateConfigurations:
                 i for i, token_id in enumerate(input_ids) if token_id == eos_token_id
             ]
 
-            assert (
-                len(eos_indices) > 0
-            ), "Expected at least one EOS/EOT token in the input"
+            assert len(eos_indices) > 0, (
+                "Expected at least one EOS/EOT token in the input"
+            )
 
             # Check labeling for each EOS/EOT token
             for idx, eos_idx in enumerate(eos_indices):
@@ -1252,13 +1264,13 @@ class TestChatTemplateConfigurations:
                 )
 
                 if expected_label:
-                    assert (
-                        labels[eos_idx] == IGNORE_TOKEN_ID
-                    ), f"Expected EOT token at index {eos_idx} to not be labeled with train_on_eot='{setting}'"
+                    assert labels[eos_idx] == IGNORE_TOKEN_ID, (
+                        f"Expected EOT token at index {eos_idx} to not be labeled with train_on_eot='{setting}'"
+                    )
                 else:
-                    assert (
-                        labels[eos_idx] != IGNORE_TOKEN_ID
-                    ), f"Expected EOT token at index {eos_idx} to be labeled with train_on_eot='{setting}'"
+                    assert labels[eos_idx] != IGNORE_TOKEN_ID, (
+                        f"Expected EOT token at index {eos_idx} to be labeled with train_on_eot='{setting}'"
+                    )
 
 
 class TestChatTemplateToolCalling:
@@ -1378,29 +1390,27 @@ class TestChatTemplateToolCalling:
         decoded_conversation = tokenizer.decode(input_ids)
 
         # Verify tool calling structure is present in the decoded conversation
-        assert (
-            '"type": "function",' in decoded_conversation
-        ), "Tool type function should be in conversation"
-        assert (
-            '"name": "multiples",' in decoded_conversation
-        ), "Tool function name should be in conversation"
+        assert '"type": "function",' in decoded_conversation, (
+            "Tool type function should be in conversation"
+        )
+        assert '"name": "multiples",' in decoded_conversation, (
+            "Tool function name should be in conversation"
+        )
 
         assert (
             '<|python_start|><|python_end|>{"name": "multiples", "parameters": {"number": 5, "limit": 20}}<|eot|>'
             in decoded_conversation
         ), "Assistant tool call should be in conversation"
-        assert (
-            "<|header_start|>ipython<|header_end|>" in decoded_conversation
-        ), "IPython header should be in conversation"
-        assert (
-            '"5,10,15"' in decoded_conversation
-        ), "Tool response should be in conversation"
+        assert "<|header_start|>ipython<|header_end|>" in decoded_conversation, (
+            "IPython header should be in conversation"
+        )
+        assert '"5,10,15"' in decoded_conversation, (
+            "Tool response should be in conversation"
+        )
 
         # Get conversation turns to verify labeling
         turns = strategy.get_conversation_thread(tool_calling_dataset[0])
-        tools = strategy._get_tools(  # pylint: disable=protected-access
-            tool_calling_dataset[0]
-        )
+        tools = strategy._get_tools(tool_calling_dataset[0])
 
         # Check that assistant responses are properly labeled
         for i, turn in enumerate(tool_calling_dataset[0]["messages"]):
@@ -1409,12 +1419,12 @@ class TestChatTemplateToolCalling:
                     turns=turns, turn_idx=i, tools=tools
                 )
 
-                assert (
-                    start_idx != -1 and end_idx != -1
-                ), f"Assistant turn {i} should be found"
+                assert start_idx != -1 and end_idx != -1, (
+                    f"Assistant turn {i} should be found"
+                )
 
                 # Verify that assistant responses have proper labels
                 turn_labels = labels[start_idx:end_idx]
-                assert all(
-                    label != IGNORE_TOKEN_ID for label in turn_labels
-                ), f"Assistant turn {i} should be unmasked"
+                assert all(label != IGNORE_TOKEN_ID for label in turn_labels), (
+                    f"Assistant turn {i} should be unmasked"
+                )
diff --git a/tests/prompt_strategies/test_chat_templates_mistral.py b/tests/prompt_strategies/test_chat_templates_mistral.py
index a5b31a771..85aa72111 100644
--- a/tests/prompt_strategies/test_chat_templates_mistral.py
+++ b/tests/prompt_strategies/test_chat_templates_mistral.py
@@ -28,7 +28,7 @@ def test_mistral_chat_template(
     request: pytest.FixtureRequest,
 ):
     """Test chat template with the Magistral/Devstral tokenizer"""
-    # pylint: disable=duplicate-code
+
     from axolotl.prompt_strategies.chat_template import MistralPrompter, MistralStrategy
 
     tokenizer: HFMistralTokenizer = request.getfixturevalue(tokenizer_str)
diff --git a/tests/prompt_strategies/test_chat_templates_thinking.py b/tests/prompt_strategies/test_chat_templates_thinking.py
index e807111aa..5475666a5 100644
--- a/tests/prompt_strategies/test_chat_templates_thinking.py
+++ b/tests/prompt_strategies/test_chat_templates_thinking.py
@@ -59,7 +59,7 @@ def messages_w_reasoning_fixture():
 @pytest.fixture(name="qwen3_tokenizer")
 def qwen3_tokenizer_fixture(
     download_qwen3_half_billion_model,
-):  # pylint: disable=unused-argument
+):
     tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B")
 
     return tokenizer
@@ -71,7 +71,6 @@ class TestSplitThinking:
     """
 
     def test_splits_think(self, messages_w_reasoning, qwen3_tokenizer):
-        # pylint: disable=duplicate-code
         strategy = load(
             qwen3_tokenizer,
             DictDefault(
@@ -130,6 +129,6 @@ class TestSplitThinking:
                 198,  # \n
             ]
             # fmt: on
-            assert (
-                input_ids == expected_input_ids
-            ), f"Input IDs mismatch: {input_ids} != {expected_input_ids}"
+            assert input_ids == expected_input_ids, (
+                f"Input IDs mismatch: {input_ids} != {expected_input_ids}"
+            )
diff --git a/tests/prompt_strategies/test_dpo_chat_templates.py b/tests/prompt_strategies/test_dpo_chat_templates.py
index e5f30a6c4..e570cfc9d 100644
--- a/tests/prompt_strategies/test_dpo_chat_templates.py
+++ b/tests/prompt_strategies/test_dpo_chat_templates.py
@@ -16,7 +16,6 @@ from tests.hf_offline_utils import enable_hf_offline
 
 @pytest.fixture(name="assistant_dataset")
 def fixture_assistant_dataset():
-    # pylint: disable=duplicate-code
     return Dataset.from_list(
         [
             {
@@ -49,7 +48,6 @@ def fixture_assistant_dataset():
 
 @pytest.fixture(name="custom_assistant_dataset")
 def fixture_custom_assistant_dataset():
-    # pylint: disable=duplicate-code
     return Dataset.from_list(
         [
             {
@@ -102,7 +100,6 @@ class TestAssistantDPOChatTemplateLlama3:
     """
 
     def test_llama3_defaults(self, llama3_tokenizer, assistant_dataset):
-        # pylint: disable=duplicate-code
         transform_fn, _ = default(
             DictDefault(
                 {
@@ -127,7 +124,6 @@ class TestAssistantDPOChatTemplateLlama3:
         assert result["rejected"] == "party on<|eot_id|>"
 
     def test_llama3_configured(self, llama3_tokenizer, custom_assistant_dataset):
-        # pylint: disable=duplicate-code
         transform_fn, _ = default(
             DictDefault(
                 {
@@ -168,7 +164,6 @@ class TestAssistantDPOChatTemplatePhi3:
     """
 
     def test_phi3_defaults(self, phi3_tokenizer, assistant_dataset):
-        # pylint: disable=duplicate-code
         transform_fn, _ = default(
             DictDefault(
                 {
@@ -198,7 +193,6 @@ class TestAssistantDPOChatTemplateGemma:
     """
 
     def test_gemma_defaults(self, gemma_tokenizer, assistant_dataset):
-        # pylint: disable=duplicate-code
         transform_fn, _ = default(
             DictDefault(
                 {
diff --git a/tests/prompt_strategies/test_stepwise.py b/tests/prompt_strategies/test_stepwise.py
index 2abe4ae18..ad3f7531f 100644
--- a/tests/prompt_strategies/test_stepwise.py
+++ b/tests/prompt_strategies/test_stepwise.py
@@ -20,7 +20,6 @@ class TestStepWiseSupervisedPromptTokenizingStrategy:
 
     @pytest.fixture()
     def stepwise_supervised_dataset(self):
-        # pylint: disable=duplicate-code
         return Dataset.from_list(
             [
                 {
diff --git a/tests/test_chunked_xentropy.py b/tests/test_chunked_xentropy.py
index 3e439f0a3..56ac1b168 100644
--- a/tests/test_chunked_xentropy.py
+++ b/tests/test_chunked_xentropy.py
@@ -22,7 +22,7 @@ def chunked_fixtures():
     return lm_head, hidden_state, labels, vocab_size
 
 
-def test_chunked_forward(chunked_fixtures):  # pylint: disable=redefined-outer-name
+def test_chunked_forward(chunked_fixtures):
     lm_head, hidden_state, labels, vocab_size = chunked_fixtures
     lm_loss = get_causal_lm_loss()
 
diff --git a/tests/test_datasets.py b/tests/test_datasets.py
index 719dfdc19..ea5ee368d 100644
--- a/tests/test_datasets.py
+++ b/tests/test_datasets.py
@@ -374,7 +374,6 @@ class TestDatasetPreparation:
             }
         )
 
-        # pylint: disable=duplicate-code
         with patch(
             "axolotl.utils.data.rl.load_dataset_with_config"
         ) as mock_load_dataset:
diff --git a/tests/test_dict.py b/tests/test_dict.py
index 0bcf8ca7b..19a370199 100644
--- a/tests/test_dict.py
+++ b/tests/test_dict.py
@@ -21,26 +21,26 @@ class DictDefaultTest(unittest.TestCase):
             }
         )
 
-        assert (
-            cfg.key_a.key_b == "value_a"
-        ), "DictDefault should return value for existing nested keys"
+        assert cfg.key_a.key_b == "value_a", (
+            "DictDefault should return value for existing nested keys"
+        )
 
-        assert (
-            cfg.key_c == "value_c"
-        ), "DictDefault should return value for existing keys"
+        assert cfg.key_c == "value_c", (
+            "DictDefault should return value for existing keys"
+        )
 
-        assert (
-            cfg.key_d[0] == "value_d"
-        ), "DictDefault should return value for existing keys in list"
+        assert cfg.key_d[0] == "value_d", (
+            "DictDefault should return value for existing keys in list"
+        )
 
-        assert (
-            "value_e" in cfg.key_d
-        ), "DictDefault should support in operator for existing keys in list"
+        assert "value_e" in cfg.key_d, (
+            "DictDefault should support in operator for existing keys in list"
+        )
 
     def test_dict_or_operator(self):
         cfg = DictDefault({"key_a": {"key_b": "value_b"}, "key_f": "value_g"})
 
-        cfg = cfg | DictDefault(  # pylint: disable=unsupported-binary-operation
+        cfg = cfg | DictDefault(
             {
                 "key_a": {"key_b": "value_a"},
                 "key_c": "value_c",
@@ -49,9 +49,9 @@ class DictDefaultTest(unittest.TestCase):
             }
         )
 
-        assert (
-            cfg.key_a.key_b == "value_b"
-        ), "DictDefault should support OR operator for existing nested keys"
+        assert cfg.key_a.key_b == "value_b", (
+            "DictDefault should support OR operator for existing nested keys"
+        )
 
         assert cfg.key_c == "value_c", "DictDefault should not delete existing key"
 
@@ -60,9 +60,9 @@ class DictDefaultTest(unittest.TestCase):
             "value_e",
         ], "DictDefault should not overwrite existing keys in list"
 
-        assert (
-            cfg.key_f == "value_g"
-        ), "DictDefault should support OR operator for existing key"
+        assert cfg.key_f == "value_g", (
+            "DictDefault should support OR operator for existing key"
+        )
 
     def test_dict_missingkey(self):
         cfg = DictDefault({})
@@ -72,9 +72,9 @@ class DictDefaultTest(unittest.TestCase):
     def test_dict_or(self):
         cfg = DictDefault({}) | DictDefault({})
 
-        assert (
-            cfg.random_key is None
-        ), "DictDefault should return None for missing keys after | operation"
+        assert cfg.random_key is None, (
+            "DictDefault should return None for missing keys after | operation"
+        )
 
     def test_dict_nested_missingparentkey(self):
         """
diff --git a/tests/test_exact_deduplication.py b/tests/test_exact_deduplication.py
index d97aad8ea..65deb5209 100644
--- a/tests/test_exact_deduplication.py
+++ b/tests/test_exact_deduplication.py
@@ -41,9 +41,9 @@ def verify_deduplication(actual_dataset, expected_dataset, dataset_name):
     assert actual_rows == expected_rows, f"Mismatch in {dataset_name} dataset"
 
     # Verify size consistency
-    assert len(actual_rows) == len(
-        actual_dataset
-    ), f"Size mismatch in {dataset_name} dataset after deduplication"
+    assert len(actual_rows) == len(actual_dataset), (
+        f"Size mismatch in {dataset_name} dataset after deduplication"
+    )
 
 
 class TestDeduplicateIndividualFunctions(unittest.TestCase):
@@ -224,7 +224,6 @@ class TestDeduplicateRLDataset:
     ):
         """Verify that loading with deduplication removes duplicates."""
 
-        # pylint: disable=duplicate-code
         with (
             patch(
                 "axolotl.utils.data.rl.load_dataset_with_config"
@@ -251,7 +250,6 @@ class TestDeduplicateRLDataset:
         dataset_fozziethebeat_alpaca_messages_2k_dpo_test_rev_ea82cff,
         tokenizer_huggyllama,
     ):
-        # pylint: disable=duplicate-code
         with (
             patch(
                 "axolotl.utils.data.rl.load_dataset_with_config"
@@ -271,9 +269,9 @@ class TestDeduplicateRLDataset:
             train_dataset, _ = prepare_preference_datasets(cfg, tokenizer)
 
             # Verify that the dataset retains duplicates
-            assert (
-                len(train_dataset) == 1800 * 2
-            ), "Dataset deduplication occurred when it should not have"
+            assert len(train_dataset) == 1800 * 2, (
+                "Dataset deduplication occurred when it should not have"
+            )
 
 
 class TestDeduplicateNonRL(unittest.TestCase):
diff --git a/tests/test_loaders.py b/tests/test_loaders.py
index d45f41998..f516d0ca4 100644
--- a/tests/test_loaders.py
+++ b/tests/test_loaders.py
@@ -17,7 +17,7 @@ class TestModelsUtils:
 
     def setup_method(self) -> None:
         # load config
-        self.cfg = DictDefault(  # pylint: disable=attribute-defined-outside-init
+        self.cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
                 "model_type": "AutoModelForCausalLM",
@@ -30,20 +30,16 @@ class TestModelsUtils:
                 "device_map": "auto",
             }
         )
-        self.tokenizer = MagicMock(  # pylint: disable=attribute-defined-outside-init
-            spec=PreTrainedTokenizerBase
-        )
-        self.inference = False  # pylint: disable=attribute-defined-outside-init
-        self.reference_model = True  # pylint: disable=attribute-defined-outside-init
+        self.tokenizer = MagicMock(spec=PreTrainedTokenizerBase)
+        self.inference = False
+        self.reference_model = True
 
         # init ModelLoader
-        self.model_loader = (  # pylint: disable=attribute-defined-outside-init
-            ModelLoader(
-                cfg=self.cfg,
-                tokenizer=self.tokenizer,
-                inference=self.inference,
-                reference_model=self.reference_model,
-            )
+        self.model_loader = ModelLoader(
+            cfg=self.cfg,
+            tokenizer=self.tokenizer,
+            inference=self.inference,
+            reference_model=self.reference_model,
         )
 
     def test_set_device_map_config(self):
@@ -51,7 +47,7 @@ class TestModelsUtils:
         device_map = self.cfg.device_map
         if is_torch_mps_available():
             device_map = "mps"
-        # pylint: disable=protected-access
+
         self.model_loader._set_device_map_config()
         if is_deepspeed_zero3_enabled():
             assert "device_map" not in self.model_loader.model_kwargs
@@ -78,7 +74,6 @@ class TestModelsUtils:
         self.cfg.gptq = gptq
         self.cfg.adapter = adapter
 
-        # pylint: disable=protected-access
         self.model_loader._set_quantization_config()
         if "quantization_config" in self.model_loader.model_kwargs or self.cfg.gptq:
             assert not (
@@ -194,7 +189,7 @@ class TestModelsUtils:
         is_fsdp,
         expected,
     ):
-        res = _get_parallel_config_kwargs(  # pylint: disable=protected-access
+        res = _get_parallel_config_kwargs(
             world_size,
             tensor_parallel_size,
             context_parallel_size,
diff --git a/tests/test_lora.py b/tests/test_lora.py
index 6edcdd88e..50cbea9bc 100644
--- a/tests/test_lora.py
+++ b/tests/test_lora.py
@@ -6,7 +6,6 @@ from axolotl.loaders import ModelLoader, load_tokenizer
 from axolotl.utils.config import normalize_config, validate_config
 from axolotl.utils.dict import DictDefault
 
-# pylint: disable=duplicate-code
 minimal_config = DictDefault(
     {
         "base_model": "HuggingFaceTB/SmolLM2-135M",
diff --git a/tests/test_packed_batch_sampler.py b/tests/test_packed_batch_sampler.py
index d839c6ea3..a5db7cbe0 100644
--- a/tests/test_packed_batch_sampler.py
+++ b/tests/test_packed_batch_sampler.py
@@ -93,7 +93,7 @@ class TestBatchedSamplerPacking:
         loader = DataLoader(
             train_dataset,
             batch_sampler=batch_sampler,
-            collate_fn=V2BatchSamplerDataCollatorForSeq2Seq(  # pylint: disable=unexpected-keyword-arg
+            collate_fn=V2BatchSamplerDataCollatorForSeq2Seq(
                 tokenizer=tokenizer,
                 padding=True,
                 pad_to_multiple_of=max_seq_length,
diff --git a/tests/test_packed_dataset.py b/tests/test_packed_dataset.py
index 699d5e6cc..43e4f3d39 100644
--- a/tests/test_packed_dataset.py
+++ b/tests/test_packed_dataset.py
@@ -26,7 +26,6 @@ class TestPacking(unittest.TestCase):
 
     @enable_hf_offline
     def setUp(self) -> None:
-        # pylint: disable=duplicate-code
         self.tokenizer = AutoTokenizer.from_pretrained("huggyllama/llama-7b")
         self.tokenizer.add_special_tokens(
             {
@@ -75,7 +74,6 @@ class TestPacking(unittest.TestCase):
 
     @with_temp_dir
     def test_lora_packing(self, temp_dir):
-        # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
@@ -127,9 +125,7 @@ class TestPacking(unittest.TestCase):
             _,
         ) = setup_model_and_trainer(cfg, dataset_meta)
 
-        sampler = trainer._get_eval_sampler(  # pylint: disable=protected-access
-            trainer.eval_dataset
-        )
+        sampler = trainer._get_eval_sampler(trainer.eval_dataset)
         assert "MultipackBatchSampler" in sampler.__class__.__name__
         assert (
             "V2BatchSamplerDataCollatorForSeq2Seq"
@@ -140,9 +136,7 @@ class TestPacking(unittest.TestCase):
         batch = next(dataloader_iter)
         assert batch["input_ids"].shape == (1, 8192)
 
-        sampler = trainer._get_train_sampler(  # pylint: disable=protected-access
-            trainer.train_dataset
-        )
+        sampler = trainer._get_train_sampler(trainer.train_dataset)
         assert "MultipackBatchSampler" in sampler.__class__.__name__
         assert (
             "V2BatchSamplerDataCollatorForSeq2Seq"
diff --git a/tests/test_packed_pretraining.py b/tests/test_packed_pretraining.py
index 115813df2..117bc0dbd 100644
--- a/tests/test_packed_pretraining.py
+++ b/tests/test_packed_pretraining.py
@@ -76,7 +76,6 @@ class TestPretrainingPacking:
             cfg.pretraining_dataset[0]["type"] or "pretrain",
         )
 
-        # pylint: disable=duplicate-code
         original_bsz = cfg.micro_batch_size
         train_dataset = wrap_pretraining_dataset(
             dataset,
diff --git a/tests/test_perplexity.py b/tests/test_perplexity.py
index 9a1c9b223..8f4306994 100644
--- a/tests/test_perplexity.py
+++ b/tests/test_perplexity.py
@@ -1,7 +1,5 @@
 """unit tests for perplexity eval callback"""
 
-# pylint: disable=redefined-outer-name
-
 from pytest import fixture
 from transformers.models.auto.modeling_auto import AutoModelForCausalLM
 from transformers.models.auto.tokenization_auto import AutoTokenizer
diff --git a/tests/test_prompt_tokenizers.py b/tests/test_prompt_tokenizers.py
index 5e5de4ff8..672643a92 100644
--- a/tests/test_prompt_tokenizers.py
+++ b/tests/test_prompt_tokenizers.py
@@ -64,7 +64,7 @@ class TestPromptTokenizationStrategies:
         tests the interface between the user and assistant parts
         """
         prompter = NoSystemPrompter()
-        # pylint: disable=duplicate-code
+
         strat = AlpacaPromptTokenizingStrategy(
             prompter,
             tokenizer_huggyllama_w_special_tokens,
@@ -85,7 +85,7 @@ class TestPromptTokenizationStrategies:
         """
         tests the interface between the user and assistant parts
         """
-        # pylint: disable=duplicate-code
+
         prompter = AlpacaPrompter()
         strat = AlpacaPromptTokenizingStrategy(
             prompter,
@@ -171,7 +171,7 @@ class Llama2ChatTokenizationTest:
         # from transformers.models.llama.tokenization_llama import DEFAULT_SYSTEM_PROMPT
         # broken as of 23/7/20
         # see https://github.com/huggingface/transformers/pull/24935
-        # pylint: disable=C0103
+
         DEFAULT_SYSTEM_PROMPT = """\
 You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
 
@@ -201,7 +201,7 @@ If a question does not make any sense, or is not factually coherent, explain why
             + user_input[1:-1],
             generated_responses=answers,
         )
-        # pylint: disable=W0212
+
         hf_tokens = tokenizer_llama2_7b._build_conversation_input_ids(hf_conf)
 
         assert hf_tokens == tokenized_conversation["input_ids"][: len(hf_tokens)]
diff --git a/tests/test_schedulers.py b/tests/test_schedulers.py
index 92664cca8..c783a68db 100644
--- a/tests/test_schedulers.py
+++ b/tests/test_schedulers.py
@@ -22,7 +22,7 @@ class TestCosineConstantLr(unittest.TestCase):
         self.constant_lr_ratio = 0.8
         self._lr = 0.01
         self.optimizer = SGD([torch.tensor(1)], lr=self._lr)
-        self.lr_scheduler = get_cosine_schedule_with_warmup_decay_constant(  # pylint: disable=attribute-defined-outside-init
+        self.lr_scheduler = get_cosine_schedule_with_warmup_decay_constant(
             self.optimizer,
             num_warmup_steps=self.warmup_steps,
             num_training_steps=self.train_steps,
diff --git a/tests/test_validation_dataset.py b/tests/test_validation_dataset.py
index 1a4c97314..3d3b5db96 100644
--- a/tests/test_validation_dataset.py
+++ b/tests/test_validation_dataset.py
@@ -24,7 +24,6 @@ def fixture_cfg():
     )
 
 
-# pylint: disable=too-many-public-methods (duplicate-code)
 class BaseValidation:
     """
     Base validation module to setup the log capture
diff --git a/tests/utils/schemas/validation/test_fsdp.py b/tests/utils/schemas/validation/test_fsdp.py
index 5b461a113..08fc50c61 100644
--- a/tests/utils/schemas/validation/test_fsdp.py
+++ b/tests/utils/schemas/validation/test_fsdp.py
@@ -2,7 +2,6 @@
 tests for pydantic fsdp validation
 """
 
-# pylint: disable=too-many-boolean-expressions
 import pytest
 
 from axolotl.utils.config import validate_config