set max steps to -1 when empty

don't set total num steps for grpo
cleanup pythonpath if axo in it
2025-02-06 17:27:52 -05:00 · 2025-02-06 17:23:13 -05:00 · 2025-02-06 17:03:21 -05:00 · 2025-02-06 17:01:19 -05:00 · 2025-02-06 14:00:15 -05:00 · 2025-02-06 13:50:39 -05:00
49 changed files with 565 additions and 11386 deletions
--- a/.github/workflows/base.yml
+++ b/.github/workflows/base.yml
@@ -22,6 +22,12 @@ jobs:
      fail-fast: false
      matrix:
        include:
+          - cuda: "124"
+            cuda_version: 12.4.1
+            cudnn_version: ""
+            python_version: "3.10"
+            pytorch: 2.4.1
+            torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
          - cuda: "124"
            cuda_version: 12.4.1
            cudnn_version: ""
@@ -34,12 +40,6 @@ jobs:
            python_version: "3.11"
            pytorch: 2.5.1
            torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
-          - cuda: "124"
-            cuda_version: 12.4.1
-            cudnn_version: ""
-            python_version: "3.11"
-            pytorch: 2.6.0
-            torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
    steps:
      - name: Checkout
        uses: actions/checkout@v4
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -19,7 +19,7 @@ jobs:
        - name: Setup Python
          uses: actions/setup-python@v5
          with:
-            python-version: '3.11'
+            python-version: '3.10'
        - name: install dependencies
          run: |
            python3 -m pip install jupyter
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -19,6 +19,6 @@ jobs:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
-          python-version: "3.11"
+          python-version: "3.10"
          cache: 'pip' # caching pip dependencies
      - uses: pre-commit/action@v3.0.1
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -26,11 +26,6 @@ jobs:
            pytorch: 2.5.1
            axolotl_extras:
            is_latest: true
-          - cuda: 124
-            cuda_version: 12.4.1
-            python_version: "3.11"
-            pytorch: 2.6.0
-            axolotl_extras:
    runs-on: axolotl-gpu-runner
    steps:
      - name: Checkout
--- a/.github/workflows/multi-gpu-e2e.yml
+++ b/.github/workflows/multi-gpu-e2e.yml
@@ -34,13 +34,6 @@ jobs:
            axolotl_extras:
            num_gpus: 2
            nightly_build: "true"
-          - cuda: 124
-            cuda_version: 12.4.1
-            python_version: "3.11"
-            pytorch: 2.6.0
-            axolotl_extras:
-            num_gpus: 2
-            nightly_build: "true"
    runs-on: [self-hosted, modal]
    timeout-minutes: 120
    steps:
@@ -49,7 +42,7 @@ jobs:
      - name: Install Python
        uses: actions/setup-python@v5
        with:
-          python-version: "3.11"
+          python-version: "3.10"
      - name: Install Modal
        run: |
          python -m pip install --upgrade pip
--- a/.github/workflows/nightlies.yml
+++ b/.github/workflows/nightlies.yml
@@ -22,11 +22,6 @@ jobs:
            python_version: "3.11"
            pytorch: 2.5.1
            axolotl_extras:
-          - cuda: 124
-            cuda_version: 12.4.1
-            python_version: "3.11"
-            pytorch: 2.6.0
-            axolotl_extras:
    runs-on: axolotl-gpu-runner
    steps:
      - name: Checkout
--- a/.github/workflows/pypi.yml
+++ b/.github/workflows/pypi.yml
@@ -36,7 +36,7 @@ jobs:
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
-          python-version: "3.11"
+          python-version: "3.10"

      - name: Install dependencies
        run: |
--- a/.github/workflows/tests-nightly.yml
+++ b/.github/workflows/tests-nightly.yml
@@ -12,7 +12,7 @@ jobs:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
-          python-version: "3.11"
+          python-version: "3.10"
          cache: 'pip' # caching pip dependencies
      - uses: pre-commit/action@v3.0.1
        env:
@@ -25,8 +25,13 @@ jobs:
      fail-fast: false
      max-parallel: 2
      matrix:
-        python_version: ["3.11"]
-        pytorch_version: ["2.4.1", "2.5.1", "2.6.0"]
+        python_version: ["3.10", "3.11"]
+        pytorch_version: ["2.4.1", "2.5.1"]
+        exclude:
+          - python_version: "3.10"
+            pytorch_version: "2.4.1"
+          - python_version: "3.10"
+            pytorch_version: "2.5.1"
    timeout-minutes: 20

    steps:
@@ -107,20 +112,13 @@ jobs:
            num_gpus: 1
            axolotl_extras:
            nightly_build: "true"
-          - cuda: 124
-            cuda_version: 12.4.1
-            python_version: "3.11"
-            pytorch: 2.6.0
-            num_gpus: 1
-            axolotl_extras:
-            nightly_build: "true"
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Install Python
        uses: actions/setup-python@v5
        with:
-          python-version: "3.11"
+          python-version: "3.10"
      - name: Install Modal
        run: |
          python -m pip install --upgrade pip
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -35,7 +35,7 @@ jobs:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
-          python-version: "3.11"
+          python-version: "3.10"
          cache: 'pip' # caching pip dependencies
      - uses: pre-commit/action@v3.0.1
        env:
@@ -48,8 +48,13 @@ jobs:
      fail-fast: false
      max-parallel: 2
      matrix:
-        python_version: ["3.11"]
-        pytorch_version: ["2.4.1", "2.5.1", "2.6.0"]
+        python_version: ["3.10", "3.11"]
+        pytorch_version: ["2.4.1", "2.5.1"]
+        exclude:
+          - python_version: "3.10"
+            pytorch_version: "2.4.1"
+          - python_version: "3.10"
+            pytorch_version: "2.5.1"
    timeout-minutes: 20

    steps:
@@ -122,7 +127,7 @@ jobs:
      max-parallel: 1
      matrix:
        python_version: ["3.11"]
-        pytorch_version: ["2.4.1", "2.5.1", "2.6.0"]
+        pytorch_version: ["2.4.1", "2.5.1"]
    timeout-minutes: 20

    steps:
@@ -211,7 +216,7 @@ jobs:
      - name: Install Python
        uses: actions/setup-python@v5
        with:
-          python-version: "3.11"
+          python-version: "3.10"
      - name: Install Modal
        run: |
          python -m pip install --upgrade pip
@@ -246,19 +251,13 @@ jobs:
            pytorch: 2.4.1
            num_gpus: 1
            axolotl_extras:
-          - cuda: 124
-            cuda_version: 12.4.1
-            python_version: "3.11"
-            pytorch: 2.6.0
-            num_gpus: 1
-            axolotl_extras:
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Install Python
        uses: actions/setup-python@v5
        with:
-          python-version: "3.11"
+          python-version: "3.10"
      - name: Install Modal
        run: |
          python -m pip install --upgrade pip
--- a/README.md
+++ b/README.md
@@ -51,7 +51,7 @@ Features:

 **Requirements**:
 - NVIDIA GPU (Ampere or newer for `bf16` and Flash Attention) or AMD GPU
- Python 3.11
+- Python ≥3.10
 - PyTorch ≥2.4.1

 ### Installation
--- a/cicd/Dockerfile.jinja
+++ b/cicd/Dockerfile.jinja
@@ -32,9 +32,9 @@ RUN if [ "$NIGHTLY_BUILD" = "true" ] ; then \
    fi

 RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
-        pip install --no-build-isolation -e .[deepspeed,flash-attn,optimizers,ray,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
+        pip install --no-build-isolation -e .[deepspeed,flash-attn,optimizers,ray,vllm,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
    else \
-        pip install --no-build-isolation -e .[deepspeed,flash-attn,optimizers,ray] $AXOLOTL_ARGS; \
+        pip install --no-build-isolation -e .[deepspeed,flash-attn,optimizers,ray,vllm] $AXOLOTL_ARGS; \
    fi

 RUN python scripts/unsloth_install.py | sh
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -20,9 +20,9 @@ WORKDIR /workspace/axolotl

 # If AXOLOTL_EXTRAS is set, append it in brackets
 RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
-        pip install --no-build-isolation -e .[deepspeed,flash-attn,optimizers,ray,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
+        pip install --no-build-isolation -e .[deepspeed,flash-attn,optimizers,ray,vllm,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
    else \
-        pip install --no-build-isolation -e .[deepspeed,flash-attn,optimizers,ray] $AXOLOTL_ARGS; \
+        pip install --no-build-isolation -e .[deepspeed,flash-attn,optimizers,ray,vllm] $AXOLOTL_ARGS; \
    fi

 RUN python scripts/unsloth_install.py | sh
--- a/docs/config.qmd
+++ b/docs/config.qmd
@@ -46,10 +46,6 @@ overrides_of_model_config:
    type: # linear | dynamic
    factor: # float

-# optional overrides the base model loading from_pretrained
-overrides_of_model_kwargs:
-  # use_cache: False
-
 # optional overrides to the bnb 4bit quantization configuration
 # https://huggingface.co/docs/transformers/main/main_classes/quantization#transformers.BitsAndBytesConfig
 bnb_config_kwargs:
--- a/docs/faq.qmd
+++ b/docs/faq.qmd
@@ -19,7 +19,3 @@ description: Frequently asked questions
 **Q: AttributeError: 'DummyOptim' object has no attribute 'step'**

 > A: You may be using deepspeed with single gpu. Please don't set `deepspeed:` in yaml or cli.
-
-**Q: The codes is stuck on saving preprocessed datasets.**
-
-> A: This is usually an issue with the GPU. This can be resolved through setting the os environment variable `CUDA_VISIBLE_DEVICES=0`. If you are on runpod, this is usually a pod issue. Starting a new pod should take care of it.
--- a/docs/multi-node.qmd
+++ b/docs/multi-node.qmd
@@ -3,18 +3,6 @@ title: Multi Node
 description: How to use Axolotl on multiple machines
 ---

-The below are three ways to train multi-node in Axolotl.
-
-::: {.callout-important}
-Each machine needs a copy of Axolotl, we suggest using the same commit to ensure compatibility.
-
-You will also need to have the same configuration file for your model on each machine.
-
-Make sure the main machine is reachable by other machines.
-:::
-
-# Accelerate
-
 You will need to create a configuration for accelerate, either by using `accelerate config` and follow the instructions or you can use one of the preset below:

 ~/.cache/huggingface/accelerate/default_config.yaml
@@ -38,7 +26,7 @@ tpu_use_sudo: false
 use_cpu: false
 ```

-Configure your model to use FSDP in the Axolotl yaml. For example:
+Configure your model to use FSDP, for example:
 ```yaml
 fsdp:
  - full_shard
@@ -49,40 +37,12 @@ fsdp_config:
  fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer
 ```

+## Machine configuration
+
+On each machine you need a copy of Axolotl; we suggest using the same commit to ensure compatibility.
+
+You will also need to have the same configuration file for your model on each machine.
+
+On the main machine only, make sure the port you set as `main_process_port` is open in TCP and reachable by other machines.
+
 All you have to do now is launch using accelerate as you would usually do on each machine and voila, the processes will start once you have launched accelerate on every machine.
-
-# Raytrain
-
-Please see ray train doc [here](ray-integration.qmd).
-
-# Torchrun
-
-If you are using Infiniband, we recommend torchrun to utilize the full bandwidth.
-
-Set the following env (change buffersize/socketname depending on your system):
-
-```yaml
-export NCCL_IB_DISABLE=0
-export NCCL_SOCKET_IFNAME="eth0,en,eth,em,bond"
-export NCCL_BUFFSIZE=2097152
-```
-
-Run the following on each node:
-
-```bash
-torchrun --nnodes $num_nodes --nproc_per_node $gpu_per_node --rdzv_id $rdzv_id --rdzv_backend c10d --rdzv_endpoint "$head_node_ip:$head_node_port" -m axolotl.cli.train config.yaml
-```
-
-Please make sure to substitute the placeholder variables.
-
- `num_nodes`: Number of nodes (containing GPUs)
- `gpu_per_node`: Number of gpus per node
- `head_node_ip`: IP of the head node (make sure other machines can connect to this)
- `head_node_port`: Port of the head node (make sure other machines can connect to this. Default 29400)
- `rdzv_id`: A unique job ID that is used by the job across nodes.
-
-::: {.callout-note}
-You need to call `axolotl.cli.train` instead of `axolotl train` as the latter calls accelerate under the hood
-:::
-
-More info on the available configs can be found on the Pytorch docs [here](https://pytorch.org/docs/stable/elastic/run.html)
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,10 +1,10 @@
 --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/

 # START section of dependencies that don't install on Darwin/MacOS
-bitsandbytes==0.45.2
+bitsandbytes==0.45.1
 triton>=3.0.0
 mamba-ssm==1.2.0.post1
-flash-attn==2.7.4.post1
+flash-attn==2.7.0.post2
 xformers>=0.0.23.post1
 autoawq==0.2.7.post3
 liger-kernel==0.5.2
@@ -13,12 +13,12 @@ liger-kernel==0.5.2
 packaging==23.2

 peft==0.14.0
-transformers==4.48.3
+transformers==4.48.2
 tokenizers>=0.21.0
 accelerate==1.3.0
 datasets==3.2.0
 deepspeed==0.16.1
-trl==0.13.0
+trl==0.14.0

 optimum==1.16.2
 hf_transfer
@@ -26,7 +26,7 @@ sentencepiece
 gradio==3.50.2

 modal==0.70.5
-pydantic==2.6.3
+pydantic==2.10.6
 addict
 fire
 PyYAML>=6.0
--- a/setup.py
+++ b/setup.py
@@ -71,15 +71,12 @@ def parse_requirements():
            else:
                raise ValueError("Invalid version format")

-            if (major, minor) >= (2, 6):
-                _install_requires.pop(_install_requires.index(xformers_version))
-                _install_requires.append("xformers==0.0.29.post2")
-            elif (major, minor) >= (2, 5):
+            if (major, minor) >= (2, 5):
                _install_requires.pop(_install_requires.index(xformers_version))
                if patch == 0:
                    _install_requires.append("xformers==0.0.28.post2")
                else:
-                    _install_requires.append("xformers==0.0.29")
+                    _install_requires.append("xformers==0.0.28.post3")
                _install_requires.pop(_install_requires.index(autoawq_version))
            elif (major, minor) >= (2, 4):
                if patch == 0:
@@ -156,5 +153,8 @@ setup(
        "ray": [
            "ray[train]",
        ],
+        "vllm": [
+            "vllm>=0.7.1",
+        ],
    },
 )
--- a/src/axolotl/cli/cloud/init.py
+++ b/src/axolotl/cli/cloud/init.py
@@ -35,13 +35,18 @@ def do_cli_train(
    cloud_config: Union[Path, str],
    config: Union[Path, str],
    accelerate: bool = True,
+    cwd=None,
+    **kwargs,
 ) -> None:
    print_axolotl_text_art()
    cloud_cfg = load_cloud_cfg(cloud_config)
    cloud = ModalCloud(cloud_cfg)
    with open(config, "r", encoding="utf-8") as file:
        config_yaml = file.read()
-    cloud.train(config_yaml, accelerate=accelerate)
+    local_dirs = {}
+    if cwd and not Path(cwd).joinpath("src", "axolotl").exists():
+        local_dirs = {"/workspace/mounts": cwd}
+    cloud.train(config_yaml, accelerate=accelerate, local_dirs=local_dirs, **kwargs)


 def do_cli_lm_eval(
--- a/src/axolotl/cli/cloud/modal_.py
+++ b/src/axolotl/cli/cloud/modal_.py
@@ -7,6 +7,7 @@ import os
 import subprocess  # nosec B404
 from pathlib import Path
 from random import randint
+from typing import Optional

 import modal

@@ -22,8 +23,18 @@ def run_cmd(cmd: str, run_folder: str, volumes=None):

    # modal workaround so it doesn't use the automounted axolotl
    new_env = copy.deepcopy(os.environ)
+
    if "PYTHONPATH" in new_env:
-        del new_env["PYTHONPATH"]
+        paths = ["/workspace/mounts"]
+        for sub_python_path_str in new_env["PYTHONPATH"].split(":"):
+            sub_python_path = Path(sub_python_path_str)
+            if not sub_python_path.joinpath("src", "axolotl").exists():
+                # we don't want to use the automounted axolotl or unexpected behavior happens
+                paths.append(str(sub_python_path))
+        if paths:
+            new_env["PYTHONPATH"] = ":".join(paths)
+        else:
+            del new_env["PYTHONPATH"]

    # Propagate errors from subprocess.
    if exit_code := subprocess.call(  # nosec B603
@@ -203,9 +214,12 @@ class ModalCloud(Cloud):
            memory = int(self.config.memory)
        return 1024 * memory

-    def get_train_env(self):
+    def get_train_env(self, local_dirs=None):
+        image = self.get_image()
+        for mount, local_dir in (local_dirs or {}).items():
+            image = image.add_local_dir(local_dir, mount)
        return self.app.function(
-            image=self.get_image(),
+            image=image,
            volumes={k: v[0] for k, v in self.volumes.items()},
            cpu=16.0,
            gpu=self.get_train_gpu(),
@@ -214,14 +228,21 @@ class ModalCloud(Cloud):
            secrets=self.get_secrets(),
        )

-    def train(self, config_yaml: str, accelerate: bool = True):
-        modal_fn = self.get_train_env()(_train)
+    def train(
+        self,
+        config_yaml: str,
+        accelerate: bool = True,
+        local_dirs: Optional[dict[str, str]] = None,
+        **kwargs,
+    ):
+        modal_fn = self.get_train_env(local_dirs)(_train)
        with modal.enable_output():
            with self.app.run(detach=True):
                modal_fn.remote(
                    config_yaml,
                    accelerate=accelerate,
                    volumes={k: v[0] for k, v in self.volumes.items()},
+                    **kwargs,
                )

    def lm_eval(self, config_yaml: str):
@@ -252,7 +273,7 @@ def _preprocess(config_yaml: str, volumes=None):
    )


-def _train(config_yaml: str, accelerate: bool = True, volumes=None):
+def _train(config_yaml: str, accelerate: bool = True, volumes=None, **kwargs):
    with open(
        "/workspace/artifacts/axolotl/config.yaml", "w", encoding="utf-8"
    ) as f_out:
@@ -262,8 +283,11 @@ def _train(config_yaml: str, accelerate: bool = True, volumes=None):
        accelerate_args = "--accelerate"
    else:
        accelerate_args = "--no-accelerate"
+    num_processes_args = ""
+    if num_processes := kwargs.pop("num_processes", None):
+        num_processes_args = f"--num-processes {num_processes}"
    run_cmd(
-        f"axolotl train {accelerate_args} /workspace/artifacts/axolotl/config.yaml",
+        f"axolotl train {accelerate_args} {num_processes_args} /workspace/artifacts/axolotl/config.yaml",
        run_folder,
        volumes,
    )
--- a/src/axolotl/cli/main.py
+++ b/src/axolotl/cli/main.py
@@ -2,6 +2,7 @@
 # pylint: disable=redefined-outer-name

 import logging
+import os
 import random
 import subprocess  # nosec B404
 import tempfile
@@ -12,6 +13,7 @@ from typing import Optional

 import click
 import yaml
+from dotenv import load_dotenv

 import axolotl
 from axolotl.cli.args import EvaluateCliArgs, PreprocessCliArgs, TrainerCliArgs
@@ -199,7 +201,10 @@ def train(
        try:
            if accelerate:
                if cloud:
-                    do_cli_train(cloud_config=cloud, config=config, accelerate=True)
+                    cwd = os.getcwd()
+                    do_cli_train(
+                        cloud_config=cloud, config=config, accelerate=True, cwd=cwd, **kwargs
+                    )
                else:
                    accelerate_args = []
                    if "main_process_port" in kwargs:
@@ -208,7 +213,7 @@ def train(
                        accelerate_args.append(str(main_process_port))
                    if "num_processes" in kwargs:
                        num_processes = kwargs.pop("num_processes", None)
-                        accelerate_args.append("--num-processes")
+                        accelerate_args.append("--num_processes")
                        accelerate_args.append(str(num_processes))

                    base_cmd = ["accelerate", "launch"]
@@ -220,7 +225,9 @@ def train(
                    subprocess.run(cmd, check=True)  # nosec B603
            else:
                if cloud:
-                    do_cli_train(cloud_config=cloud, config=config, accelerate=False)
+                    do_cli_train(
+                        cloud_config=cloud, config=config, accelerate=False, **kwargs
+                    )
                else:
                    from axolotl.cli.train import do_cli

@@ -381,4 +388,5 @@ def main():


 if __name__ == "__main__":
+    load_dotenv()
    main()
--- a/src/axolotl/common/datasets.py
+++ b/src/axolotl/common/datasets.py
@@ -122,9 +122,11 @@ def load_preference_datasets(
        `total_num_steps`.
    """
    train_dataset, eval_dataset = load_prepare_preference_datasets(cfg)
-    total_num_steps = int(
+    total_num_steps: Optional[int] = int(
        math.ceil(len(train_dataset) * cfg.num_epochs / cfg.batch_size)
    )
+    if cfg.rl == "grpo":
+        total_num_steps = None

    if cli_args.debug or cfg.debug:
        LOG.info("check_dataset_labels...")
--- a/src/axolotl/core/trainer_builder.py
+++ b/src/axolotl/core/trainer_builder.py
@@ -39,7 +39,6 @@ from trl.trainer.utils import RewardDataCollatorWithPadding

 from axolotl.core.trainers.base import (
    AxolotlCPOTrainer,
-    AxolotlDPOTrainer,
    AxolotlKTOTrainer,
    AxolotlMambaTrainer,
    AxolotlORPOTrainer,
@@ -48,9 +47,11 @@ from axolotl.core.trainers.base import (
    AxolotlTrainer,
    ReLoRATrainer,
 )
+from axolotl.core.trainers.dpo import DPOStrategy
+from axolotl.core.trainers.dpo.args import AxolotlDPOConfig
+from axolotl.core.trainers.grpo import GRPOStrategy
 from axolotl.core.training_args import (
    AxolotlCPOConfig,
-    AxolotlDPOConfig,
    AxolotlKTOConfig,
    AxolotlORPOConfig,
    AxolotlPRMConfig,
@@ -652,7 +653,7 @@ class HFCausalTrainerBuilder(TrainerBuilderBase):
        trainer_kwargs = {}

        if self.cfg.reward_model:
-            trainer_kwargs["max_length"] = self.cfg.sequence_len
+            training_arguments_kwargs["max_length"] = self.cfg.sequence_len

        # pylint: disable=duplicate-code
        if self.cfg.optimizer in [
@@ -965,10 +966,11 @@ class HFRLTrainerBuilder(TrainerBuilderBase):
            # default to saving each epoch if not defined
            training_args_kwargs["save_strategy"] = "epoch"

-        training_args_kwargs["dataset_num_proc"] = self.cfg.dataset_processes
+        if self.cfg.dataset_processes:
+            training_args_kwargs["dataset_num_proc"] = self.cfg.dataset_processes

-        if self.cfg.rl_beta:
-            training_args_kwargs["beta"] = self.cfg.rl_beta
+        if beta_value := (self.cfg.trl and self.cfg.trl.beta) or self.cfg.rl_beta:
+            training_args_kwargs["beta"] = beta_value  # cfg.trl may be None
        if self.cfg.orpo_alpha:
            # trl does some odd mapping of alpha to beta to reuse the beta parameter ???
            training_args_kwargs["beta"] = self.cfg.orpo_alpha
@@ -977,6 +979,7 @@ class HFRLTrainerBuilder(TrainerBuilderBase):
            training_args_kwargs["rpo_alpha"] = self.cfg.rpo_alpha

        training_args_cls = None
+        blocklist_args_kwargs = []
        if self.cfg.rl == "simpo":
            training_args_cls = AxolotlCPOConfig
            training_args_kwargs["loss_type"] = "simpo"
@@ -1001,11 +1004,15 @@ class HFRLTrainerBuilder(TrainerBuilderBase):
                self.cfg.kto_undesirable_weight or 1.0
            )

-            training_args_kwargs["dataset_num_proc"] = self.cfg.dataset_processes
            training_args_kwargs["max_length"] = self.cfg.sequence_len
            if self.cfg.max_prompt_len:
                training_args_kwargs["max_prompt_length"] = self.cfg.max_prompt_len

+        elif self.cfg.rl == "grpo":
+            training_args_cls = GRPOStrategy.get_training_args_class()
+            training_args_kwargs.update(GRPOStrategy.set_training_args_kwargs(self.cfg))
+            blocklist_args_kwargs = GRPOStrategy.get_blocklist_args_kwargs()
+
        else:
            training_args_cls = AxolotlDPOConfig
            if self.cfg.rl == "ipo":
@@ -1016,11 +1023,20 @@ class HFRLTrainerBuilder(TrainerBuilderBase):
            training_args_kwargs["generate_during_eval"] = self.cfg.use_wandb
            if self.cfg.dpo_use_weighting is not None:
                training_args_kwargs["use_weighting"] = self.cfg.dpo_use_weighting
+            if self.cfg.dpo_use_logits_to_keep is not None:
+                training_args_kwargs[
+                    "use_logits_to_keep"
+                ] = self.cfg.dpo_use_logits_to_keep

+        for blocklist_key in blocklist_args_kwargs:
+            if blocklist_key in training_args_kwargs:
+                del training_args_kwargs[blocklist_key]
+
+        max_steps = self.cfg.max_steps or total_num_steps or -1
        training_args = training_args_cls(  # pylint: disable=unexpected-keyword-arg
-            output_dir=self.cfg.output_dir,
+            self.cfg.output_dir,
            per_device_train_batch_size=self.cfg.micro_batch_size,
-            max_steps=self.cfg.max_steps or total_num_steps,
+            max_steps=max_steps,
            gradient_accumulation_steps=self.cfg.gradient_accumulation_steps,
            learning_rate=self.cfg.learning_rate,
            warmup_steps=self.cfg.warmup_steps,
@@ -1047,8 +1063,12 @@ class HFRLTrainerBuilder(TrainerBuilderBase):
            dpo_trainer_kwargs[
                "precompute_ref_log_probs"
            ] = self.cfg.precompute_ref_log_probs
-        if self.cfg.rl in ["dpo", "ipo"]:
-            trainer_cls = AxolotlDPOTrainer
+        if self.cfg.rl == "grpo":
+            trainer_cls = GRPOStrategy.get_trainer_class()
+            trainer_cls_args = [self.model]
+            dpo_trainer_kwargs.update(GRPOStrategy.set_trainer_kwargs(self.cfg))
+        elif self.cfg.rl in ["dpo", "ipo"]:
+            trainer_cls = DPOStrategy.get_trainer_class()
            trainer_cls_args = [self.model, self.model_ref]
        elif self.cfg.rl == "orpo":
            trainer_cls = AxolotlORPOTrainer
@@ -1068,7 +1088,9 @@ class HFRLTrainerBuilder(TrainerBuilderBase):
        else:
            dpo_trainer_kwargs["tokenizer"] = self.tokenizer

-        if self.cfg.datasets is not None and (trainer_cls is AxolotlDPOTrainer):
+        if self.cfg.datasets is not None and (
+            trainer_cls is DPOStrategy.get_trainer_class()
+        ):
            dpo_trainer_kwargs["dataset_tags"] = [
                d["path"] for d in self.cfg.datasets if not Path(d["path"]).is_dir()
            ]
--- a/src/axolotl/core/trainers/base.py
+++ b/src/axolotl/core/trainers/base.py
@@ -5,30 +5,21 @@ module for customized trainers
 from __future__ import annotations

 # pylint: disable=too-many-lines
-import gc
 import logging
 import os
 from collections import defaultdict
 from functools import wraps
-from typing import Any, Dict, Literal, Optional, Union
+from typing import Dict, Literal, Optional

 import torch
 from datasets import Dataset
 from peft.optimizers import create_loraplus_optimizer
-from torch import nn
 from torch.optim.lr_scheduler import OneCycleLR
 from torch.utils.data import BatchSampler, DataLoader, RandomSampler, SequentialSampler
 from transformers import Trainer
 from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR, seed_worker
 from transformers.utils import is_sagemaker_mp_enabled
-from trl import (
-    CPOTrainer,
-    DPOTrainer,
-    KTOTrainer,
-    ORPOTrainer,
-    PRMTrainer,
-    RewardTrainer,
-)
+from trl import CPOTrainer, KTOTrainer, ORPOTrainer, PRMTrainer, RewardTrainer
 from trl.trainer.utils import pad_to_length

 from axolotl.monkeypatch.relora import ReLoRAScheduler
@@ -847,107 +838,6 @@ class ReLoRATrainer(AxolotlTrainer):
        return self.lr_scheduler


-class AxolotlDPOTrainer(SchedulerMixin, DPOTrainer):
-    """
-    Extend the base DPOTrainer for axolotl helpers
-    """
-
-    tag_names = ["axolotl", "dpo"]
-
-    def __init__(self, *args, dataset_tags=None, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.dataset_tags = dataset_tags
-        self.optimizer = None
-        self.model_accepts_loss_kwargs = False
-
-    def create_optimizer(self):
-        if self.args.loraplus_lr_ratio is None:
-            return super().create_optimizer()
-
-        opt_model = self.model_wrapped if is_sagemaker_mp_enabled() else self.model
-        if self.optimizer is None:  # pylint: disable=access-member-before-definition
-            optimizer_cls, optimizer_kwargs = Trainer.get_optimizer_cls_and_kwargs(
-                self.args,
-                opt_model,
-            )
-
-            loraplus_lr_ratio = getattr(self.args, "loraplus_lr_ratio", None)
-            if loraplus_lr_ratio:
-                print("Using lora+")
-            loraplus_lr_embedding = getattr(self.args, "loraplus_lr_embedding", None)
-            self.optimizer = create_loraplus_optimizer(  # pylint: disable=attribute-defined-outside-init
-                opt_model,
-                optimizer_cls,
-                loraplus_lr_ratio=loraplus_lr_ratio,
-                loraplus_lr_embedding=loraplus_lr_embedding,
-                **optimizer_kwargs,
-            )
-
-        if is_sagemaker_mp_enabled():
-            self.optimizer = smp.DistributedOptimizer(  # pylint: disable=attribute-defined-outside-init
-                self.optimizer
-            )
-
-        return self.optimizer
-
-    @wraps(DPOTrainer.push_to_hub)
-    def push_to_hub(self, *args, **kwargs) -> str:
-        """
-        Overwrite the `push_to_hub` method in order to force-add the tags when pushing the
-        model on the Hub. Please refer to `~transformers.Trainer.push_to_hub` for more details.
-        """
-        kwargs = _sanitize_kwargs_for_ds_tagging(
-            dataset_tags=self.dataset_tags, kwargs=kwargs
-        )
-        kwargs = _sanitize_kwargs_for_tagging(tag_names=self.tag_names, kwargs=kwargs)
-
-        return super().push_to_hub(*args, **kwargs)
-
-    @staticmethod
-    def tokenize_row(
-        features,
-        processing_class,
-        max_prompt_length,
-        max_completion_length,
-        add_special_tokens,
-    ) -> Dict:
-        res = DPOTrainer.tokenize_row(
-            features,
-            processing_class,
-            max_prompt_length,
-            max_completion_length,
-            add_special_tokens,
-        )
-        # fix when the tokenizer doesn't have a bos_token_id, e.g. Qwen
-        if processing_class.bos_token is None and res["prompt_input_ids"][0] is None:
-            for key in res.keys():
-                res[key] = res[key][1:]
-
-        if processing_class.bos_token and processing_class.bos_token_id is not None:
-            # dpo trainer may incorrectly prepend the bos_token_id to the dpo outputs
-            if res["chosen_input_ids"][0] == processing_class.bos_token_id:
-                res["chosen_input_ids"] = res["chosen_input_ids"][1:]
-                res["chosen_labels"] = res["chosen_labels"][1:]
-                res["chosen_attention_mask"] = res["chosen_attention_mask"][1:]
-            if res["rejected_input_ids"][0] == processing_class.bos_token_id:
-                res["rejected_input_ids"] = res["rejected_input_ids"][1:]
-                res["rejected_labels"] = res["rejected_labels"][1:]
-                res["rejected_attention_mask"] = res["rejected_attention_mask"][1:]
-
-        return res
-
-    def training_step(
-        self,
-        model: nn.Module,
-        inputs: Dict[str, Union[torch.Tensor, Any]],
-        num_items_in_batch=None,
-    ) -> torch.Tensor:
-        loss: torch.Tensor = super().training_step(model, inputs, num_items_in_batch)
-        gc.collect()
-        torch.cuda.empty_cache()
-        return loss
-
-
 class AxolotlORPOTrainer(SchedulerMixin, ORPOTrainer):
    """
    Extend the base ORPOTrainer for axolotl helpers
--- a/src/axolotl/core/trainers/dpo/init.py
+++ b/src/axolotl/core/trainers/dpo/init.py
@@ -0,0 +1,33 @@
+"""
+DPO Specific Strategy for training
+"""
+from axolotl.core.trainers.dpo.trainer import AxolotlDPOTrainer
+
+
+class DPOStrategy:
+    """
+    Strategy describing which trainer, training-args class and kwargs to use for DPO training
+    """
+
+    @classmethod
+    def get_trainer_class(cls):
+        """Return the trainer class used for DPO runs."""
+        return AxolotlDPOTrainer
+
+    @classmethod
+    def get_training_args_class(cls):
+        """Return the training-args class used for DPO runs (imported on demand)."""
+        from axolotl.core.trainers.dpo.args import AxolotlDPOConfig
+
+        return AxolotlDPOConfig
+
+    @classmethod
+    def set_training_args_kwargs(cls, cfg):
+        """
+        Build the DPO-specific training-args kwargs from the config.
+
+        `loss_type` is only emitted for `rl == "ipo"` and `use_weighting` only when
+        `dpo_use_weighting` is not None; the remaining keys are always present.
+        """
+        kwargs = {"loss_type": "ipo"} if cfg.rl == "ipo" else {}
+        kwargs.update(
+            max_length=cfg.sequence_len,
+            max_completion_length=None,
+            max_prompt_length=cfg.sequence_len,
+            generate_during_eval=cfg.use_wandb,
+        )
+        use_weighting = cfg.dpo_use_weighting
+        if use_weighting is not None:
+            kwargs["use_weighting"] = use_weighting
+        return kwargs
--- a/src/axolotl/core/trainers/dpo/args.py
+++ b/src/axolotl/core/trainers/dpo/args.py
@@ -0,0 +1,15 @@
+"""
+Axolotl specific DPO args
+"""
+from dataclasses import dataclass
+
+from trl import DPOConfig
+
+from axolotl.core.training_args import AxolotlTrainingMixins
+
+
+@dataclass
+class AxolotlDPOConfig(AxolotlTrainingMixins, DPOConfig):
+    """
+    DPO config for DPO training
+
+    Combines trl's `DPOConfig` with `AxolotlTrainingMixins`; declares no fields of its own.
+    """
--- a/src/axolotl/core/trainers/dpo/trainer.py
+++ b/src/axolotl/core/trainers/dpo/trainer.py
@@ -0,0 +1,125 @@
+"""
+DPO trainer for axolotl
+"""
+import gc
+from functools import wraps
+from typing import Any, Dict, Union
+
+import torch
+from peft.optimizers import create_loraplus_optimizer
+from torch import nn
+from transformers import Trainer
+from transformers.utils import is_sagemaker_mp_enabled
+from trl import DPOTrainer
+
+from axolotl.core.trainers.base import (
+    SchedulerMixin,
+    _sanitize_kwargs_for_ds_tagging,
+    _sanitize_kwargs_for_tagging,
+)
+
+if is_sagemaker_mp_enabled():
+    import smdistributed.modelparallel.torch as smp
+
+
+class AxolotlDPOTrainer(SchedulerMixin, DPOTrainer):
+    """
+    Extend the base DPOTrainer for axolotl helpers
+
+    Adds an optional LoRA+ optimizer, tag injection when pushing to the Hub,
+    BOS-token cleanup after tokenization and a memory cleanup after every training step.
+    """
+
+    tag_names = ["axolotl", "dpo"]
+
+    def __init__(self, *args, dataset_tags=None, **kwargs):
+        """
+        Args:
+            *args: forwarded unchanged to `DPOTrainer.__init__`.
+            dataset_tags: dataset tags handed to `_sanitize_kwargs_for_ds_tagging`
+                when `push_to_hub` is called.
+            **kwargs: forwarded unchanged to `DPOTrainer.__init__`.
+        """
+        super().__init__(*args, **kwargs)
+        self.dataset_tags = dataset_tags
+        # start without an optimizer so `create_optimizer` builds one on first use
+        self.optimizer = None
+        # NOTE(review): presumably opts out of the HF Trainer loss-kwargs handling - confirm
+        self.model_accepts_loss_kwargs = False
+
+    def create_optimizer(self):
+        """
+        Create the optimizer, using LoRA+ when `args.loraplus_lr_ratio` is set.
+
+        Without a LoRA+ ratio the parent implementation is used. Otherwise the
+        optimizer is built once via `create_loraplus_optimizer` and cached on
+        `self.optimizer`.
+
+        Returns:
+            The optimizer to train with.
+        """
+        # pylint: disable=duplicate-code
+        if self.args.loraplus_lr_ratio is None:
+            return super().create_optimizer()
+
+        opt_model = self.model_wrapped if is_sagemaker_mp_enabled() else self.model
+        if self.optimizer is None:  # pylint: disable=access-member-before-definition
+            optimizer_cls, optimizer_kwargs = Trainer.get_optimizer_cls_and_kwargs(
+                self.args,
+                opt_model,
+            )
+
+            loraplus_lr_ratio = getattr(self.args, "loraplus_lr_ratio", None)
+            if loraplus_lr_ratio:
+                print("Using lora+")
+            loraplus_lr_embedding = getattr(self.args, "loraplus_lr_embedding", None)
+            # pylint: disable=duplicate-code
+            self.optimizer = create_loraplus_optimizer(  # pylint: disable=attribute-defined-outside-init
+                opt_model,
+                optimizer_cls,
+                loraplus_lr_ratio=loraplus_lr_ratio,
+                loraplus_lr_embedding=loraplus_lr_embedding,
+                **optimizer_kwargs,
+            )
+
+        if is_sagemaker_mp_enabled():
+            self.optimizer = smp.DistributedOptimizer(  # pylint: disable=attribute-defined-outside-init
+                self.optimizer
+            )
+
+        return self.optimizer
+
+    @wraps(DPOTrainer.push_to_hub)
+    def push_to_hub(self, *args, **kwargs) -> str:
+        """
+        Overwrite the `push_to_hub` method in order to force-add the tags when pushing the
+        model on the Hub. Please refer to `~transformers.Trainer.push_to_hub` for more details.
+        """
+        kwargs = _sanitize_kwargs_for_ds_tagging(
+            dataset_tags=self.dataset_tags, kwargs=kwargs
+        )
+        kwargs = _sanitize_kwargs_for_tagging(tag_names=self.tag_names, kwargs=kwargs)
+
+        return super().push_to_hub(*args, **kwargs)
+
+    @staticmethod
+    def tokenize_row(
+        features,
+        processing_class,
+        max_prompt_length,
+        max_completion_length,
+        add_special_tokens,
+    ) -> Dict:
+        """
+        Tokenize one row with `DPOTrainer.tokenize_row` and clean up leading BOS tokens.
+
+        All arguments are forwarded unchanged to `DPOTrainer.tokenize_row`.
+
+        Returns:
+            The dict produced by `DPOTrainer.tokenize_row`, with a leading `None`
+            entry dropped from every value when the tokenizer has no BOS token, and
+            with a leading BOS token id dropped from the chosen / rejected sequences.
+        """
+        res = DPOTrainer.tokenize_row(
+            features,
+            processing_class,
+            max_prompt_length,
+            max_completion_length,
+            add_special_tokens,
+        )
+        # fix when the tokenizer doesn't have a bos_token_id, e.g. Qwen
+        prompt_input_ids = res["prompt_input_ids"]
+        if (
+            processing_class.bos_token is None
+            and len(prompt_input_ids) > 0  # an empty prompt has nothing to strip
+            and prompt_input_ids[0] is None
+        ):
+            for key in res.keys():
+                res[key] = res[key][1:]
+
+        bos_token_id = processing_class.bos_token_id
+        if processing_class.bos_token and bos_token_id is not None:
+            # dpo trainer may incorrectly prepend the bos_token_id to the dpo outputs
+            for side in ("chosen", "rejected"):
+                input_ids = res[f"{side}_input_ids"]
+                # guard against empty sequences before looking at the first token
+                if len(input_ids) == 0 or input_ids[0] != bos_token_id:
+                    continue
+                # NOTE(review): labels / attention_mask are trimmed only when present;
+                # some trl versions appear to return just the *_input_ids keys - confirm
+                for suffix in ("input_ids", "labels", "attention_mask"):
+                    key = f"{side}_{suffix}"
+                    if key in res:
+                        res[key] = res[key][1:]
+
+        return res
+
+    def training_step(
+        self,
+        model: nn.Module,
+        inputs: Dict[str, Union[torch.Tensor, Any]],
+        num_items_in_batch=None,
+    ) -> torch.Tensor:
+        """
+        Run the parent training step, then release memory.
+
+        After every step `gc.collect()` and `torch.cuda.empty_cache()` are called
+        before the loss from the parent implementation is returned.
+        """
+        loss: torch.Tensor = super().training_step(model, inputs, num_items_in_batch)
+        gc.collect()
+        torch.cuda.empty_cache()
+        return loss
--- a/src/axolotl/core/trainers/grpo/__init__.py
+++ b/src/axolotl/core/trainers/grpo/__init__.py
@@ -0,0 +1,113 @@
+"""
+GRPO Specific Strategy for training
+"""
+
+import importlib
+import inspect
+import logging
+
+from trl.trainer.grpo_trainer import RewardFunc
+
+from axolotl.core.trainers.grpo.trainer import AxolotlGRPOTrainer
+
+LOG = logging.getLogger("axolotl")
+
+
+class GRPOStrategy:
+    """
+    Strategy for GRPO training
+
+    Maps the axolotl config (mostly its `trl` section) onto the trainer class, the
+    training-args class and the keyword arguments used to build them.
+    """
+
+    @classmethod
+    def get_trainer_class(cls):
+        """Return the trainer class used for GRPO training."""
+        return AxolotlGRPOTrainer
+
+    @classmethod
+    def get_training_args_class(cls):
+        """Return the training-args class used for GRPO training (imported on demand)."""
+        from axolotl.core.trainers.grpo.args import AxolotlGRPOConfig
+
+        return AxolotlGRPOConfig
+
+    @classmethod
+    def set_training_args_kwargs(cls, cfg):
+        """
+        Build the GRPO-specific training-args kwargs from `cfg.trl`.
+
+        Unset (falsy) options are omitted. The vLLM and reference-model sync
+        sub-options are only forwarded when `use_vllm` / `sync_ref_model` are enabled.
+        When the config has no `trl` section at all, an empty dict is returned.
+        """
+        grpo_args_kwargs = {}
+        trl_cfg = cfg.trl
+        if trl_cfg is None:
+            # without this guard the unconditional `max_completion_length` lookup
+            # below raised AttributeError on `None`
+            return grpo_args_kwargs
+
+        if trl_cfg and trl_cfg.use_vllm:
+            grpo_args_kwargs["use_vllm"] = trl_cfg.use_vllm
+            grpo_args_kwargs["vllm_device"] = trl_cfg.vllm_device or "auto"
+            if trl_cfg.vllm_gpu_memory_utilization:
+                grpo_args_kwargs[
+                    "vllm_gpu_memory_utilization"
+                ] = trl_cfg.vllm_gpu_memory_utilization
+            if trl_cfg.vllm_max_model_len:
+                grpo_args_kwargs["vllm_max_model_len"] = trl_cfg.vllm_max_model_len
+        if trl_cfg and trl_cfg.num_generations:
+            grpo_args_kwargs["num_generations"] = trl_cfg.num_generations
+        if trl_cfg and trl_cfg.sync_ref_model:
+            grpo_args_kwargs["sync_ref_model"] = trl_cfg.sync_ref_model
+            if trl_cfg.ref_model_mixup_alpha:
+                grpo_args_kwargs["ref_model_mixup_alpha"] = trl_cfg.ref_model_mixup_alpha
+            if trl_cfg.ref_model_sync_steps:
+                grpo_args_kwargs["ref_model_sync_steps"] = trl_cfg.ref_model_sync_steps
+        grpo_args_kwargs["max_completion_length"] = trl_cfg.max_completion_length
+        return grpo_args_kwargs
+
+    @classmethod
+    def set_trainer_kwargs(cls, cfg):
+        """
+        Build the trainer kwargs from `cfg.trl`.
+
+        Every entry of `reward_funcs` is resolved through `get_reward_func`;
+        `reward_processing_classes` is forwarded as-is when set.
+        """
+        trainer_kwargs = {}
+        if cfg.trl and cfg.trl.reward_funcs:
+            trainer_kwargs["reward_funcs"] = [
+                cls.get_reward_func(reward_func_fqn)
+                for reward_func_fqn in cfg.trl.reward_funcs
+            ]
+        if cfg.trl and cfg.trl.reward_processing_classes:
+            trainer_kwargs[
+                "reward_processing_classes"
+            ] = cfg.trl.reward_processing_classes
+        return trainer_kwargs
+
+    @classmethod
+    def get_collator(cls, *args, **kwargs):  # pylint: disable=unused-argument
+        """Return no collator."""
+        # No data collation is needed in GRPO, handled by trl's trainer __init__
+        return None
+
+    @classmethod
+    def get_blocklist_args_kwargs(cls):
+        """Return the list of blocklisted training-arg names."""
+        return ["dataset_num_proc"]
+
+    @classmethod
+    def get_reward_func(cls, reward_func_fqn: str) -> RewardFunc:
+        """
+        Returns the reward function from the given fully qualified name, or the path to the reward function model.
+
+        Args:
+            reward_func_fqn (str): Fully qualified name of the reward function (e.g. r1_grpo.gsm8k_transform),
+                or a HF hub path to the reward model.
+        Raises:
+            ValueError: If the reward function does not accept at least two arguments.
+            AttributeError: If the module can be imported but does not define the function.
+
+        Returns:
+            RewardFunc: A callable that accepts prompts and completions and returns rewards,
+                or a path to a reward model.
+
+        """
+        # split "pkg.module.func" into the full module path and the attribute name
+        module_name, _, func_name = reward_func_fqn.rpartition(".")
+
+        reward_func_module = None
+        # no dot (or a leading dot, e.g. "./local/model") cannot be an importable module
+        if module_name and not module_name.startswith("."):
+            try:
+                # use importlib to dynamically load the reward function from the module
+                reward_func_module = importlib.import_module(module_name)
+            except ModuleNotFoundError:
+                reward_func_module = None
+
+        if reward_func_module is None:
+            # the user has passed a string (ideally indicating the path of a reward model)
+            LOG.info(
+                f"Reward function {reward_func_fqn} is a pre-trained model path - if this is unexpected, please check the reward function path."
+            )
+            return reward_func_fqn
+
+        reward_func = getattr(reward_func_module, func_name)
+        if not len(inspect.signature(reward_func).parameters) >= 2:
+            raise ValueError(
+                "Reward function must accept at least two arguments: prompts: list and completions: list"
+            )
+        return reward_func
--- a/src/axolotl/core/trainers/grpo/args.py
+++ b/src/axolotl/core/trainers/grpo/args.py
@@ -0,0 +1,15 @@
+"""
+Axolotl Specific Training Args
+"""
+from dataclasses import dataclass
+
+from trl import GRPOConfig
+
+from axolotl.core.training_args import AxolotlTrainingMixins
+
+
+@dataclass
+class AxolotlGRPOConfig(AxolotlTrainingMixins, GRPOConfig):
+    """
+    Axolotl GRPO Config for GRPO training
+
+    Combines trl's `GRPOConfig` with `AxolotlTrainingMixins`; declares no fields of its own.
+    """
--- a/src/axolotl/core/trainers/grpo/trainer.py
+++ b/src/axolotl/core/trainers/grpo/trainer.py
@@ -0,0 +1,14 @@
+"""
+Axolotl GRPO trainer
+"""
+from trl import GRPOTrainer
+
+from axolotl.core.trainers.base import SchedulerMixin
+
+
+class AxolotlGRPOTrainer(SchedulerMixin, GRPOTrainer):
+    """
+    Extend the base GRPOTrainer for axolotl helpers
+
+    Layers `SchedulerMixin` on top of trl's `GRPOTrainer` and sets `_tag_names`.
+    """
+
+    # NOTE(review): presumably read by trl when tagging saved / pushed models - confirm
+    _tag_names = ["trl", "grpo", "axolotl"]
--- a/src/axolotl/core/training_args.py
+++ b/src/axolotl/core/training_args.py
@@ -5,7 +5,7 @@ from dataclasses import dataclass, field
 from typing import Optional

 from transformers import TrainingArguments
-from trl import CPOConfig, DPOConfig, KTOConfig, ORPOConfig, PRMConfig, RewardConfig
+from trl import CPOConfig, KTOConfig, ORPOConfig, PRMConfig, RewardConfig


@dataclass
@@ -217,13 +217,6 @@ class AxolotlTrainingArguments(AxolotlTrainingMixins, TrainingArguments):
    """


-@dataclass
-class AxolotlDPOConfig(AxolotlTrainingMixins, DPOConfig):
-    """
-    DPO config for DPO training
-    """
-
-
@dataclass
 class AxolotlORPOConfig(AxolotlTrainingMixins, ORPOConfig):
    """
--- a/src/axolotl/integrations/spectrum/model_snr_results/snr_results_Qwen-Qwen2.5-1.5B-Instruct.json
+++ b/src/axolotl/integrations/spectrum/model_snr_results/snr_results_Qwen-Qwen2.5-1.5B-Instruct.json
--- a/src/axolotl/integrations/spectrum/model_snr_results/snr_results_Qwen-Qwen2.5-1.5B.json
+++ b/src/axolotl/integrations/spectrum/model_snr_results/snr_results_Qwen-Qwen2.5-1.5B.json
--- a/src/axolotl/integrations/spectrum/model_snr_results/snr_results_Qwen-Qwen2.5-3B-Instruct.json
+++ b/src/axolotl/integrations/spectrum/model_snr_results/snr_results_Qwen-Qwen2.5-3B-Instruct.json
--- a/src/axolotl/integrations/spectrum/model_snr_results/snr_results_Qwen-Qwen2.5-3B.json
+++ b/src/axolotl/integrations/spectrum/model_snr_results/snr_results_Qwen-Qwen2.5-3B.json
--- a/src/axolotl/integrations/spectrum/model_snr_results/snr_results_Qwen-Qwen2.5-7B-Instruct.json
+++ b/src/axolotl/integrations/spectrum/model_snr_results/snr_results_Qwen-Qwen2.5-7B-Instruct.json
--- a/src/axolotl/integrations/spectrum/model_snr_results/snr_results_Qwen-Qwen2.5-7B.json
+++ b/src/axolotl/integrations/spectrum/model_snr_results/snr_results_Qwen-Qwen2.5-7B.json
--- a/src/axolotl/integrations/spectrum/model_snr_results/snr_results_google-gemma-2-2b.json
+++ b/src/axolotl/integrations/spectrum/model_snr_results/snr_results_google-gemma-2-2b.json
--- a/src/axolotl/integrations/spectrum/model_snr_results/snr_results_meta-llama-Llama-3.2-1B-Instruct.json
+++ b/src/axolotl/integrations/spectrum/model_snr_results/snr_results_meta-llama-Llama-3.2-1B-Instruct.json
@@ -1,590 +0,0 @@
-{
-    "model.layers.0.input_layernorm": {
-        "snr": Infinity,
-        "type": "input_layernorm"
-    },
-    "model.layers.1.input_layernorm": {
-        "snr": Infinity,
-        "type": "input_layernorm"
-    },
-    "model.layers.2.input_layernorm": {
-        "snr": Infinity,
-        "type": "input_layernorm"
-    },
-    "model.layers.3.input_layernorm": {
-        "snr": Infinity,
-        "type": "input_layernorm"
-    },
-    "model.layers.4.input_layernorm": {
-        "snr": Infinity,
-        "type": "input_layernorm"
-    },
-    "model.layers.5.input_layernorm": {
-        "snr": Infinity,
-        "type": "input_layernorm"
-    },
-    "model.layers.6.input_layernorm": {
-        "snr": Infinity,
-        "type": "input_layernorm"
-    },
-    "model.layers.7.input_layernorm": {
-        "snr": Infinity,
-        "type": "input_layernorm"
-    },
-    "model.layers.8.input_layernorm": {
-        "snr": Infinity,
-        "type": "input_layernorm"
-    },
-    "model.layers.9.input_layernorm": {
-        "snr": Infinity,
-        "type": "input_layernorm"
-    },
-    "model.layers.10.input_layernorm": {
-        "snr": Infinity,
-        "type": "input_layernorm"
-    },
-    "model.layers.11.input_layernorm": {
-        "snr": Infinity,
-        "type": "input_layernorm"
-    },
-    "model.layers.12.input_layernorm": {
-        "snr": Infinity,
-        "type": "input_layernorm"
-    },
-    "model.layers.13.input_layernorm": {
-        "snr": Infinity,
-        "type": "input_layernorm"
-    },
-    "model.layers.14.input_layernorm": {
-        "snr": Infinity,
-        "type": "input_layernorm"
-    },
-    "model.layers.15.input_layernorm": {
-        "snr": Infinity,
-        "type": "input_layernorm"
-    },
-    "lm_head": {
-        "snr": Infinity,
-        "type": "lm_head"
-    },
-    "model.layers.0.mlp.down_proj": {
-        "snr": 70.0594253540039,
-        "type": "mlp.down_proj"
-    },
-    "model.layers.1.mlp.down_proj": {
-        "snr": 11.135851860046387,
-        "type": "mlp.down_proj"
-    },
-    "model.layers.2.mlp.down_proj": {
-        "snr": 7.035482883453369,
-        "type": "mlp.down_proj"
-    },
-    "model.layers.3.mlp.down_proj": {
-        "snr": 6.422532081604004,
-        "type": "mlp.down_proj"
-    },
-    "model.layers.4.mlp.down_proj": {
-        "snr": 5.748020172119141,
-        "type": "mlp.down_proj"
-    },
-    "model.layers.5.mlp.down_proj": {
-        "snr": 3.885556697845459,
-        "type": "mlp.down_proj"
-    },
-    "model.layers.6.mlp.down_proj": {
-        "snr": 3.4336745738983154,
-        "type": "mlp.down_proj"
-    },
-    "model.layers.7.mlp.down_proj": {
-        "snr": 2.791595935821533,
-        "type": "mlp.down_proj"
-    },
-    "model.layers.8.mlp.down_proj": {
-        "snr": 5.36277961730957,
-        "type": "mlp.down_proj"
-    },
-    "model.layers.9.mlp.down_proj": {
-        "snr": 4.459208011627197,
-        "type": "mlp.down_proj"
-    },
-    "model.layers.10.mlp.down_proj": {
-        "snr": 6.272170066833496,
-        "type": "mlp.down_proj"
-    },
-    "model.layers.11.mlp.down_proj": {
-        "snr": 5.264761447906494,
-        "type": "mlp.down_proj"
-    },
-    "model.layers.12.mlp.down_proj": {
-        "snr": 4.324735641479492,
-        "type": "mlp.down_proj"
-    },
-    "model.layers.13.mlp.down_proj": {
-        "snr": 3.878648042678833,
-        "type": "mlp.down_proj"
-    },
-    "model.layers.14.mlp.down_proj": {
-        "snr": 2.9773054122924805,
-        "type": "mlp.down_proj"
-    },
-    "model.layers.15.mlp.down_proj": {
-        "snr": 4.471445560455322,
-        "type": "mlp.down_proj"
-    },
-    "model.layers.0.mlp.gate_proj": {
-        "snr": 25.227100372314453,
-        "type": "mlp.gate_proj"
-    },
-    "model.layers.1.mlp.gate_proj": {
-        "snr": 6.58299446105957,
-        "type": "mlp.gate_proj"
-    },
-    "model.layers.2.mlp.gate_proj": {
-        "snr": 3.4688243865966797,
-        "type": "mlp.gate_proj"
-    },
-    "model.layers.3.mlp.gate_proj": {
-        "snr": 1.555246114730835,
-        "type": "mlp.gate_proj"
-    },
-    "model.layers.4.mlp.gate_proj": {
-        "snr": 0.7770601511001587,
-        "type": "mlp.gate_proj"
-    },
-    "model.layers.5.mlp.gate_proj": {
-        "snr": 0.6239906549453735,
-        "type": "mlp.gate_proj"
-    },
-    "model.layers.6.mlp.gate_proj": {
-        "snr": 0.6440379023551941,
-        "type": "mlp.gate_proj"
-    },
-    "model.layers.7.mlp.gate_proj": {
-        "snr": 0.5120116472244263,
-        "type": "mlp.gate_proj"
-    },
-    "model.layers.8.mlp.gate_proj": {
-        "snr": 0.6544050574302673,
-        "type": "mlp.gate_proj"
-    },
-    "model.layers.9.mlp.gate_proj": {
-        "snr": 0.5381016731262207,
-        "type": "mlp.gate_proj"
-    },
-    "model.layers.10.mlp.gate_proj": {
-        "snr": 0.622873842716217,
-        "type": "mlp.gate_proj"
-    },
-    "model.layers.11.mlp.gate_proj": {
-        "snr": 0.9361700415611267,
-        "type": "mlp.gate_proj"
-    },
-    "model.layers.12.mlp.gate_proj": {
-        "snr": 1.475605845451355,
-        "type": "mlp.gate_proj"
-    },
-    "model.layers.13.mlp.gate_proj": {
-        "snr": 1.608325719833374,
-        "type": "mlp.gate_proj"
-    },
-    "model.layers.14.mlp.gate_proj": {
-        "snr": 1.0720024108886719,
-        "type": "mlp.gate_proj"
-    },
-    "model.layers.15.mlp.gate_proj": {
-        "snr": 0.7111338973045349,
-        "type": "mlp.gate_proj"
-    },
-    "model.layers.0.mlp.up_proj": {
-        "snr": 28.431896209716797,
-        "type": "mlp.up_proj"
-    },
-    "model.layers.1.mlp.up_proj": {
-        "snr": 15.546019554138184,
-        "type": "mlp.up_proj"
-    },
-    "model.layers.2.mlp.up_proj": {
-        "snr": 23.048023223876953,
-        "type": "mlp.up_proj"
-    },
-    "model.layers.3.mlp.up_proj": {
-        "snr": 25.790977478027344,
-        "type": "mlp.up_proj"
-    },
-    "model.layers.4.mlp.up_proj": {
-        "snr": 18.552549362182617,
-        "type": "mlp.up_proj"
-    },
-    "model.layers.5.mlp.up_proj": {
-        "snr": 8.85106372833252,
-        "type": "mlp.up_proj"
-    },
-    "model.layers.6.mlp.up_proj": {
-        "snr": 10.653799057006836,
-        "type": "mlp.up_proj"
-    },
-    "model.layers.7.mlp.up_proj": {
-        "snr": 7.365357875823975,
-        "type": "mlp.up_proj"
-    },
-    "model.layers.8.mlp.up_proj": {
-        "snr": 11.98373794555664,
-        "type": "mlp.up_proj"
-    },
-    "model.layers.9.mlp.up_proj": {
-        "snr": 8.04493236541748,
-        "type": "mlp.up_proj"
-    },
-    "model.layers.10.mlp.up_proj": {
-        "snr": 8.523039817810059,
-        "type": "mlp.up_proj"
-    },
-    "model.layers.11.mlp.up_proj": {
-        "snr": 5.381742477416992,
-        "type": "mlp.up_proj"
-    },
-    "model.layers.12.mlp.up_proj": {
-        "snr": 3.9845118522644043,
-        "type": "mlp.up_proj"
-    },
-    "model.layers.13.mlp.up_proj": {
-        "snr": 3.4893221855163574,
-        "type": "mlp.up_proj"
-    },
-    "model.layers.14.mlp.up_proj": {
-        "snr": 1.764201045036316,
-        "type": "mlp.up_proj"
-    },
-    "model.layers.15.mlp.up_proj": {
-        "snr": 0.9730708599090576,
-        "type": "mlp.up_proj"
-    },
-    "model.embed_tokens": {
-        "snr": Infinity,
-        "type": "model.embed_tokens"
-    },
-    "model.norm": {
-        "snr": Infinity,
-        "type": "model.norm"
-    },
-    "model.layers.0.post_attention_layernorm": {
-        "snr": Infinity,
-        "type": "post_attention_layernorm"
-    },
-    "model.layers.1.post_attention_layernorm": {
-        "snr": Infinity,
-        "type": "post_attention_layernorm"
-    },
-    "model.layers.2.post_attention_layernorm": {
-        "snr": Infinity,
-        "type": "post_attention_layernorm"
-    },
-    "model.layers.3.post_attention_layernorm": {
-        "snr": Infinity,
-        "type": "post_attention_layernorm"
-    },
-    "model.layers.4.post_attention_layernorm": {
-        "snr": Infinity,
-        "type": "post_attention_layernorm"
-    },
-    "model.layers.5.post_attention_layernorm": {
-        "snr": Infinity,
-        "type": "post_attention_layernorm"
-    },
-    "model.layers.6.post_attention_layernorm": {
-        "snr": Infinity,
-        "type": "post_attention_layernorm"
-    },
-    "model.layers.7.post_attention_layernorm": {
-        "snr": Infinity,
-        "type": "post_attention_layernorm"
-    },
-    "model.layers.8.post_attention_layernorm": {
-        "snr": Infinity,
-        "type": "post_attention_layernorm"
-    },
-    "model.layers.9.post_attention_layernorm": {
-        "snr": Infinity,
-        "type": "post_attention_layernorm"
-    },
-    "model.layers.10.post_attention_layernorm": {
-        "snr": Infinity,
-        "type": "post_attention_layernorm"
-    },
-    "model.layers.11.post_attention_layernorm": {
-        "snr": Infinity,
-        "type": "post_attention_layernorm"
-    },
-    "model.layers.12.post_attention_layernorm": {
-        "snr": Infinity,
-        "type": "post_attention_layernorm"
-    },
-    "model.layers.13.post_attention_layernorm": {
-        "snr": Infinity,
-        "type": "post_attention_layernorm"
-    },
-    "model.layers.14.post_attention_layernorm": {
-        "snr": Infinity,
-        "type": "post_attention_layernorm"
-    },
-    "model.layers.15.post_attention_layernorm": {
-        "snr": Infinity,
-        "type": "post_attention_layernorm"
-    },
-    "model.layers.0.self_attn.k_proj": {
-        "snr": 0.11727584153413773,
-        "type": "self_attn.k_proj"
-    },
-    "model.layers.1.self_attn.k_proj": {
-        "snr": 0.24786807596683502,
-        "type": "self_attn.k_proj"
-    },
-    "model.layers.2.self_attn.k_proj": {
-        "snr": 0.36378130316734314,
-        "type": "self_attn.k_proj"
-    },
-    "model.layers.3.self_attn.k_proj": {
-        "snr": 0.2983120381832123,
-        "type": "self_attn.k_proj"
-    },
-    "model.layers.4.self_attn.k_proj": {
-        "snr": 0.33789733052253723,
-        "type": "self_attn.k_proj"
-    },
-    "model.layers.5.self_attn.k_proj": {
-        "snr": 0.29155924916267395,
-        "type": "self_attn.k_proj"
-    },
-    "model.layers.6.self_attn.k_proj": {
-        "snr": 0.2537297010421753,
-        "type": "self_attn.k_proj"
-    },
-    "model.layers.7.self_attn.k_proj": {
-        "snr": 0.28204113245010376,
-        "type": "self_attn.k_proj"
-    },
-    "model.layers.8.self_attn.k_proj": {
-        "snr": 0.2776711583137512,
-        "type": "self_attn.k_proj"
-    },
-    "model.layers.9.self_attn.k_proj": {
-        "snr": 0.2927376627922058,
-        "type": "self_attn.k_proj"
-    },
-    "model.layers.10.self_attn.k_proj": {
-        "snr": 0.31486213207244873,
-        "type": "self_attn.k_proj"
-    },
-    "model.layers.11.self_attn.k_proj": {
-        "snr": 0.32363659143447876,
-        "type": "self_attn.k_proj"
-    },
-    "model.layers.12.self_attn.k_proj": {
-        "snr": 0.31382912397384644,
-        "type": "self_attn.k_proj"
-    },
-    "model.layers.13.self_attn.k_proj": {
-        "snr": 0.4635234773159027,
-        "type": "self_attn.k_proj"
-    },
-    "model.layers.14.self_attn.k_proj": {
-        "snr": 0.25379249453544617,
-        "type": "self_attn.k_proj"
-    },
-    "model.layers.15.self_attn.k_proj": {
-        "snr": 0.2628238797187805,
-        "type": "self_attn.k_proj"
-    },
-    "model.layers.0.self_attn.o_proj": {
-        "snr": 0.27602291107177734,
-        "type": "self_attn.o_proj"
-    },
-    "model.layers.1.self_attn.o_proj": {
-        "snr": 0.2149604707956314,
-        "type": "self_attn.o_proj"
-    },
-    "model.layers.2.self_attn.o_proj": {
-        "snr": 0.2540294826030731,
-        "type": "self_attn.o_proj"
-    },
-    "model.layers.3.self_attn.o_proj": {
-        "snr": 0.27978822588920593,
-        "type": "self_attn.o_proj"
-    },
-    "model.layers.4.self_attn.o_proj": {
-        "snr": 0.3121289908885956,
-        "type": "self_attn.o_proj"
-    },
-    "model.layers.5.self_attn.o_proj": {
-        "snr": 0.35037684440612793,
-        "type": "self_attn.o_proj"
-    },
-    "model.layers.6.self_attn.o_proj": {
-        "snr": 0.366205096244812,
-        "type": "self_attn.o_proj"
-    },
-    "model.layers.7.self_attn.o_proj": {
-        "snr": 0.3692712187767029,
-        "type": "self_attn.o_proj"
-    },
-    "model.layers.8.self_attn.o_proj": {
-        "snr": 0.3301038146018982,
-        "type": "self_attn.o_proj"
-    },
-    "model.layers.9.self_attn.o_proj": {
-        "snr": 0.3003396987915039,
-        "type": "self_attn.o_proj"
-    },
-    "model.layers.10.self_attn.o_proj": {
-        "snr": 0.30804169178009033,
-        "type": "self_attn.o_proj"
-    },
-    "model.layers.11.self_attn.o_proj": {
-        "snr": 0.28501132130622864,
-        "type": "self_attn.o_proj"
-    },
-    "model.layers.12.self_attn.o_proj": {
-        "snr": 0.2171541005373001,
-        "type": "self_attn.o_proj"
-    },
-    "model.layers.13.self_attn.o_proj": {
-        "snr": 0.19183959066867828,
-        "type": "self_attn.o_proj"
-    },
-    "model.layers.14.self_attn.o_proj": {
-        "snr": 0.19215913116931915,
-        "type": "self_attn.o_proj"
-    },
-    "model.layers.15.self_attn.o_proj": {
-        "snr": 0.25486502051353455,
-        "type": "self_attn.o_proj"
-    },
-    "model.layers.0.self_attn.q_proj": {
-        "snr": 0.03850084915757179,
-        "type": "self_attn.q_proj"
-    },
-    "model.layers.1.self_attn.q_proj": {
-        "snr": 0.0713055431842804,
-        "type": "self_attn.q_proj"
-    },
-    "model.layers.2.self_attn.q_proj": {
-        "snr": 0.07948919385671616,
-        "type": "self_attn.q_proj"
-    },
-    "model.layers.3.self_attn.q_proj": {
-        "snr": 0.08047746121883392,
-        "type": "self_attn.q_proj"
-    },
-    "model.layers.4.self_attn.q_proj": {
-        "snr": 0.0852593332529068,
-        "type": "self_attn.q_proj"
-    },
-    "model.layers.5.self_attn.q_proj": {
-        "snr": 0.09794823825359344,
-        "type": "self_attn.q_proj"
-    },
-    "model.layers.6.self_attn.q_proj": {
-        "snr": 0.09627152234315872,
-        "type": "self_attn.q_proj"
-    },
-    "model.layers.7.self_attn.q_proj": {
-        "snr": 0.11065381020307541,
-        "type": "self_attn.q_proj"
-    },
-    "model.layers.8.self_attn.q_proj": {
-        "snr": 0.12031875550746918,
-        "type": "self_attn.q_proj"
-    },
-    "model.layers.9.self_attn.q_proj": {
-        "snr": 0.09804573655128479,
-        "type": "self_attn.q_proj"
-    },
-    "model.layers.10.self_attn.q_proj": {
-        "snr": 0.10897502303123474,
-        "type": "self_attn.q_proj"
-    },
-    "model.layers.11.self_attn.q_proj": {
-        "snr": 0.09267337620258331,
-        "type": "self_attn.q_proj"
-    },
-    "model.layers.12.self_attn.q_proj": {
-        "snr": 0.08803492039442062,
-        "type": "self_attn.q_proj"
-    },
-    "model.layers.13.self_attn.q_proj": {
-        "snr": 0.0902542844414711,
-        "type": "self_attn.q_proj"
-    },
-    "model.layers.14.self_attn.q_proj": {
-        "snr": 0.10154066979885101,
-        "type": "self_attn.q_proj"
-    },
-    "model.layers.15.self_attn.q_proj": {
-        "snr": 0.09083802253007889,
-        "type": "self_attn.q_proj"
-    },
-    "model.layers.0.self_attn.v_proj": {
-        "snr": 2.842210054397583,
-        "type": "self_attn.v_proj"
-    },
-    "model.layers.1.self_attn.v_proj": {
-        "snr": 10.59461498260498,
-        "type": "self_attn.v_proj"
-    },
-    "model.layers.2.self_attn.v_proj": {
-        "snr": 8.993025779724121,
-        "type": "self_attn.v_proj"
-    },
-    "model.layers.3.self_attn.v_proj": {
-        "snr": 62.567787170410156,
-        "type": "self_attn.v_proj"
-    },
-    "model.layers.4.self_attn.v_proj": {
-        "snr": 23.80082893371582,
-        "type": "self_attn.v_proj"
-    },
-    "model.layers.5.self_attn.v_proj": {
-        "snr": 7.957369804382324,
-        "type": "self_attn.v_proj"
-    },
-    "model.layers.6.self_attn.v_proj": {
-        "snr": 12.01815414428711,
-        "type": "self_attn.v_proj"
-    },
-    "model.layers.7.self_attn.v_proj": {
-        "snr": 5.095500469207764,
-        "type": "self_attn.v_proj"
-    },
-    "model.layers.8.self_attn.v_proj": {
-        "snr": 11.719332695007324,
-        "type": "self_attn.v_proj"
-    },
-    "model.layers.9.self_attn.v_proj": {
-        "snr": 555.0869750976562,
-        "type": "self_attn.v_proj"
-    },
-    "model.layers.10.self_attn.v_proj": {
-        "snr": 22.95538330078125,
-        "type": "self_attn.v_proj"
-    },
-    "model.layers.11.self_attn.v_proj": {
-        "snr": 30.042158126831055,
-        "type": "self_attn.v_proj"
-    },
-    "model.layers.12.self_attn.v_proj": {
-        "snr": 9.577271461486816,
-        "type": "self_attn.v_proj"
-    },
-    "model.layers.13.self_attn.v_proj": {
-        "snr": 18.176361083984375,
-        "type": "self_attn.v_proj"
-    },
-    "model.layers.14.self_attn.v_proj": {
-        "snr": 1.5695856809616089,
-        "type": "self_attn.v_proj"
-    },
-    "model.layers.15.self_attn.v_proj": {
-        "snr": 2.7235565185546875,
-        "type": "self_attn.v_proj"
-    }
-}
--- a/src/axolotl/integrations/spectrum/model_snr_results/snr_results_meta-llama-Llama-3.2-1B.json
+++ b/src/axolotl/integrations/spectrum/model_snr_results/snr_results_meta-llama-Llama-3.2-1B.json
@@ -1,590 +0,0 @@
-{
-    "model.layers.0.input_layernorm": {
-        "snr": Infinity,
-        "type": "input_layernorm"
-    },
-    "model.layers.1.input_layernorm": {
-        "snr": Infinity,
-        "type": "input_layernorm"
-    },
-    "model.layers.2.input_layernorm": {
-        "snr": Infinity,
-        "type": "input_layernorm"
-    },
-    "model.layers.3.input_layernorm": {
-        "snr": Infinity,
-        "type": "input_layernorm"
-    },
-    "model.layers.4.input_layernorm": {
-        "snr": Infinity,
-        "type": "input_layernorm"
-    },
-    "model.layers.5.input_layernorm": {
-        "snr": Infinity,
-        "type": "input_layernorm"
-    },
-    "model.layers.6.input_layernorm": {
-        "snr": Infinity,
-        "type": "input_layernorm"
-    },
-    "model.layers.7.input_layernorm": {
-        "snr": Infinity,
-        "type": "input_layernorm"
-    },
-    "model.layers.8.input_layernorm": {
-        "snr": Infinity,
-        "type": "input_layernorm"
-    },
-    "model.layers.9.input_layernorm": {
-        "snr": Infinity,
-        "type": "input_layernorm"
-    },
-    "model.layers.10.input_layernorm": {
-        "snr": Infinity,
-        "type": "input_layernorm"
-    },
-    "model.layers.11.input_layernorm": {
-        "snr": Infinity,
-        "type": "input_layernorm"
-    },
-    "model.layers.12.input_layernorm": {
-        "snr": Infinity,
-        "type": "input_layernorm"
-    },
-    "model.layers.13.input_layernorm": {
-        "snr": Infinity,
-        "type": "input_layernorm"
-    },
-    "model.layers.14.input_layernorm": {
-        "snr": Infinity,
-        "type": "input_layernorm"
-    },
-    "model.layers.15.input_layernorm": {
-        "snr": Infinity,
-        "type": "input_layernorm"
-    },
-    "lm_head": {
-        "snr": Infinity,
-        "type": "lm_head"
-    },
-    "model.layers.0.mlp.down_proj": {
-        "snr": 57.09797286987305,
-        "type": "mlp.down_proj"
-    },
-    "model.layers.1.mlp.down_proj": {
-        "snr": 9.538983345031738,
-        "type": "mlp.down_proj"
-    },
-    "model.layers.2.mlp.down_proj": {
-        "snr": 6.227016925811768,
-        "type": "mlp.down_proj"
-    },
-    "model.layers.3.mlp.down_proj": {
-        "snr": 5.660686492919922,
-        "type": "mlp.down_proj"
-    },
-    "model.layers.4.mlp.down_proj": {
-        "snr": 5.178432464599609,
-        "type": "mlp.down_proj"
-    },
-    "model.layers.5.mlp.down_proj": {
-        "snr": 3.5638349056243896,
-        "type": "mlp.down_proj"
-    },
-    "model.layers.6.mlp.down_proj": {
-        "snr": 3.0918056964874268,
-        "type": "mlp.down_proj"
-    },
-    "model.layers.7.mlp.down_proj": {
-        "snr": 2.456392288208008,
-        "type": "mlp.down_proj"
-    },
-    "model.layers.8.mlp.down_proj": {
-        "snr": 4.525328636169434,
-        "type": "mlp.down_proj"
-    },
-    "model.layers.9.mlp.down_proj": {
-        "snr": 3.9409055709838867,
-        "type": "mlp.down_proj"
-    },
-    "model.layers.10.mlp.down_proj": {
-        "snr": 5.447249412536621,
-        "type": "mlp.down_proj"
-    },
-    "model.layers.11.mlp.down_proj": {
-        "snr": 4.807600975036621,
-        "type": "mlp.down_proj"
-    },
-    "model.layers.12.mlp.down_proj": {
-        "snr": 3.915374517440796,
-        "type": "mlp.down_proj"
-    },
-    "model.layers.13.mlp.down_proj": {
-        "snr": 3.4820363521575928,
-        "type": "mlp.down_proj"
-    },
-    "model.layers.14.mlp.down_proj": {
-        "snr": 2.6045074462890625,
-        "type": "mlp.down_proj"
-    },
-    "model.layers.15.mlp.down_proj": {
-        "snr": 3.7237701416015625,
-        "type": "mlp.down_proj"
-    },
-    "model.layers.0.mlp.gate_proj": {
-        "snr": 22.160131454467773,
-        "type": "mlp.gate_proj"
-    },
-    "model.layers.1.mlp.gate_proj": {
-        "snr": 6.072206020355225,
-        "type": "mlp.gate_proj"
-    },
-    "model.layers.2.mlp.gate_proj": {
-        "snr": 3.2467362880706787,
-        "type": "mlp.gate_proj"
-    },
-    "model.layers.3.mlp.gate_proj": {
-        "snr": 1.4111896753311157,
-        "type": "mlp.gate_proj"
-    },
-    "model.layers.4.mlp.gate_proj": {
-        "snr": 0.7405938506126404,
-        "type": "mlp.gate_proj"
-    },
-    "model.layers.5.mlp.gate_proj": {
-        "snr": 0.5916463136672974,
-        "type": "mlp.gate_proj"
-    },
-    "model.layers.6.mlp.gate_proj": {
-        "snr": 0.6149423718452454,
-        "type": "mlp.gate_proj"
-    },
-    "model.layers.7.mlp.gate_proj": {
-        "snr": 0.48369669914245605,
-        "type": "mlp.gate_proj"
-    },
-    "model.layers.8.mlp.gate_proj": {
-        "snr": 0.6047574877738953,
-        "type": "mlp.gate_proj"
-    },
-    "model.layers.9.mlp.gate_proj": {
-        "snr": 0.5092479586601257,
-        "type": "mlp.gate_proj"
-    },
-    "model.layers.10.mlp.gate_proj": {
-        "snr": 0.5999670624732971,
-        "type": "mlp.gate_proj"
-    },
-    "model.layers.11.mlp.gate_proj": {
-        "snr": 0.8980127573013306,
-        "type": "mlp.gate_proj"
-    },
-    "model.layers.12.mlp.gate_proj": {
-        "snr": 1.4252448081970215,
-        "type": "mlp.gate_proj"
-    },
-    "model.layers.13.mlp.gate_proj": {
-        "snr": 1.509937047958374,
-        "type": "mlp.gate_proj"
-    },
-    "model.layers.14.mlp.gate_proj": {
-        "snr": 1.0066585540771484,
-        "type": "mlp.gate_proj"
-    },
-    "model.layers.15.mlp.gate_proj": {
-        "snr": 0.6413647532463074,
-        "type": "mlp.gate_proj"
-    },
-    "model.layers.0.mlp.up_proj": {
-        "snr": 26.08852195739746,
-        "type": "mlp.up_proj"
-    },
-    "model.layers.1.mlp.up_proj": {
-        "snr": 13.382951736450195,
-        "type": "mlp.up_proj"
-    },
-    "model.layers.2.mlp.up_proj": {
-        "snr": 20.088768005371094,
-        "type": "mlp.up_proj"
-    },
-    "model.layers.3.mlp.up_proj": {
-        "snr": 23.0632381439209,
-        "type": "mlp.up_proj"
-    },
-    "model.layers.4.mlp.up_proj": {
-        "snr": 16.07433319091797,
-        "type": "mlp.up_proj"
-    },
-    "model.layers.5.mlp.up_proj": {
-        "snr": 8.00507640838623,
-        "type": "mlp.up_proj"
-    },
-    "model.layers.6.mlp.up_proj": {
-        "snr": 9.538354873657227,
-        "type": "mlp.up_proj"
-    },
-    "model.layers.7.mlp.up_proj": {
-        "snr": 6.286602973937988,
-        "type": "mlp.up_proj"
-    },
-    "model.layers.8.mlp.up_proj": {
-        "snr": 10.092820167541504,
-        "type": "mlp.up_proj"
-    },
-    "model.layers.9.mlp.up_proj": {
-        "snr": 7.193963527679443,
-        "type": "mlp.up_proj"
-    },
-    "model.layers.10.mlp.up_proj": {
-        "snr": 7.320116996765137,
-        "type": "mlp.up_proj"
-    },
-    "model.layers.11.mlp.up_proj": {
-        "snr": 4.8728532791137695,
-        "type": "mlp.up_proj"
-    },
-    "model.layers.12.mlp.up_proj": {
-        "snr": 3.596583366394043,
-        "type": "mlp.up_proj"
-    },
-    "model.layers.13.mlp.up_proj": {
-        "snr": 3.166161298751831,
-        "type": "mlp.up_proj"
-    },
-    "model.layers.14.mlp.up_proj": {
-        "snr": 1.5600818395614624,
-        "type": "mlp.up_proj"
-    },
-    "model.layers.15.mlp.up_proj": {
-        "snr": 0.8726214170455933,
-        "type": "mlp.up_proj"
-    },
-    "model.embed_tokens": {
-        "snr": Infinity,
-        "type": "model.embed_tokens"
-    },
-    "model.norm": {
-        "snr": Infinity,
-        "type": "model.norm"
-    },
-    "model.layers.0.post_attention_layernorm": {
-        "snr": Infinity,
-        "type": "post_attention_layernorm"
-    },
-    "model.layers.1.post_attention_layernorm": {
-        "snr": Infinity,
-        "type": "post_attention_layernorm"
-    },
-    "model.layers.2.post_attention_layernorm": {
-        "snr": Infinity,
-        "type": "post_attention_layernorm"
-    },
-    "model.layers.3.post_attention_layernorm": {
-        "snr": Infinity,
-        "type": "post_attention_layernorm"
-    },
-    "model.layers.4.post_attention_layernorm": {
-        "snr": Infinity,
-        "type": "post_attention_layernorm"
-    },
-    "model.layers.5.post_attention_layernorm": {
-        "snr": Infinity,
-        "type": "post_attention_layernorm"
-    },
-    "model.layers.6.post_attention_layernorm": {
-        "snr": Infinity,
-        "type": "post_attention_layernorm"
-    },
-    "model.layers.7.post_attention_layernorm": {
-        "snr": Infinity,
-        "type": "post_attention_layernorm"
-    },
-    "model.layers.8.post_attention_layernorm": {
-        "snr": Infinity,
-        "type": "post_attention_layernorm"
-    },
-    "model.layers.9.post_attention_layernorm": {
-        "snr": Infinity,
-        "type": "post_attention_layernorm"
-    },
-    "model.layers.10.post_attention_layernorm": {
-        "snr": Infinity,
-        "type": "post_attention_layernorm"
-    },
-    "model.layers.11.post_attention_layernorm": {
-        "snr": Infinity,
-        "type": "post_attention_layernorm"
-    },
-    "model.layers.12.post_attention_layernorm": {
-        "snr": Infinity,
-        "type": "post_attention_layernorm"
-    },
-    "model.layers.13.post_attention_layernorm": {
-        "snr": Infinity,
-        "type": "post_attention_layernorm"
-    },
-    "model.layers.14.post_attention_layernorm": {
-        "snr": Infinity,
-        "type": "post_attention_layernorm"
-    },
-    "model.layers.15.post_attention_layernorm": {
-        "snr": Infinity,
-        "type": "post_attention_layernorm"
-    },
-    "model.layers.0.self_attn.k_proj": {
-        "snr": 0.1154392883181572,
-        "type": "self_attn.k_proj"
-    },
-    "model.layers.1.self_attn.k_proj": {
-        "snr": 0.24299409985542297,
-        "type": "self_attn.k_proj"
-    },
-    "model.layers.2.self_attn.k_proj": {
-        "snr": 0.3624322712421417,
-        "type": "self_attn.k_proj"
-    },
-    "model.layers.3.self_attn.k_proj": {
-        "snr": 0.29509487748146057,
-        "type": "self_attn.k_proj"
-    },
-    "model.layers.4.self_attn.k_proj": {
-        "snr": 0.32953736186027527,
-        "type": "self_attn.k_proj"
-    },
-    "model.layers.5.self_attn.k_proj": {
-        "snr": 0.2908833622932434,
-        "type": "self_attn.k_proj"
-    },
-    "model.layers.6.self_attn.k_proj": {
-        "snr": 0.2488437294960022,
-        "type": "self_attn.k_proj"
-    },
-    "model.layers.7.self_attn.k_proj": {
-        "snr": 0.27847856283187866,
-        "type": "self_attn.k_proj"
-    },
-    "model.layers.8.self_attn.k_proj": {
-        "snr": 0.27143892645835876,
-        "type": "self_attn.k_proj"
-    },
-    "model.layers.9.self_attn.k_proj": {
-        "snr": 0.28804272413253784,
-        "type": "self_attn.k_proj"
-    },
-    "model.layers.10.self_attn.k_proj": {
-        "snr": 0.31197959184646606,
-        "type": "self_attn.k_proj"
-    },
-    "model.layers.11.self_attn.k_proj": {
-        "snr": 0.3203586935997009,
-        "type": "self_attn.k_proj"
-    },
-    "model.layers.12.self_attn.k_proj": {
-        "snr": 0.30905747413635254,
-        "type": "self_attn.k_proj"
-    },
-    "model.layers.13.self_attn.k_proj": {
-        "snr": 0.46828722953796387,
-        "type": "self_attn.k_proj"
-    },
-    "model.layers.14.self_attn.k_proj": {
-        "snr": 0.24205778539180756,
-        "type": "self_attn.k_proj"
-    },
-    "model.layers.15.self_attn.k_proj": {
-        "snr": 0.2559327781200409,
-        "type": "self_attn.k_proj"
-    },
-    "model.layers.0.self_attn.o_proj": {
-        "snr": 0.2638678550720215,
-        "type": "self_attn.o_proj"
-    },
-    "model.layers.1.self_attn.o_proj": {
-        "snr": 0.21109595894813538,
-        "type": "self_attn.o_proj"
-    },
-    "model.layers.2.self_attn.o_proj": {
-        "snr": 0.24751724302768707,
-        "type": "self_attn.o_proj"
-    },
-    "model.layers.3.self_attn.o_proj": {
-        "snr": 0.2728094160556793,
-        "type": "self_attn.o_proj"
-    },
-    "model.layers.4.self_attn.o_proj": {
-        "snr": 0.3001374304294586,
-        "type": "self_attn.o_proj"
-    },
-    "model.layers.5.self_attn.o_proj": {
-        "snr": 0.33903488516807556,
-        "type": "self_attn.o_proj"
-    },
-    "model.layers.6.self_attn.o_proj": {
-        "snr": 0.3530929982662201,
-        "type": "self_attn.o_proj"
-    },
-    "model.layers.7.self_attn.o_proj": {
-        "snr": 0.36753255128860474,
-        "type": "self_attn.o_proj"
-    },
-    "model.layers.8.self_attn.o_proj": {
-        "snr": 0.3373180329799652,
-        "type": "self_attn.o_proj"
-    },
-    "model.layers.9.self_attn.o_proj": {
-        "snr": 0.2970578670501709,
-        "type": "self_attn.o_proj"
-    },
-    "model.layers.10.self_attn.o_proj": {
-        "snr": 0.3076324760913849,
-        "type": "self_attn.o_proj"
-    },
-    "model.layers.11.self_attn.o_proj": {
-        "snr": 0.2766900658607483,
-        "type": "self_attn.o_proj"
-    },
-    "model.layers.12.self_attn.o_proj": {
-        "snr": 0.20973259210586548,
-        "type": "self_attn.o_proj"
-    },
-    "model.layers.13.self_attn.o_proj": {
-        "snr": 0.18185566365718842,
-        "type": "self_attn.o_proj"
-    },
-    "model.layers.14.self_attn.o_proj": {
-        "snr": 0.18329747021198273,
-        "type": "self_attn.o_proj"
-    },
-    "model.layers.15.self_attn.o_proj": {
-        "snr": 0.2437991499900818,
-        "type": "self_attn.o_proj"
-    },
-    "model.layers.0.self_attn.q_proj": {
-        "snr": 0.038040731102228165,
-        "type": "self_attn.q_proj"
-    },
-    "model.layers.1.self_attn.q_proj": {
-        "snr": 0.0707998052239418,
-        "type": "self_attn.q_proj"
-    },
-    "model.layers.2.self_attn.q_proj": {
-        "snr": 0.0787411704659462,
-        "type": "self_attn.q_proj"
-    },
-    "model.layers.3.self_attn.q_proj": {
-        "snr": 0.08089710026979446,
-        "type": "self_attn.q_proj"
-    },
-    "model.layers.4.self_attn.q_proj": {
-        "snr": 0.08591937273740768,
-        "type": "self_attn.q_proj"
-    },
-    "model.layers.5.self_attn.q_proj": {
-        "snr": 0.09852176159620285,
-        "type": "self_attn.q_proj"
-    },
-    "model.layers.6.self_attn.q_proj": {
-        "snr": 0.09690654277801514,
-        "type": "self_attn.q_proj"
-    },
-    "model.layers.7.self_attn.q_proj": {
-        "snr": 0.11181341856718063,
-        "type": "self_attn.q_proj"
-    },
-    "model.layers.8.self_attn.q_proj": {
-        "snr": 0.12042108923196793,
-        "type": "self_attn.q_proj"
-    },
-    "model.layers.9.self_attn.q_proj": {
-        "snr": 0.09799323976039886,
-        "type": "self_attn.q_proj"
-    },
-    "model.layers.10.self_attn.q_proj": {
-        "snr": 0.10901063680648804,
-        "type": "self_attn.q_proj"
-    },
-    "model.layers.11.self_attn.q_proj": {
-        "snr": 0.09307146072387695,
-        "type": "self_attn.q_proj"
-    },
-    "model.layers.12.self_attn.q_proj": {
-        "snr": 0.0880950540304184,
-        "type": "self_attn.q_proj"
-    },
-    "model.layers.13.self_attn.q_proj": {
-        "snr": 0.08886399120092392,
-        "type": "self_attn.q_proj"
-    },
-    "model.layers.14.self_attn.q_proj": {
-        "snr": 0.09955056011676788,
-        "type": "self_attn.q_proj"
-    },
-    "model.layers.15.self_attn.q_proj": {
-        "snr": 0.08929339051246643,
-        "type": "self_attn.q_proj"
-    },
-    "model.layers.0.self_attn.v_proj": {
-        "snr": 2.5501928329467773,
-        "type": "self_attn.v_proj"
-    },
-    "model.layers.1.self_attn.v_proj": {
-        "snr": 9.449499130249023,
-        "type": "self_attn.v_proj"
-    },
-    "model.layers.2.self_attn.v_proj": {
-        "snr": 7.9920830726623535,
-        "type": "self_attn.v_proj"
-    },
-    "model.layers.3.self_attn.v_proj": {
-        "snr": 50.69462585449219,
-        "type": "self_attn.v_proj"
-    },
-    "model.layers.4.self_attn.v_proj": {
-        "snr": 19.083511352539062,
-        "type": "self_attn.v_proj"
-    },
-    "model.layers.5.self_attn.v_proj": {
-        "snr": 7.21597146987915,
-        "type": "self_attn.v_proj"
-    },
-    "model.layers.6.self_attn.v_proj": {
-        "snr": 11.27744197845459,
-        "type": "self_attn.v_proj"
-    },
-    "model.layers.7.self_attn.v_proj": {
-        "snr": 4.579711437225342,
-        "type": "self_attn.v_proj"
-    },
-    "model.layers.8.self_attn.v_proj": {
-        "snr": 10.940719604492188,
-        "type": "self_attn.v_proj"
-    },
-    "model.layers.9.self_attn.v_proj": {
-        "snr": 553.4417724609375,
-        "type": "self_attn.v_proj"
-    },
-    "model.layers.10.self_attn.v_proj": {
-        "snr": 20.59434700012207,
-        "type": "self_attn.v_proj"
-    },
-    "model.layers.11.self_attn.v_proj": {
-        "snr": 26.636865615844727,
-        "type": "self_attn.v_proj"
-    },
-    "model.layers.12.self_attn.v_proj": {
-        "snr": 8.614749908447266,
-        "type": "self_attn.v_proj"
-    },
-    "model.layers.13.self_attn.v_proj": {
-        "snr": 17.722007751464844,
-        "type": "self_attn.v_proj"
-    },
-    "model.layers.14.self_attn.v_proj": {
-        "snr": 1.48500657081604,
-        "type": "self_attn.v_proj"
-    },
-    "model.layers.15.self_attn.v_proj": {
-        "snr": 2.5776851177215576,
-        "type": "self_attn.v_proj"
-    }
-}
--- a/src/axolotl/integrations/spectrum/model_snr_results/snr_results_meta-llama-Llama-3.2-3B-Instruct.json
+++ b/src/axolotl/integrations/spectrum/model_snr_results/snr_results_meta-llama-Llama-3.2-3B-Instruct.json
--- a/src/axolotl/integrations/spectrum/model_snr_results/snr_results_meta-llama-Llama-3.2-3B.json
+++ b/src/axolotl/integrations/spectrum/model_snr_results/snr_results_meta-llama-Llama-3.2-3B.json
--- a/src/axolotl/prompt_strategies/base.py
+++ b/src/axolotl/prompt_strategies/base.py
@@ -13,8 +13,19 @@ def load(strategy, cfg, module_base=None, **kwargs):
        if len(strategy.split(".")) == 1:
            strategy = strategy + ".default"
        load_fn = strategy.split(".")[-1]
-        strategy = ".".join(strategy.split(".")[:-1])
-        mod = importlib.import_module(f".{strategy}", module_base)
+        if len(strategy.split(".")) > 1:
+            try:
+                importlib.import_module(
+                    strategy.split(".")[-2],
+                    ".".join(strategy.split(".")[:-2]),
+                )
+                module_base = ".".join(strategy.split(".")[:-2])
+                strategy = strategy.split(".")[-2]
+            except ModuleNotFoundError:
+                strategy = "." + ".".join(strategy.split(".")[:-1])
+        else:
+            strategy = "." + ".".join(strategy.split(".")[:-1])
+        mod = importlib.import_module(strategy, module_base)
        func = getattr(mod, load_fn)
        return func(cfg, **kwargs)
    except Exception:  # pylint: disable=broad-exception-caught
--- a/src/axolotl/prompt_strategies/dpo/passthrough.py
+++ b/src/axolotl/prompt_strategies/dpo/passthrough.py
@@ -0,0 +1,14 @@
+"""
+DPO prompt strategies passthrough/zero-processing strategy
+"""
+
+
+def default(
+    cfg, dataset_idx=0, **kwargs
+):  # pylint: disable=possibly-unused-variable,unused-argument
+    def transform_fn(
+        sample, tokenizer=None
+    ):  # pylint: disable=possibly-unused-variable,unused-argument
+        return sample
+
+    return transform_fn
--- a/src/axolotl/utils/config/models/input/v0_4_1/init.py
+++ b/src/axolotl/utils/config/models/input/v0_4_1/init.py
@@ -24,6 +24,8 @@ from transformers.utils.import_utils import is_torch_npu_available

 from axolotl.utils.config.models.internals import EnvCapabilities, GPUCapabilities

+from .trl import TrlConfig
+
 LOG = logging.getLogger("axolotl.utils.config.models.input")

 SUPPORTED_METRICS = {"sacrebleu", "comet", "ter", "chrf", "perplexity"}
@@ -33,6 +35,7 @@ class RLType(str, Enum):
    """RL trainer type configuration subset"""

    dpo = "dpo"  # pylint: disable=invalid-name
+    grpo = "grpo"  # pylint: disable=invalid-name
    ipo = "ipo"  # pylint: disable=invalid-name
    orpo = "orpo"  # pylint: disable=invalid-name
    kto = "kto"  # pylint: disable=invalid-name
@@ -115,9 +118,6 @@ class RemappedParameters(BaseModel):
    overrides_of_model_config: Optional[Dict[str, Any]] = Field(
        default=None, alias="model_config"
    )
-    overrides_of_model_kwargs: Optional[Dict[str, Any]] = Field(
-        default=None, alias="model_kwargs"
-    )
    type_of_model: Optional[str] = Field(default=None, alias="model_type")
    revision_of_model: Optional[str] = Field(default=None, alias="model_revision")

@@ -429,6 +429,8 @@ class ModelInputConfig(BaseModel):
    )
    trust_remote_code: Optional[bool] = None

+    model_kwargs: Optional[Dict[str, Any]] = None
+
    @field_validator("trust_remote_code")
    @classmethod
    def hint_trust_remote_code(cls, trust_remote_code):
@@ -664,14 +666,20 @@ class AxolotlInputConfig(
    auto_resume_from_checkpoints: Optional[bool] = None
    resize_token_embeddings_to_32x: Optional[bool] = None
    mean_resizing_embeddings: Optional[bool] = False
+    # optionally shrink the embeddings when the tokenizer vocab size is smaller
+    shrink_embeddings: Optional[bool] = None

    rl: Optional[RLType] = None
+    trl: Optional[TrlConfig] = Field(
+        default_factory=lambda: TrlConfig(),  # pylint: disable=unnecessary-lambda
+    )
    reward_model: Optional[bool] = None
    process_reward_model: Optional[bool] = None
    num_labels: Optional[int] = None
    dpo_use_weighting: Optional[
        bool
    ] = None  # whether to use weighting in DPO trainer. If none, default is false in the trainer.
+    dpo_use_logits_to_keep: Optional[bool] = None

    datasets: Optional[conlist(Union[SFTDataset, DPODataset, KTODataset, StepwiseSupervisedDataset], min_length=1)] = None  # type: ignore
    test_datasets: Optional[conlist(Union[SFTDataset, DPODataset, KTODataset, StepwiseSupervisedDataset], min_length=1)] = None  # type: ignore
--- a/src/axolotl/utils/config/models/input/v0_4_1/trl.py
+++ b/src/axolotl/utils/config/models/input/v0_4_1/trl.py
@@ -0,0 +1,32 @@
+"""
+GRPO specific configuration args
+"""
+from typing import List, Optional
+
+from pydantic import BaseModel, Field
+
+
+class TrlConfig(BaseModel):
+    """
+    Input args for TRL.
+    """
+
+    beta: Optional[float] = None
+    max_completion_length: Optional[int] = Field(
+        default=None,
+        json_schema_extra={
+            "description": "Maximum length of the completion for RL training"
+        },
+    )
+
+    # GRPO specific args
+    use_vllm: Optional[bool] = False
+    vllm_device: Optional[str] = "auto"
+    vllm_gpu_memory_utilization: Optional[float] = 0.9
+    vllm_max_model_len: Optional[int] = None
+    vllm_dtype: Optional[str] = "auto"
+    reward_funcs: Optional[List[str]] = None
+    num_generations: Optional[int] = None
+    sync_ref_model: Optional[bool] = False
+    ref_model_mixup_alpha: Optional[float] = 0.9
+    ref_model_sync_steps: Optional[int] = 64
--- a/src/axolotl/utils/data/rl.py
+++ b/src/axolotl/utils/data/rl.py
@@ -57,7 +57,7 @@ def _save_preprocessed_ds(cfg, sub_cfg, dataset):
        dataset.save_to_disk(str(prepared_ds_path))


-def map_dataset(cfg, data_set, ds_transform_fn, tokenizer):
+def map_dataset(cfg, data_set, ds_transform_fn, tokenizer, **map_kwargs):
    sig = inspect.signature(ds_transform_fn)
    if "tokenizer" in sig.parameters:
        if not tokenizer:
@@ -70,6 +70,7 @@ def map_dataset(cfg, data_set, ds_transform_fn, tokenizer):
    data_set = data_set.map(
        ds_transform_fn,
        desc="Mapping RL Dataset",
+        **map_kwargs,
    )

    return data_set
@@ -150,36 +151,45 @@ def load_prepare_preference_datasets(cfg):
                else:
                    ds_transform_fn = load_dpo(_type, _cfg, dataset_idx=i)

+                map_kwargs = {}
+                if isinstance(ds_transform_fn, tuple):
+                    ds_transform_fn, map_kwargs = ds_transform_fn
                split_datasets[i] = map_dataset(
-                    cfg, data_set, ds_transform_fn, tokenizer
+                    cfg, data_set, ds_transform_fn, tokenizer, **map_kwargs
                )
            elif _cfg.rl == "kto":
                ds_transform_fn = load_kto(_type, _cfg, dataset_idx=i)
+                map_kwargs = {}
+                if isinstance(ds_transform_fn, tuple):
+                    ds_transform_fn, map_kwargs = ds_transform_fn
                split_datasets[i] = map_dataset(
-                    cfg, data_set, ds_transform_fn, tokenizer
+                    cfg, data_set, ds_transform_fn, tokenizer, **map_kwargs
                )
            else:
                # If no `type` is provided, assume the dataset is already in the expected format with
                # "prompt", "chosen" and "rejected" already preprocessed
                split_datasets[i] = data_set

-            drop_long = partial(
-                drop_long_rl_seq,
-                rl=_cfg.rl,
-                tokenizer=tokenizer,
-                sequence_len=cfg.sequence_len,
-            )
+            if not cfg.skip_prepare_dataset:
+                drop_long = partial(
+                    drop_long_rl_seq,
+                    rl=_cfg.rl,
+                    tokenizer=tokenizer,
+                    sequence_len=cfg.sequence_len,
+                )

-            prior_len = len(split_datasets[i])
-            split_datasets[i] = split_datasets[i].filter(
-                drop_long,
-                num_proc=cfg.dataset_processes,
-                load_from_cache_file=not cfg.is_preprocess,
-                desc="Dropping Long Sequences",
-            )
-            dropped = prior_len - len(split_datasets[i])
-            if dropped:
-                LOG.warning(f"Dropped {dropped} long samples from dataset index {i}")
+                prior_len = len(split_datasets[i])
+                split_datasets[i] = split_datasets[i].filter(
+                    drop_long,
+                    num_proc=cfg.dataset_processes,
+                    load_from_cache_file=not cfg.is_preprocess,
+                    desc="Dropping Long Sequences",
+                )
+                dropped = prior_len - len(split_datasets[i])
+                if dropped:
+                    LOG.warning(
+                        f"Dropped {dropped} long samples from dataset index {i}"
+                    )

        combined_datasets = concatenate_datasets(split_datasets)
        combined_datasets = combined_datasets.shuffle(seed=cfg.seed)
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -357,8 +357,8 @@ class ModelLoader:

        # init model kwargs
        self.model_kwargs: Dict[str, Any] = {}
-        if cfg.overrides_of_model_kwargs:
-            for key, val in cfg.overrides_of_model_kwargs.items():
+        if cfg.model_kwargs:
+            for key, val in cfg.model_kwargs.items():
                self.model_kwargs[key] = val

        # init model
@@ -1053,9 +1053,12 @@ class ModelLoader:
            if self.cfg.resize_token_embeddings_to_32x
            else len(self.tokenizer)
        )
-        if (
-            hasattr(self.model, "get_input_embeddings")
-            and self.model.get_input_embeddings().num_embeddings != embeddings_len
+        if hasattr(self.model, "get_input_embeddings") and (
+            self.model.get_input_embeddings().num_embeddings < embeddings_len
+            or (
+                self.model.get_input_embeddings().num_embeddings > embeddings_len
+                and self.cfg.shrink_embeddings
+            )
        ):
            resize_kwargs = {}
            if self.cfg.mean_resizing_embeddings is not None:
--- a/src/axolotl/utils/trainer.py
+++ b/src/axolotl/utils/trainer.py
@@ -576,7 +576,7 @@ def prepare_opinionated_env(cfg):
 def setup_trainer(
    cfg, train_dataset, eval_dataset, model, tokenizer, processor, total_num_steps
 ):
-    if cfg.rl in ("dpo", "ipo", "orpo", "kto", "simpo"):
+    if cfg.rl in ("dpo", "grpo", "ipo", "orpo", "kto", "simpo"):
        trainer_builder = HFRLTrainerBuilder(cfg, model[0], tokenizer, processor)
        trainer_builder.model_ref = model[1]
        trainer_builder.peft_config = model[2]
--- a/tests/e2e/multigpu/test_grpo.py
+++ b/tests/e2e/multigpu/test_grpo.py
Author	SHA1	Message	Date
Wing Lian	6905711e45	set max steps to -1 when empty	2025-02-06 17:27:52 -05:00
Wing Lian	bb5a6135eb	don't set total num steps for grpo	2025-02-06 17:23:13 -05:00
Wing Lian	e637f9b1a4	cleanup pythonpath if axo in it	2025-02-06 17:03:21 -05:00
Wing Lian	1a3bfd6e0f	test not deleting pythonpath for custom code bundling clean path and add mounts handle mounting	2025-02-06 17:01:19 -05:00
Wing Lian	3df4df868c	make sure to pass kwargs when using accelerate	2025-02-06 14:00:15 -05:00
Wing Lian	c82cbdc6d9	make sure to handle num-processes with cloud	2025-02-06 13:50:39 -05:00
Wing Lian	ecea44c902	fix num_processes in passing to accelerate	2025-02-06 13:39:46 -05:00
Wing Lian	4f9c57e95d	check for src axolotl in PYTHONPATH before removing it	2025-02-06 13:26:23 -05:00
Wing Lian	3d38bc82b8	include vllm in build	2025-02-06 11:09:42 -05:00
Wing Lian	756a8332d6	set default on trl config	2025-02-05 22:17:10 -05:00
Wing Lian	aded9c500d	refactor cfg.grpo_* to use cfg.trl.*	2025-02-05 20:41:14 -05:00
Wing Lian	3659d812f7	use cfg.max_completion_length, not sequence_len	2025-02-05 13:20:17 -05:00
Salman Mohammadi	bdb0f97082	adding 'reward_processing_classes'	2025-02-05 18:18:42 +00:00
Salman Mohammadi	65b6519447	adding 'reward_processing_classes'	2025-02-05 18:13:05 +00:00
Wing Lian	a1958b09de	seperately include max_completion_len	2025-02-05 13:01:52 -05:00
Salman Mohammadi	b8f258817e	adding reward fn verification	2025-02-05 13:30:02 +00:00
Wing Lian	753146b458	max_length moved to reward config	2025-02-04 11:06:26 -05:00
Wing Lian	d683c50113	fix config cls	2025-02-04 11:06:26 -05:00
Wing Lian	234cd8311e	fix failure case in prompter loading	2025-02-04 11:06:26 -05:00
Wing Lian	f9893e3842	fix dpo config and add use_logits_to_keep	2025-02-04 11:06:26 -05:00
Wing Lian	ac1ebc58a8	add support for num_generations	2025-02-04 11:06:25 -05:00
Wing Lian	56f3b9f20f	bump pydantic to support vllm	2025-02-04 11:06:25 -05:00
Wing Lian	2c1376d8c4	don't shrink embeddings unless told to	2025-02-04 11:06:25 -05:00
Wing Lian	3c7517fd55	add support for passing map kwargs to dataset map in rl	2025-02-04 11:06:25 -05:00
Wing Lian	1e94d7ef65	more fixes to get grpo working	2025-02-04 11:06:25 -05:00
Wing Lian	cfc7fe0df2	remove ununsable args kwargs	2025-02-04 11:06:25 -05:00
Wing Lian	3c4fe478cf	be nice with self.cfg.dataset_processes	2025-02-04 11:06:25 -05:00
Wing Lian	c810599c66	order matters	2025-02-04 11:06:24 -05:00
Wing Lian	300ffc2cb6	make it a dataclass	2025-02-04 11:06:24 -05:00
Wing Lian	b1c4711145	load the class from strat	2025-02-04 11:06:24 -05:00
Wing Lian	d155849e2c	use correct builder	2025-02-04 11:06:24 -05:00
Wing Lian	626db6cb84	collator for grpo and prompt loader	2025-02-04 11:06:24 -05:00
Wing Lian	79159b4871	support custom module prompt strategy for rl	2025-02-04 11:06:24 -05:00
Wing Lian	704ddd6ff1	honor skip prepare for rl	2025-02-04 11:06:24 -05:00
Wing Lian	54b0d3d0e8	passthrough dataset parser for dpo/grpo	2025-02-04 11:06:23 -05:00
Wing Lian	59ad21f2de	refactor a bit for better grpo support	2025-02-04 11:06:23 -05:00
Wing Lian	57264b6491	respect dotenv for cli	2025-02-04 11:06:23 -05:00
Wing Lian	d495e41ba1	refactor dpo trainer into own module	2025-02-04 11:06:23 -05:00
Wing Lian	6067fe6c28	upgrade trl to 0.14.0	2025-02-04 11:06:23 -05:00