remove ref_model when peft model is passed into grpo trainer

feat: update transformers version to 4.49.0 (#2340 )
calculate sample length fixes and SFT splitting fixes (#2351 )
2025-02-20 21:53:20 -05:00 · 2025-02-20 21:12:06 -05:00 · 2025-02-20 14:29:58 -05:00 · 2025-02-19 09:23:31 -05:00
7 changed files with 26 additions and 10 deletions
--- a/docs/lora_optims.qmd
+++ b/docs/lora_optims.qmd
@@ -12,6 +12,7 @@ to leverage operator fusion and tensor re-use in order to improve speed and redu
 memory usage during the forward and backward passes of these calculations.

 We currently support several common model architectures, including (but not limited to):
+
 - `llama`
 - `mistral`
 - `qwen2`
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,7 +13,7 @@ liger-kernel==0.5.2
 packaging==23.2

 peft==0.14.0
-transformers==4.48.3
+transformers==4.49.0
 tokenizers>=0.21.0
 accelerate==1.3.0
 datasets==3.2.0
--- a/src/axolotl/core/trainers/grpo/trainer.py
+++ b/src/axolotl/core/trainers/grpo/trainer.py
@@ -39,6 +39,15 @@ class AxolotlGRPOTrainer(SchedulerMixin, GRPOTrainer):
            self.model = self._enable_gradient_checkpointing(self.model, kwargs["args"])
        # pylint: enable=access-member-before-definition

+        # cleanup the ref_model if we have a peft model passed in
+        # TODO remove this after next major trl release
+        if (
+            self.ref_model  # pylint: disable=access-member-before-definition
+            and is_peft_model(self.model)
+        ):
+            del self.ref_model
+            self.ref_model = None
+
    def _enable_gradient_checkpointing(
        self, model: PreTrainedModel, args: GRPOConfig
    ) -> PreTrainedModel:
--- a/src/axolotl/monkeypatch/relora.py
+++ b/src/axolotl/monkeypatch/relora.py
@@ -127,6 +127,8 @@ class ReLoRACallback(TrainerCallback):
        optimizer: torch.optim.Optimizer,
        **_kwargs,
    ):
+        if not optimizer:
+            optimizer = state.optimizer
        if state.global_step > 0 and state.global_step % self.relora_steps == 0:
            checkpoint_folder = os.path.join(
                args.output_dir,
--- a/src/axolotl/prompt_strategies/chat_template.py
+++ b/src/axolotl/prompt_strategies/chat_template.py
@@ -272,8 +272,7 @@ class ChatTemplateStrategy(PromptTokenizingStrategy):
                dict(zip(feature_names, row))
            )
            for key, val in tokenized_prompt.items():
-                for i in range(0, len(val), self.sequence_len):
-                    res[key].append(val[i : i + self.sequence_len])
+                res[key].append(val)

        # If there are no examples left, return an empty dictionary
        if not res:
--- a/src/axolotl/utils/data/utils.py
+++ b/src/axolotl/utils/data/utils.py
@@ -172,10 +172,11 @@ def drop_long_seq_in_dataset(dataset: Dataset, cfg: DictDefault):
    )

    try:
-        min_input_len = np.min(get_dataset_lengths(dataset))
-        LOG.debug(f"min_input_len: {min_input_len}")
-        max_input_len = np.max(get_dataset_lengths(dataset))
-        LOG.debug(f"max_input_len: {max_input_len}")
+        ds_lengths = get_dataset_lengths(dataset, from_arrow=True)
+        min_input_len = np.min(ds_lengths)
+        LOG.info(f"min_input_len: {min_input_len}")
+        max_input_len = np.max(ds_lengths)
+        LOG.info(f"max_input_len: {max_input_len}")
    except AttributeError:
        pass

--- a/src/axolotl/utils/samplers/utils.py
+++ b/src/axolotl/utils/samplers/utils.py
@@ -4,13 +4,17 @@ helper util to calculate dataset lengths
 import numpy as np


-def get_dataset_lengths(dataset):
+def get_dataset_lengths(dataset, from_arrow=False):
    if "length" in dataset.column_names:
        lengths = np.array(dataset["length"])
    elif "position_ids" in dataset.column_names:
        position_ids = dataset["position_ids"]
        lengths = np.array([x[-1] + 1 for x in position_ids])
    else:
-        input_ids = dataset["input_ids"]
-        lengths = np.array([len(seq) for seq in input_ids])
+        if from_arrow:
+            input_ids = dataset.data.column("input_ids")
+            lengths = np.vectorize(len)(np.array(input_ids, dtype=object))
+        else:
+            input_ids = dataset["input_ids"]
+            lengths = np.array([len(seq) for seq in input_ids])
    return lengths
Author	SHA1	Message	Date
Wing Lian	a9ebff087c	remove ref_model when peft model is passed into grpo trainer	2025-02-20 21:53:20 -05:00
NanoCode012	b53a41372f	feat: update transformers version to 4.49.0 (#2340 )	2025-02-20 21:12:06 -05:00
Wing Lian	02f45e94be	calculate sample length fixes and SFT splitting fixes (#2351 ) * fix chat template splitting long samples across multiple rows * make the preprocessing faster	2025-02-20 14:29:58 -05:00
Dan Saunders	954e192f38	quick formatting fix for LoRA optims doc (#2349 )	2025-02-19 09:23:31 -05:00