Compare commits

...

3 Commits

Author       SHA1        Message                                                         Date
Wing Lian    a9ebff087c  remove ref_model when peft model is passed into grpo trainer   2025-02-20 21:53:20 -05:00
NanoCode012  b53a41372f  feat: update transformers version to 4.49.0 (#2340)            2025-02-20 21:12:06 -05:00
Wing Lian    02f45e94be  calculate sample length fixes and SFT splitting fixes (#2351)  2025-02-20 14:29:58 -05:00
                         * fix chat template splitting long samples across multiple rows
                         * make the preprocessing faster
6 changed files with 25 additions and 10 deletions


@@ -13,7 +13,7 @@ liger-kernel==0.5.2
 packaging==23.2
 peft==0.14.0
-transformers==4.48.3
+transformers==4.49.0
 tokenizers>=0.21.0
 accelerate==1.3.0
 datasets==3.2.0
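
If you pull this change into an existing environment, a quick way to confirm the new pin took effect is a version assertion; a minimal sketch, where the expected string is simply the pin above:

import transformers

# should match the requirements.txt pin once dependencies are reinstalled
assert transformers.__version__ == "4.49.0", transformers.__version__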


@@ -39,6 +39,15 @@ class AxolotlGRPOTrainer(SchedulerMixin, GRPOTrainer):
         self.model = self._enable_gradient_checkpointing(self.model, kwargs["args"])
         # pylint: enable=access-member-before-definition
 
+        # cleanup the ref_model if we have a peft model passed in
+        # TODO remove this after next major trl release
+        if (
+            self.ref_model  # pylint: disable=access-member-before-definition
+            and is_peft_model(self.model)
+        ):
+            del self.ref_model
+            self.ref_model = None
+
     def _enable_gradient_checkpointing(
         self, model: PreTrainedModel, args: GRPOConfig
     ) -> PreTrainedModel:
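
For context on why the deletion is safe: when the policy is a PEFT model, trl can recover the reference policy by disabling the adapters on the same base weights, so a separate frozen ref_model only duplicates memory. A minimal sketch of the cleanup pattern, where is_peft_model is a stand-in for the helper the diff calls, not the actual trl/axolotl implementation:

from peft import PeftModel


def is_peft_model(model) -> bool:
    # stand-in: a PEFT model wraps the base weights with adapters, and the
    # reference log-probs can be computed with those adapters disabled
    return isinstance(model, PeftModel)


class TrainerSketch:
    def __init__(self, model, ref_model=None):
        self.model = model
        self.ref_model = ref_model
        if self.ref_model and is_peft_model(self.model):
            del self.ref_model  # drop the duplicate copy and free its memory
            self.ref_model = None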


@@ -127,6 +127,8 @@ class ReLoRACallback(TrainerCallback):
         optimizer: torch.optim.Optimizer,
         **_kwargs,
     ):
+        if not optimizer:
+            optimizer = state.optimizer
         if state.global_step > 0 and state.global_step % self.relora_steps == 0:
             checkpoint_folder = os.path.join(
                 args.output_dir,
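
The two added lines make the callback tolerant of being invoked without an optimizer argument. A small sketch of the fallback with a dummy state; it assumes, as the diff implies, that the trainer exposes its optimizer on state when none is passed directly:

from types import SimpleNamespace


def on_step_end(state, optimizer=None, **_kwargs):
    if not optimizer:
        optimizer = state.optimizer  # hypothetical slot mirroring the diff
    return optimizer


state = SimpleNamespace(global_step=10, optimizer="adamw-instance")
print(on_step_end(state))  # adamw-instance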


@@ -272,8 +272,7 @@ class ChatTemplateStrategy(PromptTokenizingStrategy):
                 dict(zip(feature_names, row))
             )
             for key, val in tokenized_prompt.items():
-                for i in range(0, len(val), self.sequence_len):
-                    res[key].append(val[i : i + self.sequence_len])
+                res[key].append(val)
 
         # If there are no examples left, return an empty dictionary
         if not res:
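
The removed inner loop is the bug named in 02f45e94be: fixed-window chunking split one tokenized sample across several dataset rows, breaking conversations mid-sequence. A toy illustration of the dropped behavior:

# what the removed code did: one 10-token sample became three rows
def chunk(val, sequence_len):
    return [val[i : i + sequence_len] for i in range(0, len(val), sequence_len)]


print(chunk(list(range(10)), 4))  # [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]

After the fix each sample stays whole in a single row; over-long samples are handled downstream by drop_long_seq_in_dataset (next file).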


@@ -172,10 +172,11 @@ def drop_long_seq_in_dataset(dataset: Dataset, cfg: DictDefault):
     )
 
     try:
-        min_input_len = np.min(get_dataset_lengths(dataset))
-        LOG.debug(f"min_input_len: {min_input_len}")
-        max_input_len = np.max(get_dataset_lengths(dataset))
-        LOG.debug(f"max_input_len: {max_input_len}")
+        ds_lengths = get_dataset_lengths(dataset, from_arrow=True)
+        min_input_len = np.min(ds_lengths)
+        LOG.info(f"min_input_len: {min_input_len}")
+        max_input_len = np.max(ds_lengths)
+        LOG.info(f"max_input_len: {max_input_len}")
     except AttributeError:
         pass
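
Besides raising the log level from debug to info, the rewrite computes the lengths array once and reuses it for both statistics, where the old code called get_dataset_lengths twice. The cost pattern in miniature:

samples = [[1, 2, 3], [4, 5], [6, 7, 8, 9]]
lengths = [len(s) for s in samples]  # one pass over the data, reused
print(min(lengths), max(lengths))    # 1 4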


@@ -4,13 +4,17 @@ helper util to calculate dataset lengths
 import numpy as np
 
 
-def get_dataset_lengths(dataset):
+def get_dataset_lengths(dataset, from_arrow=False):
     if "length" in dataset.column_names:
         lengths = np.array(dataset["length"])
     elif "position_ids" in dataset.column_names:
         position_ids = dataset["position_ids"]
         lengths = np.array([x[-1] + 1 for x in position_ids])
     else:
-        input_ids = dataset["input_ids"]
-        lengths = np.array([len(seq) for seq in input_ids])
+        if from_arrow:
+            input_ids = dataset.data.column("input_ids")
+            lengths = np.vectorize(len)(np.array(input_ids, dtype=object))
+        else:
+            input_ids = dataset["input_ids"]
+            lengths = np.array([len(seq) for seq in input_ids])
     return lengths
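
The from_arrow path is where the "make the preprocessing faster" part of #2351 lands: lengths are read off the dataset's underlying Arrow table instead of first materializing every row as a Python list. A small usage sketch, assuming only that a datasets.Dataset with an input_ids column is available:

import numpy as np
from datasets import Dataset

ds = Dataset.from_dict({"input_ids": [[1, 2, 3], [4, 5], [6]]})

# fast path: pull the column straight off the Arrow table, skipping
# per-row decoding into Python lists
input_ids = ds.data.column("input_ids")
lengths = np.vectorize(len)(np.array(input_ids, dtype=object))
print(lengths)  # [3 2 1]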