upgrade trl to 0.24.0 and liger to 0.6.3 (#3230)

* upgrade trl to 0.24.0

* fix reward collator init

* use newer DataCollatorForPreference instead

* DataCollatorForPreference doesn't use padding kwarg

* fix input id labels

* fix fbgemm-gpu version for pytorch versions

* tweak pinned deps

* transformers doesn't support hub 1.0 yet

* upgrade liger dep to 0.6.3

* set TORCH_CUDA_ARCH_LIST correctly
This commit is contained in:
Wing Lian
2025-10-29 18:02:16 -04:00
committed by GitHub
parent 9d4d39e939
commit 98333e639a
5 changed files with 21 additions and 14 deletions

View File

@@ -12,7 +12,7 @@ from transformers import (
EarlyStoppingCallback,
Trainer,
)
-from trl.trainer.utils import RewardDataCollatorWithPadding
+from trl.trainer.reward_trainer import DataCollatorForPreference
from axolotl.core.builders.base import TrainerBuilderBase
from axolotl.core.trainers import (
@@ -453,7 +453,7 @@ class HFCausalTrainerBuilder(TrainerBuilderBase):
BatchSamplerDataCollatorForSeq2Seq,
DataCollatorForSeq2Seq,
DataCollatorWithFlattening,
-RewardDataCollatorWithPadding,
+DataCollatorForPreference,
]
]
collator_args = [self.tokenizer]
@@ -470,7 +470,10 @@ class HFCausalTrainerBuilder(TrainerBuilderBase):
if kwargs and isinstance(kwargs, dict):
kwargs.update(collator_cls_and_kwargs[1])
elif self.cfg.reward_model:
-collator = RewardDataCollatorWithPadding
+collator = DataCollatorForPreference
+tokenizer = collator_args.pop(0)
+kwargs["pad_token_id"] = tokenizer.pad_token_id
+kwargs.pop("padding")
elif use_batch_sampler_collator:
# Use V2BatchSamplerDataCollatorForSeq2Seq for flex attention,
# supported multipack models, or non-flash-attention llama

View File

@@ -71,10 +71,10 @@ class BTChatTemplateStrategy(ChatTemplateStrategy):
]
return {
-"input_ids_chosen": chosen_tokenized["input_ids"],
+"chosen_input_ids": chosen_tokenized["input_ids"],
"attention_mask_chosen": chosen_tokenized["attention_mask"],
"labels_chosen": 1.0,
-"input_ids_rejected": rejected_tokenized["input_ids"],
+"rejected_input_ids": rejected_tokenized["input_ids"],
"attention_mask_rejected": rejected_tokenized["attention_mask"],
"labels_rejected": 0.0,
}
"attention_mask_rejected": rejected_tokenized["attention_mask"],
"labels_rejected": 0.0,
}