upgrade trl to 0.24.0 and liger to 0.6.3 (#3230)
* upgrade trl to 0.24.0 * fix reward collator init * use newer DataCollatorForPreference instead * DataCollatorForPreference doesn't use padding kwarg * fix input id labels * fix fbgemm-gpu version for pytorch versions * tweak pinned deps * transformers doesn't support hub 1.0 yet * upgrade liger dep to 0.6.3 * set TORCH_CUDA_ARCH_LIST correctly
This commit is contained in:
@@ -12,7 +12,7 @@ from transformers import (
|
||||
EarlyStoppingCallback,
|
||||
Trainer,
|
||||
)
|
||||
from trl.trainer.utils import RewardDataCollatorWithPadding
|
||||
from trl.trainer.reward_trainer import DataCollatorForPreference
|
||||
|
||||
from axolotl.core.builders.base import TrainerBuilderBase
|
||||
from axolotl.core.trainers import (
|
||||
@@ -453,7 +453,7 @@ class HFCausalTrainerBuilder(TrainerBuilderBase):
|
||||
BatchSamplerDataCollatorForSeq2Seq,
|
||||
DataCollatorForSeq2Seq,
|
||||
DataCollatorWithFlattening,
|
||||
RewardDataCollatorWithPadding,
|
||||
DataCollatorForPreference,
|
||||
]
|
||||
]
|
||||
collator_args = [self.tokenizer]
|
||||
@@ -470,7 +470,10 @@ class HFCausalTrainerBuilder(TrainerBuilderBase):
|
||||
if kwargs and isinstance(kwargs, dict):
|
||||
kwargs.update(collator_cls_and_kwargs[1])
|
||||
elif self.cfg.reward_model:
|
||||
collator = RewardDataCollatorWithPadding
|
||||
collator = DataCollatorForPreference
|
||||
tokenizer = collator_args.pop(0)
|
||||
kwargs["pad_token_id"] = tokenizer.pad_token_id
|
||||
kwargs.pop("padding")
|
||||
elif use_batch_sampler_collator:
|
||||
# Use V2BatchSamplerDataCollatorForSeq2Seq for flex attention,
|
||||
# supported multipack models, or non-flash-attention llama
|
||||
|
||||
@@ -71,10 +71,10 @@ class BTChatTemplateStrategy(ChatTemplateStrategy):
|
||||
]
|
||||
|
||||
return {
|
||||
"input_ids_chosen": chosen_tokenized["input_ids"],
|
||||
"chosen_input_ids": chosen_tokenized["input_ids"],
|
||||
"attention_mask_chosen": chosen_tokenized["attention_mask"],
|
||||
"labels_chosen": 1.0,
|
||||
"input_ids_rejected": rejected_tokenized["input_ids"],
|
||||
"rejected_input_ids": rejected_tokenized["input_ids"],
|
||||
"attention_mask_rejected": rejected_tokenized["attention_mask"],
|
||||
"labels_rejected": 0.0,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user