fix: mistral nemo does not recognize token_type_ids in forward (#2233)

This commit is contained in:
NanoCode012
2025-01-10 04:00:36 +07:00
committed by GitHub
parent 3c1921e400
commit 2e8d7c1adb

View File

@@ -196,7 +196,7 @@ def process_datasets_for_packing(cfg, train_dataset, eval_dataset):
if eval_dataset:
eval_dataset = eval_dataset.remove_columns("attention_mask")
- if cfg.model_config_type == "falcon":
+ if cfg.model_config_type in ["falcon", "mistral"]:
LOG.info("dropping token_type_ids column if it exists")
if "token_type_ids" in train_dataset.column_names:
train_dataset = train_dataset.remove_columns("token_type_ids")