Fix falcon tokenization step (#1441) [skip ci]
* Fix falcon tokenization step
* chore: lint

---------

Co-authored-by: Wing Lian <wing.lian@gmail.com>
This commit is contained in:
@@ -124,9 +124,10 @@ def process_datasets_for_packing(cfg, train_dataset, eval_dataset):
|
|||||||
eval_dataset = eval_dataset.remove_columns("attention_mask")
|
eval_dataset = eval_dataset.remove_columns("attention_mask")
|
||||||
|
|
||||||
if cfg.model_config_type == "falcon":
|
if cfg.model_config_type == "falcon":
|
||||||
LOG.info("dropping token_type_ids column")
|
LOG.info("dropping token_type_ids column if it exists")
|
||||||
train_dataset = train_dataset.remove_columns("token_type_ids")
|
if "token_type_ids" in train_dataset.column_names:
|
||||||
if eval_dataset:
|
train_dataset = train_dataset.remove_columns("token_type_ids")
|
||||||
|
if eval_dataset and "token_type_ids" in eval_dataset.column_names:
|
||||||
eval_dataset = eval_dataset.remove_columns("token_type_ids")
|
eval_dataset = eval_dataset.remove_columns("token_type_ids")
|
||||||
|
|
||||||
train_dataset = train_dataset.filter(
|
train_dataset = train_dataset.filter(
|
||||||
|
|||||||
Reference in New Issue
Block a user