Only process the eval dataset for packing if it is not None

This commit is contained in:
Wing Lian
2023-07-30 22:55:17 -04:00
parent e74eab6e73
commit 958d423e7c

View File

@@ -182,9 +182,10 @@ def setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer):
train_dataset = train_dataset.filter(drop_long).map(
add_position_ids, num_proc=os.cpu_count()
)
eval_dataset = eval_dataset.filter(drop_long).map(
add_position_ids, num_proc=os.cpu_count()
)
if eval_dataset:
eval_dataset = eval_dataset.filter(drop_long).map(
add_position_ids, num_proc=os.cpu_count()
)
if cfg.sample_packing_eff_est:
total_num_tokens = (
cfg.total_num_tokens