Only process the eval dataset for packing if it is not None
This commit is contained in:
@@ -182,9 +182,10 @@ def setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer):
     train_dataset = train_dataset.filter(drop_long).map(
         add_position_ids, num_proc=os.cpu_count()
     )
-    eval_dataset = eval_dataset.filter(drop_long).map(
-        add_position_ids, num_proc=os.cpu_count()
-    )
+    if eval_dataset:
+        eval_dataset = eval_dataset.filter(drop_long).map(
+            add_position_ids, num_proc=os.cpu_count()
+        )
     if cfg.sample_packing_eff_est:
         total_num_tokens = (
             cfg.total_num_tokens
Reference in New Issue
Block a user