drop empty tokenized rows too (#509)

This commit is contained in:
Wing Lian
2023-08-30 06:55:26 -07:00
committed by GitHub
parent 1e07c162f1
commit c56b450cf5

View File

@@ -361,7 +361,7 @@ def add_position_ids(sample):
def drop_long_seq(sample, sequence_len: int = 2048) -> bool:
    """Return whether *sample* has a usable number of tokens.

    A sample is kept only when ``sample["input_ids"]`` is non-empty and
    holds at most ``sequence_len`` entries, so both over-long and empty
    tokenized rows are rejected.

    Args:
        sample: Mapping with an ``"input_ids"`` entry that supports ``len()``.
        sequence_len: Maximum number of entries allowed (inclusive).

    Returns:
        ``True`` if ``0 < len(sample["input_ids"]) <= sequence_len``,
        otherwise ``False``.

    Raises:
        KeyError: If ``sample`` is a dict without an ``"input_ids"`` key.
    """
    # NOTE(review): presumably used as a dataset filter predicate -- confirm
    # against the call sites, which are not visible here.
    # Chained comparison measures the length once and checks both bounds.
    return 0 < len(sample["input_ids"]) <= sequence_len
@contextmanager