drop empty tokenized rows too (#509)
This commit is contained in:
@@ -361,7 +361,7 @@ def add_position_ids(sample):
|
|||||||
|
|
||||||
|
|
||||||
def drop_long_seq(sample, sequence_len=2048):
|
def drop_long_seq(sample, sequence_len=2048):
|
||||||
return len(sample["input_ids"]) <= sequence_len
|
return len(sample["input_ids"]) <= sequence_len and len(sample["input_ids"]) > 0
|
||||||
|
|
||||||
|
|
||||||
@contextmanager
|
@contextmanager
|
||||||
|
|||||||
Reference in New Issue
Block a user