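Fix inverted comparison (gt vs lt): the dataset filter should keep samples where len(d["input_ids"]) < cfg.sequence_len, not > cfg.sequence_len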
This commit is contained in:
@@ -213,7 +213,7 @@ def load_prepare_datasets(tokenizer, cfg, default_dataset_prepared_path):
|
|||||||
[
|
[
|
||||||
d
|
d
|
||||||
for d in dataset
|
for d in dataset
|
||||||
if len(d["input_ids"]) > cfg.sequence_len
|
if len(d["input_ids"]) < cfg.sequence_len
|
||||||
and len(d["input_ids"]) > 0
|
and len(d["input_ids"]) > 0
|
||||||
and len(d["input_ids"]) == len(d["attention_mask"])
|
and len(d["input_ids"]) == len(d["attention_mask"])
|
||||||
and len(d["input_ids"]) == len(d["labels"])
|
and len(d["input_ids"]) == len(d["labels"])
|
||||||
|
|||||||
Reference in New Issue
Block a user