From 84c7bc4b68cd8d025cd5861448d8d4532a4c8b97 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Fri, 12 May 2023 14:03:25 -0400 Subject: [PATCH] whoops, gt vs lt --- src/axolotl/utils/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/axolotl/utils/data.py b/src/axolotl/utils/data.py index a168c5247..98fc00faf 100644 --- a/src/axolotl/utils/data.py +++ b/src/axolotl/utils/data.py @@ -213,7 +213,7 @@ def load_prepare_datasets(tokenizer, cfg, default_dataset_prepared_path): [ d for d in dataset - if len(d["input_ids"]) > cfg.sequence_len + if len(d["input_ids"]) < cfg.sequence_len and len(d["input_ids"]) > 0 and len(d["input_ids"]) == len(d["attention_mask"]) and len(d["input_ids"]) == len(d["labels"])