Gracefully handle the `length` feature used for group-by (#565)

This commit is contained in:
Wing Lian
2023-09-13 11:23:30 -04:00
committed by GitHub
parent e5bb22a56b
commit e7aa7b1a1e

View File

@@ -223,6 +223,8 @@ class MultipackDistributedDataloader:
concatenated = {} concatenated = {}
batched_data = [self.dataset[batch_idx] for batch_idx in batch] batched_data = [self.dataset[batch_idx] for batch_idx in batch]
for feature in features: for feature in features:
if feature == "length":
continue
if feature == "attention_mask": if feature == "attention_mask":
arrays = [ arrays = [
(attn_mask_cum_idx + idx + 1) * np.array(item[feature]) (attn_mask_cum_idx + idx + 1) * np.array(item[feature])