Gracefully handle the "length" feature used for group-by (#565)
This commit is contained in:
@@ -223,6 +223,8 @@ class MultipackDistributedDataloader:
|
|||||||
concatenated = {}
|
concatenated = {}
|
||||||
batched_data = [self.dataset[batch_idx] for batch_idx in batch]
|
batched_data = [self.dataset[batch_idx] for batch_idx in batch]
|
||||||
for feature in features:
|
for feature in features:
|
||||||
|
if feature == "length":
|
||||||
|
continue
|
||||||
if feature == "attention_mask":
|
if feature == "attention_mask":
|
||||||
arrays = [
|
arrays = [
|
||||||
(attn_mask_cum_idx + idx + 1) * np.array(item[feature])
|
(attn_mask_cum_idx + idx + 1) * np.array(item[feature])
|
||||||
|
|||||||
Reference in New Issue
Block a user