From ac79360161ab9265fd8472a1189a83bbf99dddf2 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Thu, 25 May 2023 16:31:59 -0400 Subject: [PATCH] shard fix --- src/axolotl/utils/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/axolotl/utils/data.py b/src/axolotl/utils/data.py index 2f9a1afec..0375cf9db 100644 --- a/src/axolotl/utils/data.py +++ b/src/axolotl/utils/data.py @@ -112,7 +112,7 @@ def load_tokenized_prepared_datasets( raise Exception("unhandled dataset load") # support for using a subset of the data if d.shards: - ds = ds.shuffle(seed=42)["train"].shard(num_shards=cfg.shards, index=0) + ds = ds.shuffle(seed=42)["train"].shard(num_shards=d.shards, index=0) d_type = d.type d_type_split = d_type.split(":") d_base_type = d_type_split[0]