Compare commits

...

1 Commit

Author SHA1 Message Date
Wing Lian
9f1d548534 don't use zero first context for loading datasets 2025-05-23 10:38:32 -04:00

View File

@@ -53,7 +53,7 @@ from axolotl.utils.data.utils import (
retry_on_request_exceptions, retry_on_request_exceptions,
) )
from axolotl.utils.dict import DictDefault from axolotl.utils.dict import DictDefault
from axolotl.utils.distributed import is_local_main_process, zero_first from axolotl.utils.distributed import is_local_main_process
from axolotl.utils.trainer import ( from axolotl.utils.trainer import (
calculate_total_num_steps, calculate_total_num_steps,
process_datasets_for_packing, process_datasets_for_packing,
@@ -66,7 +66,6 @@ LOG = logging.getLogger(__name__)
def prepare_dataset(cfg, tokenizer, processor=None, preprocess_iterable=None): def prepare_dataset(cfg, tokenizer, processor=None, preprocess_iterable=None):
prompters = [] prompters = []
if not cfg.pretraining_dataset: if not cfg.pretraining_dataset:
with zero_first(is_local_main_process()):
if cfg.test_datasets: if cfg.test_datasets:
train_dataset, _, prompters = load_prepare_datasets( train_dataset, _, prompters = load_prepare_datasets(
tokenizer, tokenizer,
@@ -272,7 +271,7 @@ def load_tokenized_prepared_datasets(
LOG.info("Loading raw datasets...") LOG.info("Loading raw datasets...")
if not cfg.is_preprocess: if not cfg.is_preprocess:
LOG.warning( LOG.warning(
"Processing datasets during training can lead to VRAM instability. Please pre-process your dataset." "Processing datasets during training can lead to VRAM instability. Please use `axolotl preprocess` to prepare your dataset."
) )
if cfg.seed: if cfg.seed: