From ae8738aa874b5aba709bb05c3e4c661aeae1317b Mon Sep 17 00:00:00 2001 From: Sunny Liu Date: Wed, 19 Mar 2025 12:26:11 -0400 Subject: [PATCH] skip check_dataset_labels during debug for grpo --- src/axolotl/common/datasets.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/axolotl/common/datasets.py b/src/axolotl/common/datasets.py index 3e712f772..627f08c60 100644 --- a/src/axolotl/common/datasets.py +++ b/src/axolotl/common/datasets.py @@ -129,17 +129,21 @@ def load_preference_datasets( total_num_steps = None if cli_args.debug or cfg.debug: - LOG.info("check_dataset_labels...") + if cfg.rl == "grpo": + LOG.info("skip check_dataset_labels during debug for grpo") + else: + LOG.info("check_dataset_labels...") - tokenizer = load_tokenizer(cfg) - train_samples = sample_dataset(train_dataset, cli_args.debug_num_examples) - check_dataset_labels( - train_samples, - tokenizer, - num_examples=cli_args.debug_num_examples, - text_only=cli_args.debug_text_only, - rl_mode=True, - ) + tokenizer = load_tokenizer(cfg) + train_samples = sample_dataset(train_dataset, cli_args.debug_num_examples) + + check_dataset_labels( + train_samples, + tokenizer, + num_examples=cli_args.debug_num_examples, + text_only=cli_args.debug_text_only, + rl_mode=True, + ) return TrainDatasetMeta( train_dataset=train_dataset,