skip check_dataset_labels during debug for grpo

This commit is contained in:
Sunny Liu
2025-03-19 12:26:11 -04:00
committed by Sung Ching Liu
parent ec52561a0c
commit ae8738aa87

View File

@@ -129,17 +129,21 @@ def load_preference_datasets(
total_num_steps = None
if cli_args.debug or cfg.debug:
LOG.info("check_dataset_labels...")
if cfg.rl == "grpo":
LOG.info("skip check_dataset_labels during debug for grpo")
else:
LOG.info("check_dataset_labels...")
tokenizer = load_tokenizer(cfg)
train_samples = sample_dataset(train_dataset, cli_args.debug_num_examples)
check_dataset_labels(
train_samples,
tokenizer,
num_examples=cli_args.debug_num_examples,
text_only=cli_args.debug_text_only,
rl_mode=True,
)
tokenizer = load_tokenizer(cfg)
train_samples = sample_dataset(train_dataset, cli_args.debug_num_examples)
check_dataset_labels(
train_samples,
tokenizer,
num_examples=cli_args.debug_num_examples,
text_only=cli_args.debug_text_only,
rl_mode=True,
)
return TrainDatasetMeta(
train_dataset=train_dataset,