skip check_dataset_labels during debug for grpo

This commit is contained in:
Sunny Liu
2025-03-19 12:26:11 -04:00
committed by Sung Ching Liu
parent ec52561a0c
commit ae8738aa87

View File

@@ -129,17 +129,21 @@ def load_preference_datasets(
total_num_steps = None total_num_steps = None
if cli_args.debug or cfg.debug: if cli_args.debug or cfg.debug:
LOG.info("check_dataset_labels...") if cfg.rl == "grpo":
LOG.info("skip check_dataset_labels during debug for grpo")
else:
LOG.info("check_dataset_labels...")
tokenizer = load_tokenizer(cfg) tokenizer = load_tokenizer(cfg)
train_samples = sample_dataset(train_dataset, cli_args.debug_num_examples) train_samples = sample_dataset(train_dataset, cli_args.debug_num_examples)
check_dataset_labels(
train_samples, check_dataset_labels(
tokenizer, train_samples,
num_examples=cli_args.debug_num_examples, tokenizer,
text_only=cli_args.debug_text_only, num_examples=cli_args.debug_num_examples,
rl_mode=True, text_only=cli_args.debug_text_only,
) rl_mode=True,
)
return TrainDatasetMeta( return TrainDatasetMeta(
train_dataset=train_dataset, train_dataset=train_dataset,