Create preprocess CLI (#785)

* Create preprocess CLI

* Print prompt template if debugging

* Add print for unsupported prompters

* Formatting

* Formatting

* Refactor variables

* Formatting

* Formatting

* Formatting

* Formatting
This commit is contained in:
Casper
2023-10-26 15:35:42 +02:00
committed by GitHub
parent 05bd6f1122
commit e50ab072e2
9 changed files with 354 additions and 190 deletions

View File

@@ -222,7 +222,9 @@ def load_datasets(
) -> TrainDatasetMeta:
tokenizer = load_tokenizer(cfg)
train_dataset, eval_dataset, total_num_steps = prepare_dataset(cfg, tokenizer)
train_dataset, eval_dataset, total_num_steps, prompters = prepare_dataset(
cfg, tokenizer
)
if cli_args.debug or cfg.debug:
LOG.info("check_dataset_labels...")
@@ -238,6 +240,10 @@ def load_datasets(
text_only=cli_args.debug_text_only,
)
LOG.info("printing prompters...")
for prompter in prompters:
LOG.info(prompter)
return TrainDatasetMeta(
train_dataset=train_dataset,
eval_dataset=eval_dataset,