Process reward models (#2241)
* adding model_cfg to set num_labels
* using a num_labels field instead
* linting
* WIP stepwise prompt tokenizer
* this should work?
* trainer working?
* pushing to runpod
* fixing saving
* updating conf
* updating config, adding docs
* adding stepwise supervision docpage
* updating tests
* adding test for dataset
* fixing tests
* linting
* addressing some comments
* adding additional cfg fields support
* updating tests, fixing cfg
* fixing tests
* updating loss
* Update test_process_reward_model_smollm2.py
* updating loss values and seed
* dumb pre-commit
This commit is contained in:
@@ -8,6 +8,8 @@ from typing import List, Tuple, Union
|
||||
from datasets import (
|
||||
Dataset,
|
||||
DatasetDict,
|
||||
Sequence,
|
||||
Value,
|
||||
concatenate_datasets,
|
||||
load_dataset,
|
||||
load_from_disk,
|
||||
@@ -467,6 +469,17 @@ def get_dataset_wrapper(
|
||||
dataset,
|
||||
**ds_kwargs,
|
||||
)
|
||||
elif config_dataset.type.startswith("stepwise_supervised"):
|
||||
dataset_prompter = UnsupportedPrompter()
|
||||
ds_strategy = load(config_dataset.type, tokenizer, cfg, config_dataset)
|
||||
# we need to explicitly cast boolean labels to int
|
||||
# for compatibility with how trl's PRMTrainer works
|
||||
dataset = dataset.cast_column("labels", Sequence(Value("int64")))
|
||||
dataset_wrapper = TokenizedPromptDataset(
|
||||
ds_strategy,
|
||||
dataset,
|
||||
**ds_kwargs,
|
||||
)
|
||||
elif ds_strategy := load(
|
||||
config_dataset.type, tokenizer, cfg, config_dataset, processor=processor
|
||||
):
|
||||
|
||||
Reference in New Issue
Block a user