rename references to dpo dataset prep to pref data (#2258)

This commit is contained in:
Wing Lian
2025-01-14 22:07:55 -05:00
committed by GitHub
parent 1ed4de73b6
commit 19cd83d408
5 changed files with 13 additions and 13 deletions

View File

@@ -17,7 +17,7 @@ from huggingface_hub import snapshot_download
from transformers import AutoTokenizer
from axolotl.utils.data import load_tokenized_prepared_datasets
from axolotl.utils.data.rl import load_prepare_dpo_datasets
from axolotl.utils.data.rl import load_prepare_preference_datasets
from axolotl.utils.dict import DictDefault
@@ -280,7 +280,7 @@ class TestDatasetPreparation(unittest.TestCase):
}
)
train_dataset, _ = load_prepare_dpo_datasets(cfg)
train_dataset, _ = load_prepare_preference_datasets(cfg)
assert len(train_dataset) == 1800
assert "conversation" in train_dataset.features
@@ -329,7 +329,7 @@ class TestDatasetPreparation(unittest.TestCase):
}
)
train_dataset, _ = load_prepare_dpo_datasets(cfg)
train_dataset, _ = load_prepare_preference_datasets(cfg)
assert len(train_dataset) == 1800
assert "conversation" in train_dataset.features

View File

@@ -12,7 +12,7 @@ from datasets import Dataset
from transformers import AutoTokenizer
from axolotl.utils.data import prepare_dataset
from axolotl.utils.data.rl import load_prepare_dpo_datasets
from axolotl.utils.data.rl import load_prepare_preference_datasets
from axolotl.utils.data.utils import deduplicate_and_log_datasets
from axolotl.utils.dict import DictDefault
from axolotl.utils.models import load_processor, load_tokenizer
@@ -236,7 +236,7 @@ class TestDeduplicateRLDataset(unittest.TestCase):
"""Verify that loading with deduplication removes duplicates."""
# Load the dataset using the deduplication setting
train_dataset, _ = load_prepare_dpo_datasets(self.cfg)
train_dataset, _ = load_prepare_preference_datasets(self.cfg)
# Verify that the dataset has been deduplicated
assert len(train_dataset) == 1800, "Dataset was not properly deduplicated"
@@ -245,7 +245,7 @@ class TestDeduplicateRLDataset(unittest.TestCase):
"""Verify that loading without deduplication retains duplicates."""
self.cfg.dataset_exact_deduplication = False
# Load the dataset without deduplication
train_dataset, _ = load_prepare_dpo_datasets(self.cfg)
train_dataset, _ = load_prepare_preference_datasets(self.cfg)
# Verify that the dataset retains duplicates
assert (