remove eval streaming (not HF supported)

This commit is contained in:
Dan Saunders
2025-08-22 00:12:17 +00:00
parent 4121bcbc33
commit 42b38a718a
8 changed files with 32 additions and 148 deletions

View File

@@ -664,42 +664,3 @@ class TestDatasetPreparation:
# Should have samples from both datasets
sources = [sample["source"] for sample in samples]
assert len(set(sources)) >= 1 # At least one unique source
def test_eval_streaming_config(self):
    """Check that the train and eval streaming flags are resolved per split."""
    from axolotl.utils.data.sft import _is_streaming_enabled_for_split

    # Streaming switched on for training only.
    train_only = DictDefault({"streaming": True, "eval_streaming": False})
    assert _is_streaming_enabled_for_split(train_only, "train")
    assert not _is_streaming_enabled_for_split(train_only, "test")

    # Streaming switched on for evaluation only.
    eval_only = DictDefault({"streaming": False, "eval_streaming": True})
    assert not _is_streaming_enabled_for_split(eval_only, "train")
    assert _is_streaming_enabled_for_split(eval_only, "test")
def test_eval_specific_mixing_configs(self):
    """Verify that eval-prefixed mixing options win for the test split."""
    from axolotl.utils.data.sft import _get_streaming_config_for_split

    settings = {
        "dataset_mixing_strategy": "round_robin",
        "mixing_weights": [0.5, 0.5],
        "eval_dataset_mixing_strategy": "weighted",
        "eval_mixing_weights": [0.8, 0.2],
    }
    cfg = DictDefault(settings)

    # The train split is expected to report the un-prefixed values.
    resolved_train = _get_streaming_config_for_split(cfg, "train")
    assert resolved_train["dataset_mixing_strategy"] == "round_robin"
    assert resolved_train["mixing_weights"] == [0.5, 0.5]

    # The test split is expected to report the eval_* overrides instead.
    resolved_test = _get_streaming_config_for_split(cfg, "test")
    assert resolved_test["dataset_mixing_strategy"] == "weighted"
    assert resolved_test["mixing_weights"] == [0.8, 0.2]