make sure to download fixtures for kd test (#2541)

* make sure to download fixtures for kd test
* use same alpaca dataset
2025-04-21 10:31:50 -04:00
parent 341e95aac9
commit 7651550850
6 changed files with 36 additions and 4 deletions
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -193,6 +193,14 @@ def download_tiny_shakespeare_dataset():
    snapshot_download_w_retry("winglian/tiny-shakespeare", repo_type="dataset")


+@pytest.fixture(scope="session", autouse=True)
+def download_evolkit_kd_sample_dataset():
+    # pre-download the evolkit logprobs sample dataset (KD test fixture) once per session
+    snapshot_download_w_retry(
+        "axolotl-ai-co/evolkit-logprobs-pipeline-75k-v2-sample", repo_type="dataset"
+    )
+
+
@pytest.fixture(scope="session", autouse=True)
 def download_deepseek_model_fixture():
    snapshot_download_w_retry("axolotl-ai-co/DeepSeek-V3-11M", repo_type="model")
@@ -208,6 +216,16 @@ def download_huggyllama_model_fixture():
    )


+@pytest.fixture(scope="session", autouse=True)
+def download_llama33_70b_model_fixture():
+    # download only files matching "*token*" plus config.json (see allow_patterns)
+    snapshot_download_w_retry(
+        "axolotl-ai-co/Llama-3.3-70B-Instruct-tokenizer",
+        repo_type="model",
+        allow_patterns=["*token*", "config.json"],
+    )
+
+
@pytest.fixture(scope="session", autouse=True)
 def download_llama_1b_model_fixture():
    # download the tokenizer only
@@ -315,6 +333,14 @@ def download_llama2_model_fixture():
    )


+@pytest.fixture(scope="session", autouse=True)
+def download_llama32_1b_model_fixture():
+    snapshot_download_w_retry(  # no allow_patterns filter is passed for this repo
+        "osllmai-community/Llama-3.2-1B",
+        repo_type="model",
+    )
+
+
@pytest.fixture
@enable_hf_offline
 def tokenizer_huggyllama(