Make sure to download fixtures for the KD test (#2541)
* Make sure to download fixtures for the KD test
* Use the same alpaca dataset
This commit is contained in:
@@ -193,6 +193,14 @@ def download_tiny_shakespeare_dataset():
|
|||||||
snapshot_download_w_retry("winglian/tiny-shakespeare", repo_type="dataset")
|
snapshot_download_w_retry("winglian/tiny-shakespeare", repo_type="dataset")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session", autouse=True)
|
||||||
|
def download_evolkit_kd_sample_dataset():
|
||||||
|
# download the dataset
|
||||||
|
snapshot_download_w_retry(
|
||||||
|
"axolotl-ai-co/evolkit-logprobs-pipeline-75k-v2-sample", repo_type="dataset"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session", autouse=True)
|
@pytest.fixture(scope="session", autouse=True)
|
||||||
def download_deepseek_model_fixture():
|
def download_deepseek_model_fixture():
|
||||||
snapshot_download_w_retry("axolotl-ai-co/DeepSeek-V3-11M", repo_type="model")
|
snapshot_download_w_retry("axolotl-ai-co/DeepSeek-V3-11M", repo_type="model")
|
||||||
@@ -208,6 +216,16 @@ def download_huggyllama_model_fixture():
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session", autouse=True)
|
||||||
|
def download_llama33_70b_model_fixture():
|
||||||
|
# download the tokenizer only
|
||||||
|
snapshot_download_w_retry(
|
||||||
|
"axolotl-ai-co/Llama-3.3-70B-Instruct-tokenizer",
|
||||||
|
repo_type="model",
|
||||||
|
allow_patterns=["*token*", "config.json"],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session", autouse=True)
|
@pytest.fixture(scope="session", autouse=True)
|
||||||
def download_llama_1b_model_fixture():
|
def download_llama_1b_model_fixture():
|
||||||
# download the tokenizer only
|
# download the tokenizer only
|
||||||
@@ -315,6 +333,14 @@ def download_llama2_model_fixture():
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session", autouse=True)
|
||||||
|
def download_llama32_1b_model_fixture():
|
||||||
|
snapshot_download_w_retry(
|
||||||
|
"osllmai-community/Llama-3.2-1B",
|
||||||
|
repo_type="model",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
@enable_hf_offline
|
@enable_hf_offline
|
||||||
def tokenizer_huggyllama(
|
def tokenizer_huggyllama(
|
||||||
|
|||||||
@@ -0,0 +1,2 @@
|
|||||||
|
# Tests under this directory should be run "solo", on their own, as they
|
||||||
|
# seem to cause issues when run in the same batch as other tests.
|
||||||
|
|||||||
@@ -49,8 +49,9 @@ class TestPackedFlex:
|
|||||||
},
|
},
|
||||||
"datasets": [
|
"datasets": [
|
||||||
{
|
{
|
||||||
"path": "vicgalle/alpaca-gpt4",
|
"path": "tatsu-lab/alpaca",
|
||||||
"type": "alpaca",
|
"type": "alpaca",
|
||||||
|
"split": "train[:10%]",
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
"num_epochs": 1,
|
"num_epochs": 1,
|
||||||
|
|||||||
@@ -46,8 +46,9 @@ class TestResumeLlama:
|
|||||||
},
|
},
|
||||||
"datasets": [
|
"datasets": [
|
||||||
{
|
{
|
||||||
"path": "vicgalle/alpaca-gpt4",
|
"path": "tatsu-lab/alpaca",
|
||||||
"type": "alpaca",
|
"type": "alpaca",
|
||||||
|
"split": "train[:10%]",
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
"num_epochs": 2,
|
"num_epochs": 2,
|
||||||
|
|||||||
@@ -41,8 +41,9 @@ class TestPackedFlex(unittest.TestCase):
|
|||||||
},
|
},
|
||||||
"datasets": [
|
"datasets": [
|
||||||
{
|
{
|
||||||
"path": "vicgalle/alpaca-gpt4",
|
"path": "tatsu-lab/alpaca",
|
||||||
"type": "alpaca",
|
"type": "alpaca",
|
||||||
|
"split": "train[:10%]",
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
"num_epochs": 1,
|
"num_epochs": 1,
|
||||||
|
|||||||
@@ -40,8 +40,9 @@ class TestPackedLlama(unittest.TestCase):
|
|||||||
},
|
},
|
||||||
"datasets": [
|
"datasets": [
|
||||||
{
|
{
|
||||||
"path": "vicgalle/alpaca-gpt4",
|
"path": "tatsu-lab/alpaca",
|
||||||
"type": "alpaca",
|
"type": "alpaca",
|
||||||
|
"split": "train[:10%]",
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
"num_epochs": 1,
|
"num_epochs": 1,
|
||||||
|
|||||||
Reference in New Issue
Block a user