add missing fixture decorator for predownload dataset (#2117) [skip ci]

* add missing fixture decorator for predownload dataset
* also pre-download the tokenizer files
2024-12-03 18:08:46 -05:00
parent d87df2c776
commit 418ad2b586
2 changed files with 13 additions and 0 deletions
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -46,6 +46,7 @@ def download_mhenrichsen_alpaca_2k_w_revision_dataset():
    )
@pytest.fixture(scope="session", autouse=True)
def download_mlabonne_finetome_100k_dataset():
    """Fetch a snapshot of the ``mlabonne/FineTome-100k`` dataset repo.

    Registered as a session-scoped, autouse fixture, so pytest invokes it
    without any test having to request it by name.
    """
    snapshot_download(repo_id="mlabonne/FineTome-100k", repo_type="dataset")
--- a/tests/prompt_strategies/conftest.py
+++ b/tests/prompt_strategies/conftest.py
@@ -4,6 +4,7 @@ shared fixtures for prompt strategies tests
 import pytest
 from datasets import Dataset
 from huggingface_hub import hf_hub_download
 from transformers import AutoTokenizer
@@ -60,6 +61,17 @@ def fixture_basic_dataset():
@pytest.fixture(name="llama3_tokenizer")
def fixture_llama3_tokenizer():
    """Provide the Llama 3 Instruct tokenizer as the ``llama3_tokenizer`` fixture.

    The individual tokenizer files are downloaded with ``hf_hub_download``
    first, and only then is the tokenizer loaded through
    ``AutoTokenizer.from_pretrained``.
    """
    model_repo = "NousResearch/Meta-Llama-3-8B-Instruct"

    # Same files, same order as the explicit pre-download calls.
    for tokenizer_file in (
        "special_tokens_map.json",
        "tokenizer_config.json",
        "tokenizer.json",
    ):
        hf_hub_download(repo_id=model_repo, filename=tokenizer_file)

    return AutoTokenizer.from_pretrained(model_repo)