diff --git a/tests/conftest.py b/tests/conftest.py
index 2fc985d3a..3a20bbfdd 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -46,6 +46,7 @@ def download_mhenrichsen_alpaca_2k_w_revision_dataset():
     )
 
 
+@pytest.fixture(scope="session", autouse=True)
 def download_mlabonne_finetome_100k_dataset():
     # download the dataset
     snapshot_download("mlabonne/FineTome-100k", repo_type="dataset")
diff --git a/tests/prompt_strategies/conftest.py b/tests/prompt_strategies/conftest.py
index 43423f725..00a2bf300 100644
--- a/tests/prompt_strategies/conftest.py
+++ b/tests/prompt_strategies/conftest.py
@@ -4,6 +4,7 @@ shared fixtures for prompt strategies tests
 
 import pytest
 from datasets import Dataset
+from huggingface_hub import hf_hub_download
 from transformers import AutoTokenizer
 
 
@@ -60,6 +61,17 @@ def fixture_basic_dataset():
 
 @pytest.fixture(name="llama3_tokenizer")
 def fixture_llama3_tokenizer():
+    hf_hub_download(
+        repo_id="NousResearch/Meta-Llama-3-8B-Instruct",
+        filename="special_tokens_map.json",
+    )
+    hf_hub_download(
+        repo_id="NousResearch/Meta-Llama-3-8B-Instruct",
+        filename="tokenizer_config.json",
+    )
+    hf_hub_download(
+        repo_id="NousResearch/Meta-Llama-3-8B-Instruct", filename="tokenizer.json"
+    )
     tokenizer = AutoTokenizer.from_pretrained("NousResearch/Meta-Llama-3-8B-Instruct")
     return tokenizer