add missing fixture decorator for predownload dataset (#2117) [skip ci]
* add missing fixture decorator for the dataset pre-download fixture * also pre-download the tokenizer files
This commit is contained in:
@@ -46,6 +46,7 @@ def download_mhenrichsen_alpaca_2k_w_revision_dataset():
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session", autouse=True)
def download_mlabonne_finetome_100k_dataset():
    """Pre-download the mlabonne/FineTome-100k dataset snapshot.

    Declared as a session-scoped, autouse fixture, so pytest invokes it
    once per test session without any test having to request it.
    """
    dataset_repo = "mlabonne/FineTome-100k"
    snapshot_download(dataset_repo, repo_type="dataset")
|
||||
|
||||
@@ -4,6 +4,7 @@ shared fixtures for prompt strategies tests
|
||||
|
||||
import pytest
|
||||
from datasets import Dataset
|
||||
from huggingface_hub import hf_hub_download
|
||||
from transformers import AutoTokenizer
|
||||
|
||||
|
||||
@@ -60,6 +61,17 @@ def fixture_basic_dataset():
|
||||
|
||||
@pytest.fixture(name="llama3_tokenizer")
def fixture_llama3_tokenizer():
    """Provide the Llama 3 Instruct tokenizer as the ``llama3_tokenizer`` fixture.

    The individual tokenizer files are fetched explicitly with
    ``hf_hub_download`` before the tokenizer object is constructed.

    Returns:
        The object produced by ``AutoTokenizer.from_pretrained`` for
        ``NousResearch/Meta-Llama-3-8B-Instruct``.
    """
    model_id = "NousResearch/Meta-Llama-3-8B-Instruct"

    # Fetch order matches the original sequence of explicit calls.
    for tokenizer_file in (
        "special_tokens_map.json",
        "tokenizer_config.json",
        "tokenizer.json",
    ):
        hf_hub_download(repo_id=model_id, filename=tokenizer_file)

    return AutoTokenizer.from_pretrained(model_id)
|
||||
|
||||
Reference in New Issue
Block a user