add missing fixture decorator for predownload dataset (#2117) [skip ci]
* add missing fixture decorator for predownload dataset
* also pre-download the tokenizer files
This commit is contained in:
@@ -46,6 +46,7 @@ def download_mhenrichsen_alpaca_2k_w_revision_dataset():
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session", autouse=True)
|
||||||
def download_mlabonne_finetome_100k_dataset():
|
def download_mlabonne_finetome_100k_dataset():
|
||||||
# download the dataset
|
# download the dataset
|
||||||
snapshot_download("mlabonne/FineTome-100k", repo_type="dataset")
|
snapshot_download("mlabonne/FineTome-100k", repo_type="dataset")
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ shared fixtures for prompt strategies tests
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from datasets import Dataset
|
from datasets import Dataset
|
||||||
|
from huggingface_hub import hf_hub_download
|
||||||
from transformers import AutoTokenizer
|
from transformers import AutoTokenizer
|
||||||
|
|
||||||
|
|
||||||
@@ -60,6 +61,17 @@ def fixture_basic_dataset():
|
|||||||
|
|
||||||
@pytest.fixture(name="llama3_tokenizer")
|
@pytest.fixture(name="llama3_tokenizer")
|
||||||
def fixture_llama3_tokenizer():
|
def fixture_llama3_tokenizer():
|
||||||
|
hf_hub_download(
|
||||||
|
repo_id="NousResearch/Meta-Llama-3-8B-Instruct",
|
||||||
|
filename="special_tokens_map.json",
|
||||||
|
)
|
||||||
|
hf_hub_download(
|
||||||
|
repo_id="NousResearch/Meta-Llama-3-8B-Instruct",
|
||||||
|
filename="tokenizer_config.json",
|
||||||
|
)
|
||||||
|
hf_hub_download(
|
||||||
|
repo_id="NousResearch/Meta-Llama-3-8B-Instruct", filename="tokenizer.json"
|
||||||
|
)
|
||||||
tokenizer = AutoTokenizer.from_pretrained("NousResearch/Meta-Llama-3-8B-Instruct")
|
tokenizer = AutoTokenizer.from_pretrained("NousResearch/Meta-Llama-3-8B-Instruct")
|
||||||
|
|
||||||
return tokenizer
|
return tokenizer
|
||||||
|
|||||||
Reference in New Issue
Block a user