hf offline decorator for tests to work around rate limits (#2452) [skip ci]
* hf offline decorator for tests to work around rate limits * fail quicker so we can see logs * try new cache name * limit files downloaded * phi mini predownload * offline decorator for phi tokenizer * handle meta llama 8b offline too * make sure to return fixtures if they are wrapped too * more fixes * more things offline * more offline things * fix the env var * fix the model name * handle gemma also * force reload of modules to recheck offline status * prefetch mistral too * use reset_sessions so hub picks up offline mode * more fixes * rename so it doesn't seem like a context manager * fix backoff * switch out tinyshakespeare dataset since it runs a py script to fetch data and doesn't work offline * include additional dataset * more fixes * more fixes * replace tiny shakespeare dataset * skip some tests for now * use more robust check using snapshot download to determine if a dataset name is on the hub * typo for skip reason * use local_files_only * more fixtures * remove local only * use tiny shakespeare as pretrain dataset and streaming can't be offline even if precached * make sure fixtures aren't offline; improve the offline reset; try bumping version of datasets; reorder reloading and setting; prime a new cache; run the tests now with fresh cache; try with a static cache * now run all the ci again with hopefully a correct cache * skip wonky tests for now * skip wonky tests for now * handle offline mode for model card creation
This commit is contained in:
@@ -4,8 +4,8 @@ shared fixtures for prompt strategies tests
|
||||
|
||||
import pytest
|
||||
from datasets import Dataset
|
||||
from huggingface_hub import hf_hub_download
|
||||
from transformers import AutoTokenizer
|
||||
from utils import enable_hf_offline
|
||||
|
||||
from axolotl.prompt_strategies.jinja_template_analyzer import JinjaTemplateAnalyzer
|
||||
from axolotl.utils.chat_templates import _CHAT_TEMPLATES
|
||||
@@ -108,24 +108,15 @@ def fixture_toolcalling_dataset():
|
||||
|
||||
|
||||
@pytest.fixture(name="llama3_tokenizer", scope="session", autouse=True)
@enable_hf_offline
def fixture_llama3_tokenizer():
    """Session-scoped tokenizer for NousResearch/Meta-Llama-3-8B-Instruct.

    The three tokenizer files are requested one by one through
    ``hf_hub_download`` before the repo id is handed to
    ``AutoTokenizer.from_pretrained``.

    NOTE(review): the function is wrapped by ``enable_hf_offline`` (imported
    from ``utils``); what that wrapper does is not visible from this file.
    """
    repo_id = "NousResearch/Meta-Llama-3-8B-Instruct"
    # Same files, same order as before: special tokens, config, tokenizer.
    tokenizer_files = (
        "special_tokens_map.json",
        "tokenizer_config.json",
        "tokenizer.json",
    )
    for filename in tokenizer_files:
        hf_hub_download(repo_id=repo_id, filename=filename)

    return AutoTokenizer.from_pretrained(repo_id)
|
||||
|
||||
|
||||
@pytest.fixture(name="smollm2_tokenizer", scope="session", autouse=True)
@enable_hf_offline
def fixture_smollm2_tokenizer():
    """Session-scoped tokenizer loaded from ``HuggingFaceTB/SmolLM2-135M``.

    NOTE(review): wrapped by ``enable_hf_offline`` from ``utils``; the
    wrapper's behavior is not visible from this file.
    """
    return AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-135M")
|
||||
@@ -140,6 +131,7 @@ def fixture_mistralv03_tokenizer():
|
||||
|
||||
|
||||
@pytest.fixture(name="phi35_tokenizer", scope="session", autouse=True)
@enable_hf_offline
def fixture_phi35_tokenizer():
    """Session-scoped tokenizer loaded from ``microsoft/Phi-3.5-mini-instruct``.

    NOTE(review): wrapped by ``enable_hf_offline`` from ``utils``; the
    wrapper's behavior is not visible from this file.
    """
    return AutoTokenizer.from_pretrained("microsoft/Phi-3.5-mini-instruct")
|
||||
|
||||
Reference in New Issue
Block a user