use offline for precached stream dataset (#2453)

This commit is contained in:
Wing Lian
2025-03-28 23:39:09 -04:00
committed by GitHub
parent e46239f8d3
commit c49682132b
8 changed files with 179 additions and 124 deletions

View File

@@ -109,7 +109,9 @@ def fixture_toolcalling_dataset():
@pytest.fixture(name="llama3_tokenizer", scope="session", autouse=True)
@enable_hf_offline
def fixture_llama3_tokenizer():
def fixture_llama3_tokenizer(
download_llama3_8b_instruct_model_fixture,
): # pylint: disable=unused-argument,redefined-outer-name
tokenizer = AutoTokenizer.from_pretrained("NousResearch/Meta-Llama-3-8B-Instruct")
return tokenizer
@@ -123,7 +125,10 @@ def fixture_smollm2_tokenizer():
@pytest.fixture(name="mistralv03_tokenizer", scope="session", autouse=True)
def fixture_mistralv03_tokenizer():
@enable_hf_offline
def fixture_mistralv03_tokenizer(
download_mlx_mistral_7b_model_fixture,
): # pylint: disable=unused-argument,redefined-outer-name
tokenizer = AutoTokenizer.from_pretrained(
"mlx-community/Mistral-7B-Instruct-v0.3-4bit"
)

View File

@@ -9,6 +9,7 @@ import pytest
from datasets import Dataset
from tokenizers import AddedToken
from transformers import PreTrainedTokenizer
from utils import enable_hf_offline
from axolotl.prompt_strategies.chat_template import (
ChatTemplatePrompter,
@@ -101,6 +102,7 @@ class TestChatTemplateConfigurations:
return True
return False
@enable_hf_offline
def test_train_on_inputs_true(
self,
tokenizer,