hf offline decorator for tests to workaround rate limits (#2452) [skip ci]
* hf offline decorator for tests to workaround rate limits * fail quicker so we can see logs * try new cache name * limit files downloaded * phi mini predownload * offline decorator for phi tokenizer * handle meta llama 8b offline too * make sure to return fixtures if they are wrapped too * more fixes * more things offline * more offline things * fix the env var * fix the model name * handle gemma also * force reload of modules to recheck offline status * prefetch mistral too * use reset_sessions so hub picks up offline mode * more fixes * rename so it doesn't seem like a context manager * fix backoff * switch out tinyshakespeare dataset since it runs a py script to fetch data and doesn't work offline * include additional dataset * more fixes * more fixes * replace tiny shakespeaere dataset * skip some tests for now * use more robust check using snapshot download to determine if a dataset name is on the hub * typo for skip reason * use local_files_only * more fixtures * remove local only * use tiny shakespeare as pretrain dataset and streaming can't be offline even if precached * make sure fixtures aren't offline improve the offline reset try bumping version of datasets reorder reloading and setting prime a new cache run the tests now with fresh cache try with a static cache * now run all the ci again with hopefully a correct cache * skip wonky tests for now * skip wonky tests for now * handle offline mode for model card creation
This commit is contained in:
@@ -14,6 +14,7 @@ import transformers.modelcard
|
||||
from accelerate.logging import get_logger
|
||||
from accelerate.utils import save_fsdp_model
|
||||
from datasets import Dataset
|
||||
from huggingface_hub.errors import OfflineModeIsEnabled
|
||||
from peft import PeftConfig, PeftModel
|
||||
from transformers import PreTrainedModel, PreTrainedTokenizer, ProcessorMixin
|
||||
from transformers.integrations.deepspeed import is_deepspeed_zero3_enabled
|
||||
@@ -302,7 +303,7 @@ def create_model_card(cfg: DictDefault, trainer: Trainer):
|
||||
model_card_kwarg["dataset_tags"] = dataset_tags
|
||||
|
||||
trainer.create_model_card(**model_card_kwarg)
|
||||
except (AttributeError, UnicodeDecodeError):
|
||||
except (AttributeError, UnicodeDecodeError, OfflineModeIsEnabled):
|
||||
pass
|
||||
elif cfg.hub_model_id:
|
||||
# Defensively push to the hub to ensure the model card is updated
|
||||
|
||||
@@ -6,8 +6,12 @@ from pathlib import Path
|
||||
from typing import Optional, Union
|
||||
|
||||
from datasets import Dataset, DatasetDict, load_dataset, load_from_disk
|
||||
from huggingface_hub import hf_hub_download
|
||||
from huggingface_hub.errors import HFValidationError
|
||||
from huggingface_hub import hf_hub_download, snapshot_download
|
||||
from huggingface_hub.errors import (
|
||||
HFValidationError,
|
||||
RepositoryNotFoundError,
|
||||
RevisionNotFoundError,
|
||||
)
|
||||
|
||||
from axolotl.utils.dict import DictDefault
|
||||
|
||||
@@ -70,20 +74,25 @@ def load_dataset_w_config(
|
||||
# pylint: disable=invalid-name
|
||||
ds: Optional[Union[Dataset, DatasetDict]] = None # pylint: disable=invalid-name
|
||||
ds_from_hub = False
|
||||
ds_trust_remote_code = config_dataset.trust_remote_code
|
||||
try:
|
||||
# this is just a basic check to see if the path is a
|
||||
# valid HF dataset that's loadable
|
||||
load_dataset(
|
||||
config_dataset.path,
|
||||
name=config_dataset.name,
|
||||
streaming=True,
|
||||
snapshot_download(
|
||||
repo_id=config_dataset.path,
|
||||
repo_type="dataset",
|
||||
token=use_auth_token,
|
||||
revision=config_dataset.revision,
|
||||
trust_remote_code=ds_trust_remote_code,
|
||||
ignore_patterns=["*"],
|
||||
)
|
||||
ds_from_hub = True
|
||||
except (FileNotFoundError, ConnectionError, HFValidationError, ValueError):
|
||||
except (
|
||||
RepositoryNotFoundError,
|
||||
RevisionNotFoundError,
|
||||
FileNotFoundError,
|
||||
ConnectionError,
|
||||
HFValidationError,
|
||||
ValueError,
|
||||
):
|
||||
pass
|
||||
|
||||
ds_from_cloud = False
|
||||
|
||||
Reference in New Issue
Block a user