reduce test concurrency to avoid HF rate limiting, test suite parity (#2128)
* reduce test concurrency to avoid HF rate limiting, test suite parity * make val_set_size smaller to speed up e2e tests * more retries for pytest fixture downloads * val_set_size was too small * move retry_on_request_exceptions to data utils and add retry strategy * pre-download ultrafeedback as a test fixture * refactor download retry into it's own fn * don't import from data utils * use retry mechanism now for fixtures
This commit is contained in:
@@ -2,11 +2,9 @@
|
||||
|
||||
import functools
|
||||
import logging
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Tuple, Union
|
||||
|
||||
import requests
|
||||
from datasets import (
|
||||
Dataset,
|
||||
DatasetDict,
|
||||
@@ -44,7 +42,11 @@ from axolotl.prompters import (
|
||||
UnsupportedPrompter,
|
||||
)
|
||||
from axolotl.utils.data.pretraining import wrap_pretraining_dataset
|
||||
from axolotl.utils.data.utils import deduplicate_and_log_datasets, md5
|
||||
from axolotl.utils.data.utils import (
|
||||
deduplicate_and_log_datasets,
|
||||
md5,
|
||||
retry_on_request_exceptions,
|
||||
)
|
||||
from axolotl.utils.dict import DictDefault
|
||||
from axolotl.utils.distributed import is_local_main_process, zero_first
|
||||
from axolotl.utils.trainer import (
|
||||
@@ -55,27 +57,6 @@ from axolotl.utils.trainer import (
|
||||
LOG = logging.getLogger("axolotl")
|
||||
|
||||
|
||||
def retry_on_request_exceptions(max_retries=3, delay=1):
|
||||
def decorator(func):
|
||||
@functools.wraps(func)
|
||||
def wrapper(*args, **kwargs): # pylint: disable=inconsistent-return-statements
|
||||
for attempt in range(max_retries):
|
||||
try:
|
||||
return func(*args, **kwargs)
|
||||
except (
|
||||
requests.exceptions.ReadTimeout,
|
||||
requests.exceptions.ConnectionError,
|
||||
) as exc:
|
||||
if attempt < max_retries - 1:
|
||||
time.sleep(delay)
|
||||
else:
|
||||
raise exc
|
||||
|
||||
return wrapper
|
||||
|
||||
return decorator
|
||||
|
||||
|
||||
@retry_on_request_exceptions(max_retries=3, delay=5)
|
||||
def prepare_dataset(cfg, tokenizer, processor=None):
|
||||
prompters = []
|
||||
|
||||
Reference in New Issue
Block a user