reduce test concurrency to avoid HF rate limiting, test suite parity (#2128)

* reduce test concurrency to avoid HF rate limiting, test suite parity
* make val_set_size smaller to speed up e2e tests
* more retries for pytest fixture downloads
* val_set_size was too small
* move retry_on_request_exceptions to data utils and add retry strategy
* pre-download ultrafeedback as a test fixture
* refactor download retry into its own fn
* don't import from data utils
* use retry mechanism now for fixtures
2024-12-06 10:20:20 -05:00
parent 08fa133177
commit 5e9fa33f3d
12 changed files with 126 additions and 47 deletions
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,47 +1,77 @@
 """
 shared pytest fixtures
 """
+import functools
 import shutil
 import tempfile
+import time

 import pytest
+import requests
 from huggingface_hub import snapshot_download


+def retry_on_request_exceptions(max_retries=3, delay=1):
+    # pylint: disable=duplicate-code
+    def decorator(func):
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):  # pylint: disable=inconsistent-return-statements
+            for attempt in range(max_retries):
+                try:
+                    return func(*args, **kwargs)
+                except (
+                    requests.exceptions.ReadTimeout,
+                    requests.exceptions.ConnectionError,
+                ) as exc:
+                    if attempt < max_retries - 1:
+                        time.sleep(delay)
+                    else:
+                        raise exc
+
+        return wrapper
+
+    return decorator
+
+
+@retry_on_request_exceptions(max_retries=3, delay=5)
+def snapshot_download_w_retry(*args, **kwargs):
+    return snapshot_download(*args, **kwargs)
+
+
@pytest.fixture(scope="session", autouse=True)
 def download_smollm2_135m_model():
    # download the model
-    snapshot_download("HuggingFaceTB/SmolLM2-135M")
+    snapshot_download_w_retry("HuggingFaceTB/SmolLM2-135M")


@pytest.fixture(scope="session", autouse=True)
 def download_llama_68m_random_model():
    # download the model
-    snapshot_download("JackFram/llama-68m")
+    snapshot_download_w_retry("JackFram/llama-68m")


@pytest.fixture(scope="session", autouse=True)
 def download_qwen_2_5_half_billion_model():
    # download the model
-    snapshot_download("Qwen/Qwen2.5-0.5B")
+    snapshot_download_w_retry("Qwen/Qwen2.5-0.5B")


@pytest.fixture(scope="session", autouse=True)
 def download_tatsu_lab_alpaca_dataset():
    # download the dataset
-    snapshot_download("tatsu-lab/alpaca", repo_type="dataset")
+    snapshot_download_w_retry("tatsu-lab/alpaca", repo_type="dataset")


@pytest.fixture(scope="session", autouse=True)
 def download_mhenrichsen_alpaca_2k_dataset():
    # download the dataset
-    snapshot_download("mhenrichsen/alpaca_2k_test", repo_type="dataset")
+    snapshot_download_w_retry("mhenrichsen/alpaca_2k_test", repo_type="dataset")


@pytest.fixture(scope="session", autouse=True)
 def download_mhenrichsen_alpaca_2k_w_revision_dataset():
    # download the dataset
-    snapshot_download(
+    snapshot_download_w_retry(
        "mhenrichsen/alpaca_2k_test", repo_type="dataset", revision="d05c1cb"
    )

@@ -49,21 +79,29 @@ def download_mhenrichsen_alpaca_2k_w_revision_dataset():
@pytest.fixture(scope="session", autouse=True)
 def download_mlabonne_finetome_100k_dataset():
    # download the dataset
-    snapshot_download("mlabonne/FineTome-100k", repo_type="dataset")
+    snapshot_download_w_retry("mlabonne/FineTome-100k", repo_type="dataset")


-@pytest.fixture
+@pytest.fixture(scope="session", autouse=True)
 def download_argilla_distilabel_capybara_dpo_7k_binarized_dataset():
    # download the dataset
-    snapshot_download(
+    snapshot_download_w_retry(
        "argilla/distilabel-capybara-dpo-7k-binarized", repo_type="dataset"
    )


-@pytest.fixture
+@pytest.fixture(scope="session", autouse=True)
+def download_argilla_ultrafeedback_binarized_preferences_cleaned_dataset():
+    # download the dataset
+    snapshot_download_w_retry(
+        "argilla/ultrafeedback-binarized-preferences-cleaned", repo_type="dataset"
+    )
+
+
+@pytest.fixture(scope="session", autouse=True)
 def download_arcee_ai_distilabel_intel_orca_dpo_pairs_dataset():
    # download the dataset
-    snapshot_download(
+    snapshot_download_w_retry(
        "arcee-ai/distilabel-intel-orca-dpo-pairs-binarized", repo_type="dataset"
    )

--- a/tests/e2e/patched/test_4d_multipack_llama.py
+++ b/tests/e2e/patched/test_4d_multipack_llama.py
@@ -42,7 +42,7 @@ class Test4dMultipackLlama(unittest.TestCase):
                "lora_dropout": 0.05,
                "lora_target_linear": True,
                "sequence_len": 1024,
-                "val_set_size": 0.1,
+                "val_set_size": 0.02,
                "datasets": [
                    {
                        "path": "mhenrichsen/alpaca_2k_test",
@@ -86,7 +86,7 @@ class Test4dMultipackLlama(unittest.TestCase):
                "lora_alpha": 16,
                "lora_dropout": 0.05,
                "lora_target_linear": True,
-                "val_set_size": 0.1,
+                "val_set_size": 0.02,
                "datasets": [
                    {
                        "path": "mhenrichsen/alpaca_2k_test",
--- a/tests/e2e/patched/test_falcon_samplepack.py
+++ b/tests/e2e/patched/test_falcon_samplepack.py
@@ -40,7 +40,7 @@ class TestFalconPatched(unittest.TestCase):
                "lora_dropout": 0.1,
                "lora_target_linear": True,
                "lora_modules_to_save": ["word_embeddings", "lm_head"],
-                "val_set_size": 0.1,
+                "val_set_size": 0.05,
                "special_tokens": {
                    "bos_token": "<|endoftext|>",
                    "pad_token": "<|endoftext|>",
@@ -80,7 +80,7 @@ class TestFalconPatched(unittest.TestCase):
                "flash_attention": True,
                "sample_packing": True,
                "sequence_len": 2048,
-                "val_set_size": 0.1,
+                "val_set_size": 0.05,
                "special_tokens": {
                    "bos_token": "<|endoftext|>",
                    "pad_token": "<|endoftext|>",
--- a/tests/e2e/patched/test_fused_llama.py
+++ b/tests/e2e/patched/test_fused_llama.py
@@ -38,7 +38,7 @@ class TestFusedLlama(unittest.TestCase):
                "flash_attn_fuse_mlp": True,
                "sample_packing": True,
                "sequence_len": 1024,
-                "val_set_size": 0.1,
+                "val_set_size": 0.02,
                "special_tokens": {
                    "unk_token": "<unk>",
                    "bos_token": "<s>",
--- a/tests/e2e/patched/test_lora_llama_multipack.py
+++ b/tests/e2e/patched/test_lora_llama_multipack.py
@@ -98,7 +98,7 @@ class TestLoraLlama(unittest.TestCase):
                "lora_alpha": 64,
                "lora_dropout": 0.05,
                "lora_target_linear": True,
-                "val_set_size": 0.1,
+                "val_set_size": 0.02,
                "special_tokens": {
                    "unk_token": "<unk>",
                    "bos_token": "<s>",
--- a/tests/e2e/patched/test_mistral_samplepack.py
+++ b/tests/e2e/patched/test_mistral_samplepack.py
@@ -39,7 +39,7 @@ class TestMistral(unittest.TestCase):
                "lora_alpha": 64,
                "lora_dropout": 0.05,
                "lora_target_linear": True,
-                "val_set_size": 0.1,
+                "val_set_size": 0.05,
                "special_tokens": {
                    "unk_token": "<unk>",
                    "bos_token": "<s>",
@@ -80,7 +80,7 @@ class TestMistral(unittest.TestCase):
                "flash_attention": True,
                "sample_packing": True,
                "sequence_len": 1024,
-                "val_set_size": 0.1,
+                "val_set_size": 0.05,
                "special_tokens": {
                    "unk_token": "<unk>",
                    "bos_token": "<s>",
--- a/tests/e2e/patched/test_mixtral_samplepack.py
+++ b/tests/e2e/patched/test_mixtral_samplepack.py
@@ -40,7 +40,7 @@ class TestMixtral(unittest.TestCase):
                "lora_alpha": 32,
                "lora_dropout": 0.1,
                "lora_target_linear": True,
-                "val_set_size": 0.1,
+                "val_set_size": 0.05,
                "special_tokens": {},
                "datasets": [
                    {
@@ -78,7 +78,7 @@ class TestMixtral(unittest.TestCase):
                "flash_attention": True,
                "sample_packing": True,
                "sequence_len": 2048,
-                "val_set_size": 0.1,
+                "val_set_size": 0.05,
                "special_tokens": {},
                "datasets": [
                    {
--- a/tests/e2e/patched/test_phi_multipack.py
+++ b/tests/e2e/patched/test_phi_multipack.py
@@ -38,7 +38,7 @@ class TestPhiMultipack(unittest.TestCase):
                "pad_to_sequence_len": True,
                "load_in_8bit": False,
                "adapter": None,
-                "val_set_size": 0.1,
+                "val_set_size": 0.05,
                "special_tokens": {
                    "pad_token": "<|endoftext|>",
                },