From 7563e1bd3055a03a743d26fbef9dab4b8af32c77 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Sun, 29 Jun 2025 22:05:21 -0400
Subject: [PATCH] set a different triton cache for each test to avoid blocking writes to cache (#2843)

* set a different triton cache for each test to avoid blocking writes to cache

* set log level

* disable debug logging for filelock
---
 cicd/single_gpu.py | 2 ++
 tests/conftest.py  | 8 ++++++++
 2 files changed, 10 insertions(+)

diff --git a/cicd/single_gpu.py b/cicd/single_gpu.py
index 2ce3b0662..357aa41ee 100644
--- a/cicd/single_gpu.py
+++ b/cicd/single_gpu.py
@@ -32,6 +32,8 @@ df_args = {
     "NIGHTLY_BUILD": os.environ.get("NIGHTLY_BUILD", ""),
     "CODECOV_TOKEN": os.environ.get("CODECOV_TOKEN", ""),
     "HF_HOME": "/workspace/data/huggingface-cache/hub",
+    "PYTHONUNBUFFERED": os.environ.get("PYTHONUNBUFFERED", "1"),
+    "DEEPSPEED_LOG_LEVEL": os.environ.get("DEEPSPEED_LOG_LEVEL", "WARNING"),
 }
 
 dockerfile_contents = df_template.render(**df_args)
diff --git a/tests/conftest.py b/tests/conftest.py
index 12e79c0e3..b8dff2477 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -4,6 +4,7 @@ shared pytest fixtures
 
 import functools
 import importlib
+import logging
 import os
 import shutil
 import sys
@@ -25,6 +26,8 @@ from tests.hf_offline_utils import (
     hf_offline_context,
 )
 
+logging.getLogger("filelock").setLevel(logging.CRITICAL)
+
 
 def retry_on_request_exceptions(max_retries=3, delay=1):
     # pylint: disable=duplicate-code
@@ -420,6 +423,11 @@ def temp_dir() -> Generator[str, None, None]:
     shutil.rmtree(_temp_dir)
 
 
+@pytest.fixture(scope="function", autouse=True)
+def unique_triton_cache_dir(temp_dir):
+    os.environ["TRITON_CACHE_DIR"] = temp_dir + "/~.triton/cache"
+
+
 @pytest.fixture(scope="function", autouse=True)
 def cleanup_monkeypatches():
     from transformers import Trainer