* add more test cases for gradient accumulation and fix zero3 * swap out for smaller model * fix missing return * fix missing pad_token in config * support concurrency for multigpu testing * cast empty deepspeed to empty string for zero3 check * fix temp_dir as fixture so parametrize works properly * fix test file for multigpu evals * don't use default * don't use default for fsdp_state_dict_type * don't use llama tokenizer w smollm * also automatically cancel multigpu for concurrency
17 lines
273 B
Python
17 lines
273 B
Python
"""
|
|
shared pytest fixtures
|
|
"""
|
|
import shutil
|
|
import tempfile
|
|
|
|
import pytest
|
|
|
|
|
|
@pytest.fixture
def temp_dir():
    """Yield the path of a fresh temporary directory for a test.

    The directory (and everything inside it) is removed automatically
    during fixture teardown, after the test finishes.
    """
    # TemporaryDirectory handles both creation and recursive removal:
    # the context manager's exit runs when pytest resumes the generator
    # for teardown.
    with tempfile.TemporaryDirectory() as dir_path:
        yield dir_path
|