fix cfg merge
This commit is contained in:
@@ -10,7 +10,7 @@ import shutil
|
|||||||
import sys
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
import time
|
import time
|
||||||
from pathlib import Path, PosixPath
|
from pathlib import Path
|
||||||
from typing import Generator
|
from typing import Generator
|
||||||
|
|
||||||
import datasets
|
import datasets
|
||||||
@@ -432,11 +432,6 @@ def module_temp_dir() -> Generator[str, None, None]:
|
|||||||
shutil.rmtree(_temp_dir)
|
shutil.rmtree(_temp_dir)
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="function", autouse=True)
|
|
||||||
def unique_triton_cache_dir(temp_dir: str | PosixPath) -> None:
|
|
||||||
os.environ["TRITON_CACHE_DIR"] = str(temp_dir) + "/.triton/cache"
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="function", autouse=True)
|
@pytest.fixture(scope="function", autouse=True)
|
||||||
def cleanup_monkeypatches():
|
def cleanup_monkeypatches():
|
||||||
from transformers import Trainer
|
from transformers import Trainer
|
||||||
|
|||||||
@@ -32,7 +32,7 @@ def sft_base_cfg():
|
|||||||
cfg = DictDefault(
|
cfg = DictDefault(
|
||||||
base_model="HuggingFaceTB/SmolLM2-135M",
|
base_model="HuggingFaceTB/SmolLM2-135M",
|
||||||
tokenizer_config="HuggingFaceTB/SmolLM2-135M", # this has to be manually set since we haven't done validation
|
tokenizer_config="HuggingFaceTB/SmolLM2-135M", # this has to be manually set since we haven't done validation
|
||||||
sequence_len=2048,
|
sequence_len=1024,
|
||||||
special_tokens={
|
special_tokens={
|
||||||
"pad_token": "<|endoftext|>",
|
"pad_token": "<|endoftext|>",
|
||||||
},
|
},
|
||||||
@@ -442,7 +442,8 @@ class TestMultiGPULlama:
|
|||||||
self, temp_dir, sft_prepared_dataset_alpaca_cfg, fsdp_state_dict_type
|
self, temp_dir, sft_prepared_dataset_alpaca_cfg, fsdp_state_dict_type
|
||||||
):
|
):
|
||||||
# pylint: disable=duplicate-code
|
# pylint: disable=duplicate-code
|
||||||
cfg = DictDefault(
|
cfg = (
|
||||||
|
DictDefault(
|
||||||
{
|
{
|
||||||
"pad_to_sequence_len": True,
|
"pad_to_sequence_len": True,
|
||||||
"num_epochs": 1,
|
"num_epochs": 1,
|
||||||
@@ -472,6 +473,7 @@ class TestMultiGPULlama:
|
|||||||
},
|
},
|
||||||
"use_tensorboard": True,
|
"use_tensorboard": True,
|
||||||
}
|
}
|
||||||
|
)
|
||||||
| sft_prepared_dataset_alpaca_cfg
|
| sft_prepared_dataset_alpaca_cfg
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user