replace tensorboard checks with helper function (#2120) [skip ci]
* replace tensorboard checks with helper function
* move helper function
* use relative
This commit is contained in:
@@ -7,12 +7,11 @@ from pathlib import Path
|
||||
|
||||
import yaml
|
||||
from accelerate.test_utils import execute_subprocess_async
|
||||
from tbparse import SummaryReader
|
||||
from transformers.testing_utils import get_torch_dist_unique_port
|
||||
|
||||
from axolotl.utils.dict import DictDefault
|
||||
|
||||
from ..utils import most_recent_subdir
|
||||
from ..utils import check_tensorboard
|
||||
|
||||
LOG = logging.getLogger("axolotl.tests.e2e.multigpu")
|
||||
os.environ["WANDB_DISABLED"] = "true"
|
||||
@@ -91,12 +90,8 @@ class TestMultiGPUEval:
|
||||
str(Path(temp_dir) / "config.yaml"),
|
||||
]
|
||||
)
|
||||
tb_log_path = most_recent_subdir(temp_dir + "/runs")
|
||||
event_file = os.path.join(tb_log_path, sorted(os.listdir(tb_log_path))[0])
|
||||
reader = SummaryReader(event_file)
|
||||
df = reader.scalars # pylint: disable=invalid-name
|
||||
df = df[(df.tag == "eval/loss")] # pylint: disable=invalid-name
|
||||
assert df.value.values[-1] < 2.5, "Loss is too high"
|
||||
|
||||
check_tensorboard(temp_dir + "/runs", "eval/loss", 2.5, "Eval Loss is too high")
|
||||
|
||||
def test_eval(self, temp_dir):
|
||||
# pylint: disable=duplicate-code
|
||||
@@ -164,9 +159,5 @@ class TestMultiGPUEval:
|
||||
str(Path(temp_dir) / "config.yaml"),
|
||||
]
|
||||
)
|
||||
tb_log_path = most_recent_subdir(temp_dir + "/runs")
|
||||
event_file = os.path.join(tb_log_path, sorted(os.listdir(tb_log_path))[0])
|
||||
reader = SummaryReader(event_file)
|
||||
df = reader.scalars # pylint: disable=invalid-name
|
||||
df = df[(df.tag == "eval/loss")] # pylint: disable=invalid-name
|
||||
assert df.value.values[-1] < 2.9, "Loss is too high"
|
||||
|
||||
check_tensorboard(temp_dir + "/runs", "eval/loss", 2.9, "Eval Loss is too high")
|
||||
|
||||
Reference in New Issue
Block a user