adding runtime metrics / system info, additional accelerator support, etc.

This commit is contained in:
Dan Saunders
2025-02-24 19:36:31 +00:00
parent 71ae6f9f87
commit 17310f9acc
8 changed files with 206 additions and 60 deletions

View File

@@ -151,12 +151,12 @@ def test_system_info_collection(manager):
# Check essential keys
assert "os" in system_info
assert "python_version" in system_info
assert "pytorch_version" in system_info
assert "torch_version" in system_info
assert "transformers_version" in system_info
assert "axolotl_version" in system_info
assert "cpu_count" in system_info
assert "memory_total" in system_info
assert "gpu_count" in system_info
assert "accelerator_count" in system_info
def test_send_event(manager):

View File

@@ -331,30 +331,26 @@ class TestRuntimeMetricsTracker:
}
# Get memory metrics
result = tracker.get_memory_metrics()
# Verify structure
assert "memory" in result
memory = result["memory"]
memory_metrics = tracker.get_memory_metrics()
# Verify CPU memory
assert (
memory["cpu_memory_bytes"] == 1 * 1024 * 1024 * 1024
memory_metrics["cpu_memory_bytes"] == 1 * 1024 * 1024 * 1024
) # Current value from mock
assert (
memory["peak_cpu_memory_bytes"] == 2 * 1024 * 1024 * 1024
memory_metrics["peak_cpu_memory_bytes"] == 2 * 1024 * 1024 * 1024
) # Peak value we set
# Verify GPU memory
assert (
memory["gpu_0_memory_bytes"] == 1 * 1024 * 1024 * 1024
memory_metrics["gpu_0_memory_bytes"] == 1 * 1024 * 1024 * 1024
) # Current value from mock
assert (
memory["gpu_0_peak_memory_bytes"] == 3 * 1024 * 1024 * 1024
memory_metrics["gpu_0_peak_memory_bytes"] == 3 * 1024 * 1024 * 1024
) # Peak value we set
assert (
memory["gpu_1_memory_bytes"] == 2 * 1024 * 1024 * 1024
memory_metrics["gpu_1_memory_bytes"] == 2 * 1024 * 1024 * 1024
) # Current value from mock
assert (
memory["gpu_1_peak_memory_bytes"] == 4 * 1024 * 1024 * 1024
memory_metrics["gpu_1_peak_memory_bytes"] == 4 * 1024 * 1024 * 1024
) # Peak value we set