This commit is contained in:
Dan Saunders
2025-02-24 01:31:35 +00:00
parent d3d63c1432
commit c9af72cd7a
2 changed files with 11 additions and 9 deletions

View File

@@ -165,10 +165,10 @@ class TelemetryManager:
if not properties: if not properties:
return {} return {}
# TODO: Keep this up to date with any config schema changes # NOTE: Keep this up to date with any config schema changes
path_indicators = {"path", "dir"} path_indicators = {"path", "dir"}
def redact_value(value: str, key: Any = None) -> Any: def redact_value(value: Any, key: str = "") -> Any:
"""Recursively sanitize values, redacting those with path-like keys""" """Recursively sanitize values, redacting those with path-like keys"""
# Special case: base_model should be redacted if org is not whitelisted # Special case: base_model should be redacted if org is not whitelisted
if key == "base_model": if key == "base_model":
@@ -181,11 +181,11 @@ class TelemetryManager:
if any(indicator in key.lower() for indicator in path_indicators): if any(indicator in key.lower() for indicator in path_indicators):
return "[REDACTED]" return "[REDACTED]"
# Handle nested structures # Handle nested structures
if isinstance(value, dict): if isinstance(value, dict):
return {k: redact_value(v, k) for k, v in value.items()} return {k: redact_value(v, k) for k, v in value.items()}
if isinstance(value, list): if isinstance(value, list):
return [redact_value(item) for item in value] return [redact_value(item) for item in value]
return value return value

View File

@@ -196,8 +196,9 @@ def test_redacted_properties(manager):
"path_to_model": "models/llama/7b", "path_to_model": "models/llama/7b",
"message": "Training started", # Should not be redacted "message": "Training started", # Should not be redacted
"metrics": {"loss": 0.5, "accuracy": 0.95}, # Should not be redacted "metrics": {"loss": 0.5, "accuracy": 0.95}, # Should not be redacted
"base_model": "models/local_model",
"nested": { "nested": {
"model_path": "/models/local/weights.pt", "model_path": "/models/my_model",
"root_dir": "/home/user/projects", "root_dir": "/home/user/projects",
"stats": {"steps": 1000, "epochs": 3}, # Should not be redacted "stats": {"steps": 1000, "epochs": 3}, # Should not be redacted
}, },
@@ -211,10 +212,11 @@ def test_redacted_properties(manager):
# Get the sanitized properties that were sent # Get the sanitized properties that were sent
sanitized = mock_capture.call_args[1]["properties"] sanitized = mock_capture.call_args[1]["properties"]
# Check that path-like keys were redacted # Check that path-like and base_model keys were redacted
assert sanitized["filepath"] == "[REDACTED]" assert sanitized["filepath"] == "[REDACTED]"
assert sanitized["windows_path"] == "[REDACTED]" assert sanitized["windows_path"] == "[REDACTED]"
assert sanitized["path_to_model"] == "[REDACTED]" assert sanitized["path_to_model"] == "[REDACTED]"
assert sanitized["base_model"] == "[REDACTED]"
# Check that non-path values were preserved # Check that non-path values were preserved
assert sanitized["message"] == "Training started" assert sanitized["message"] == "Training started"