diff --git a/src/axolotl/cli/config.py b/src/axolotl/cli/config.py index c88cf44b5..33594e3e5 100644 --- a/src/axolotl/cli/config.py +++ b/src/axolotl/cli/config.py @@ -176,7 +176,6 @@ def load_cfg(config: Union[str, Path] = Path("examples/"), **kwargs) -> DictDefa # Load the config from the yaml file with open(config, encoding="utf-8") as file: cfg: DictDefault = DictDefault(yaml.safe_load(file)) - TELEMETRY_MANAGER.send_event(event_type="config-loaded", properties=cfg) # If there are any options passed in the cli, if it is something that seems valid diff --git a/src/axolotl/telemetry/manager.py b/src/axolotl/telemetry/manager.py index 06699ebff..7f2d6da44 100644 --- a/src/axolotl/telemetry/manager.py +++ b/src/axolotl/telemetry/manager.py @@ -42,6 +42,14 @@ ENABLED_WARNING = ( WHITELIST_PATH = str(Path(__file__).parent / "whitelist.yaml") +FIELDS_WITH_ORGS = [ + "base_model", + "tokenizer_config", + "base_model_config", +] +FIELDS_TO_REDACT = ["resume_from_checkpoint", "hub_model_id"] +PREFIXES_TO_REDACT = ["wandb_", "comet_", "mlflow_", "gradio_"] + class TelemetryManager: """Manages telemetry collection and transmission""" @@ -154,13 +162,14 @@ class TelemetryManager: def _redact_paths(self, properties: dict[str, Any]) -> dict[str, Any]: """ Redact properties to remove any paths, so as to avoid inadvertently collecting - private or personally identifiable information (PII). + private or personally identifiable information (PII). We also remove + information related to Wandb, MLflow, etc. configuration. Args: properties: Dictionary of properties to redact. Returns: - Properties dictionary with paths redacted. + Properties dictionary with redaction applied. """ if not properties: return {} @@ -170,15 +179,19 @@ class TelemetryManager: def redact_value(value: Any, key: str = "") -> Any: """Recursively sanitize values, redacting those with path-like keys""" - # Special case: base_model should be redacted if org is not whitelisted - if key == "base_model": - org = value.split("/")[0] - if org not in self.whitelist["organizations"]: - return "[REDACTED]" + if isinstance(key, str) and isinstance(value, str): + # Fields that should be redacted if org is not whitelisted + if key in FIELDS_WITH_ORGS: + org = value.split("/")[0] + if org not in self.whitelist["organizations"]: + return "[REDACTED]" - if isinstance(value, str): - # If the key suggests this is a path, redact it - if any(indicator in key.lower() for indicator in path_indicators): + # Other redaction special cases + if ( + key in FIELDS_TO_REDACT + or any(prefix in key for prefix in PREFIXES_TO_REDACT) + or any(indicator in key.lower() for indicator in path_indicators) + ): return "[REDACTED]" # Handle nested structures @@ -231,17 +244,21 @@ class TelemetryManager: # Wrap PostHog errors in try / except to not raise errors during Axolotl usage try: - LOG.warning(f"*** Sending telemetry for {event_type} ***") - # Send event via PostHog posthog.capture( distinct_id=self.run_id, event=event_type, properties=properties, + disable_geoip=True, ) except Exception as e: # pylint: disable=broad-exception-caught LOG.warning(f"Failed to send telemetry event: {e}") + # Additionally, send system info telemetry when loading config. + # NOTE: Is this the best place for this? + if event_type == "config-loaded": + self.send_system_info() + def send_system_info(self): """Helper method for sending system info""" self.send_event(event_type="system-info", properties=self.system_info) diff --git a/tests/conftest.py b/tests/conftest.py index 85e276722..1d03973d3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,5 @@ -""" -shared pytest fixtures -""" +"""Shared pytest fixtures""" + import functools import importlib import shutil @@ -171,3 +170,9 @@ def cleanup_monkeypatches(): module_globals = module_name_tuple[1] for module_global in module_globals: globals().pop(module_global, None) + + +@pytest.fixture(autouse=True) +def disable_telemetry(monkeypatch): + monkeypatch.setenv("AXOLOTL_DO_NOT_TRACK", "1") + yield diff --git a/tests/telemetry/conftest.py b/tests/telemetry/conftest.py new file mode 100644 index 000000000..0055eeb52 --- /dev/null +++ b/tests/telemetry/conftest.py @@ -0,0 +1,9 @@ +"""Shared pytest fixtures for telemetry tests.""" + +import pytest + + +@pytest.fixture(autouse=True) +def disable_telemetry(monkeypatch): + monkeypatch.delenv("AXOLOTL_DO_NOT_TRACK") + yield diff --git a/tests/telemetry/test_manager.py b/tests/telemetry/test_manager.py index 4608b337c..b813b8892 100644 --- a/tests/telemetry/test_manager.py +++ b/tests/telemetry/test_manager.py @@ -146,7 +146,7 @@ def test_is_whitelisted(manager, mock_whitelist): def test_system_info_collection(manager): """Test system information collection""" - system_info = manager.system_info + system_info = manager._get_system_info() # Check essential keys assert "os" in system_info