opt-in version of telemetry

This commit is contained in:
Dan Saunders
2025-02-26 11:13:38 -05:00
committed by Dan Saunders
parent b2f1fc109a
commit 1c74ab175f
3 changed files with 97 additions and 89 deletions

View File

@@ -1,6 +1,6 @@
---
title: Telemetry
description: A description of the opt-out telemetry implementation in Axolotl.
description: A description of the opt-in telemetry implementation in Axolotl.
---
# Telemetry in Axolotl
@@ -41,13 +41,13 @@ aware of data collection, unless telemetry is explicitly enabled or disabled.
## Opt-Out Mechanism
Telemetry is **enabled by default** on an opt-out basis. To disable it, set either:
Telemetry is **disabled by default** on an opt-in basis. To enable it, set: `AXOLOTL_DO_NOT_TRACK=0`.
- `AXOLOTL_DO_NOT_TRACK=1` (Axolotl-specific)
- `DO_NOT_TRACK=1` (Global standard; see https://consoledonottrack.com/)
To remove the telemetry warning message that is displayed at startup (of training and
other commands), explicitly set: `AXOLOTL_DO_NOT_TRACK=0` (enable telemetry) or
`AXOLOTL_DO_NOT_TRACK=1` (explicitly disable telemetry).
To acknowledge and explicitly enable telemetry (and remove the warning message), set:
`AXOLOTL_DO_NOT_TRACK=0`.
**Note**: Telemetry will move to an opt-out model in a later release.
## Privacy

View File

@@ -20,21 +20,21 @@ LOG = logging.getLogger(__name__)
POSTHOG_HOST = "https://app.posthog.com"
POSTHOG_WRITE_KEY = "phc_1kUR0o04oJKKTTeSsIz2Mfm5mpiVsQEf2WOlzljMD7y"
ENABLED_WARNING_SLEEP_SECONDS = 15
ENABLED_WARNING = (
"\nTelemetry is enabled. This helps Axolotl's maintainers by providing insights into:\n"
"- Which models and configurations are most commonly used\n"
"- What hardware setups need to be supported\n"
OPT_IN_WARNING_SLEEP_SECONDS = 15
OPT_IN_INFO = (
"\nTelemetry is currently disabled by default. If you'd like to help improve "
"Axolotl, consider enabling it by setting:\n"
"AXOLOTL_DO_NOT_TRACK=0\n\n"
"Telemetry data helps us understand:\n"
"- Which features are most used\n"
"- What hardware configurations to prioritize\n"
"- Where users encounter errors\n\n"
"This data helps us prioritize features, optimize performance, and fix bugs.\n\n"
"To disable telemetry, set either:\n"
"- AXOLOTL_DO_NOT_TRACK=1 (Axolotl-specific)\n"
"- DO_NOT_TRACK=1 (Global standard; see https://consoledonottrack.com/)\n\n"
"To remove this warning and continue with telemetry enabled,"
"explicitly set AXOLOTL_DO_NOT_TRACK=0 (and leave DO_NOT_TRACK unset / set to 0)\n\n"
"No personally identifiable information is collected."
"For details, see: https://axolotl-ai-cloud.github.io/axolotl/docs/telemetry.html\n\n"
f"Sleeping for {ENABLED_WARNING_SLEEP_SECONDS}s..."
"No personally identifiable information is collected.\n"
"To remove this warning, explicitly set AXOLOTL_DO_NOT_TRACK=0 (enable telemetry) "
"or AXOLOTL_DO_NOT_TRACK=1 (explicitly disable telemetry).\n\n"
"NOTE: Telemetry will move to an opt-out in a later release.\n"
"For details, see: https://axolotl-ai-cloud.github.io/axolotl/docs/telemetry.html\n"
f"Sleeping for {OPT_IN_WARNING_SLEEP_SECONDS}s..."
)
WHITELIST_PATH = str(Path(__file__).parent / "whitelist.yaml")
@@ -134,7 +134,7 @@ class TelemetryManager:
if self._initialized:
return
self.enabled, self.explicit_enable = self._check_telemetry_enabled()
self.enabled = self._check_telemetry_enabled()
if self.enabled:
self.run_id = str(uuid.uuid4())
@@ -160,30 +160,33 @@ class TelemetryManager:
return cls._instance
def _check_telemetry_enabled(self) -> tuple[bool, bool]:
def _check_telemetry_enabled(self) -> bool:
"""
Check if telemetry is enabled based on environment variables. We also check
whether this is the main process (for the distributed setting and to avoid
sending duplicate PostHog events per GPU).
Note: This is enabled by default on an opt-out basis. Set either
`AXOLOTL_DO_NOT_TRACK=1` or `DO_NOT_TRACK=1` to disable telemetry. For more
details, see https://axolotl-ai-cloud.github.io/axolotl/docs/telemetry.html.
Note: This is disabled by default on an opt-in basis. Set
`AXOLOTL_DO_NOT_TRACK=0` to enable telemetry. We plan to move to an opt-out
model in a later release. For more details, see
https://axolotl-ai-cloud.github.io/axolotl/docs/telemetry.html.
Returns:
Tuple containing:
- Boolean denoting whether telemetry is enabled or disabled.
- Boolean denoting whether telemetry is explicitly enabled or not.
- Boolean denoting whether telemetry is enabled or not.
"""
# Parse relevant env vars and fill opt-out default values
axolotl_do_not_track = os.getenv("AXOLOTL_DO_NOT_TRACK")
do_not_track = os.getenv("DO_NOT_TRACK")
# If explicitly enabled, we'll disable the telemetry warning message
explicit_enabled = axolotl_do_not_track in ["0", "false"]
# Default to disabled (opt-in model for initial release)
if axolotl_do_not_track is None:
axolotl_do_not_track = "0"
# Print opt-in info message for main process only
if is_main_process():
LOG.info(OPT_IN_INFO)
time.sleep(OPT_IN_WARNING_SLEEP_SECONDS)
return False
if do_not_track is None:
do_not_track = "0"
@@ -194,17 +197,11 @@ class TelemetryManager:
"true",
) and do_not_track.lower() not in ("1", "true")
# Show warning (and sleep on all ranks) unless explicitly enabled
if enabled and not explicit_enabled:
if is_main_process():
LOG.warning(ENABLED_WARNING)
time.sleep(ENABLED_WARNING_SLEEP_SECONDS)
# Only rank 0 will send telemetry
if not is_main_process():
return False, False
return False
return enabled, explicit_enabled
return enabled
def _load_whitelist(self) -> dict:
"""Load HuggingFace Hub organization whitelist"""

View File

@@ -58,68 +58,72 @@ def test_singleton_instance(telemetry_manager_class):
assert telemetry_manager_class.get_instance() is first
def test_telemetry_disabled_by_default(telemetry_manager_class):
"""Test that telemetry is disabled by default (opt-in)"""
with patch.dict(os.environ, {"RANK": "0"}, clear=True), patch("time.sleep"), patch(
"logging.Logger.info"
):
manager = telemetry_manager_class()
assert not manager.enabled
def test_telemetry_enabled_with_explicit_opt_in(telemetry_manager_class):
"""Test that telemetry is enabled when AXOLOTL_DO_NOT_TRACK=0"""
with patch.dict(os.environ, {"AXOLOTL_DO_NOT_TRACK": "0", "RANK": "0"}), patch(
"time.sleep"
):
manager = telemetry_manager_class()
assert manager.enabled
def test_telemetry_disabled_with_axolotl_do_not_track(telemetry_manager_class):
"""Test that telemetry is disabled when AXOLOTL_DO_NOT_TRACK=1"""
with patch.dict(os.environ, {"AXOLOTL_DO_NOT_TRACK": "1", "RANK": "0"}):
with patch.dict(os.environ, {"AXOLOTL_DO_NOT_TRACK": "1", "RANK": "0"}), patch(
"time.sleep"
):
manager = telemetry_manager_class()
assert not manager.enabled
def test_telemetry_disabled_with_do_not_track(telemetry_manager_class):
"""Test that telemetry is disabled when DO_NOT_TRACK=1"""
with patch.dict(os.environ, {"DO_NOT_TRACK": "1", "RANK": "0"}):
with patch.dict(
os.environ, {"AXOLOTL_DO_NOT_TRACK": "0", "DO_NOT_TRACK": "1", "RANK": "0"}
), patch("time.sleep"):
manager = telemetry_manager_class()
assert not manager.enabled
def test_telemetry_disabled_for_non_main_process(telemetry_manager_class):
"""Test that telemetry is disabled for non-main processes"""
with patch.dict(os.environ, {"AXOLOTL_DO_NOT_TRACK": "0", "RANK": "1"}):
with patch.dict(os.environ, {"AXOLOTL_DO_NOT_TRACK": "0", "RANK": "1"}), patch(
"time.sleep"
):
manager = telemetry_manager_class()
assert not manager.enabled
def test_telemetry_enabled_by_default(telemetry_manager_class):
"""Test that telemetry is enabled by default"""
with patch.dict(os.environ, {"RANK": "0"}, clear=True), patch("time.sleep"), patch(
"logging.Logger.warning"
def test_opt_in_info_displayed(telemetry_manager_class):
"""Test that opt-in info is displayed when telemetry is not configured"""
with patch.dict(os.environ, {"RANK": "0"}, clear=True), patch(
"logging.Logger.info"
) as mock_info, patch("time.sleep"):
telemetry_manager_class()
info_displayed = False
for call in mock_info.call_args_list:
if "Telemetry is currently disabled by default" in str(call):
info_displayed = True
break
assert info_displayed
def test_is_whitelisted(telemetry_manager_class, mock_whitelist):
"""Test org whitelist functionality"""
with patch("axolotl.telemetry.manager.WHITELIST_PATH", mock_whitelist), patch.dict(
os.environ, {"AXOLOTL_DO_NOT_TRACK": "0"}
):
manager = telemetry_manager_class()
assert manager.enabled
assert not manager.explicit_enable
def test_explicit_enable_disables_warning(telemetry_manager_class):
"""Test that explicit enabling prevents warning"""
with patch.dict(os.environ, {"AXOLOTL_DO_NOT_TRACK": "0", "RANK": "0"}), patch(
"logging.Logger.warning"
) as mock_warning, patch("time.sleep"):
manager = telemetry_manager_class()
assert manager.enabled
assert manager.explicit_enable
for call in mock_warning.call_args_list:
assert "Telemetry is enabled" not in str(call)
def test_warning_displayed_for_implicit_enable(telemetry_manager_class):
"""Test that warning is displayed when telemetry is implicitly enabled"""
with patch.dict(os.environ, {"RANK": "0"}, clear=True), patch(
"logging.Logger.warning"
) as mock_warning, patch("time.sleep"):
manager = telemetry_manager_class()
assert manager.enabled
assert not manager.explicit_enable
warning_displayed = False
for call in mock_warning.call_args_list:
if "Telemetry is enabled" in str(call):
warning_displayed = True
break
assert warning_displayed
def test_is_whitelisted(manager, mock_whitelist):
"""Test org whitelist functionality"""
with patch("axolotl.telemetry.manager.WHITELIST_PATH", mock_whitelist):
# Should match organizations from the mock whitelist
assert manager._is_whitelisted("meta-llama/llama-7b")
assert manager._is_whitelisted("mistralai/mistral-7b-instruct")
@@ -139,17 +143,18 @@ def test_system_info_collection(manager):
# Check essential keys
assert "os" in system_info
assert "python_version" in system_info
assert "torch_version" in system_info
assert "transformers_version" in system_info
assert "axolotl_version" in system_info
assert "cpu_count" in system_info
assert "memory_total" in system_info
assert "accelerator_count" in system_info
def test_send_event(manager):
def test_send_event(telemetry_manager_class):
"""Test basic event sending"""
with patch("posthog.capture") as mock_capture:
with patch("posthog.capture") as mock_capture, patch.dict(
os.environ, {"AXOLOTL_DO_NOT_TRACK": "0"}
):
manager = telemetry_manager_class()
# Test with clean properties (no PII)
manager.send_event("test_event", {"key": "value"})
assert mock_capture.called
@@ -164,18 +169,24 @@ def test_send_event(manager):
assert mock_capture.call_args[1]["properties"] == {}
def test_send_system_info(manager):
def test_send_system_info(telemetry_manager_class):
"""Test sending system info"""
with patch("posthog.capture") as mock_capture:
with patch("posthog.capture") as mock_capture, patch.dict(
os.environ, {"AXOLOTL_DO_NOT_TRACK": "0"}
):
manager = telemetry_manager_class()
manager.send_system_info()
assert mock_capture.called
assert mock_capture.call_args[1]["event"] == "system-info"
assert mock_capture.call_args[1]["properties"] == manager.system_info
def test_redacted_properties(manager):
def test_redacted_properties(telemetry_manager_class):
"""Test path redaction in send_event method"""
with patch("posthog.capture") as mock_capture:
with patch("posthog.capture") as mock_capture, patch.dict(
os.environ, {"AXOLOTL_DO_NOT_TRACK": "0"}
):
manager = telemetry_manager_class()
# Test with properties containing various paths and non-paths
test_properties = {
"filepath": "/home/user/sensitive/data.txt",