opt-in version of telemetry
This commit is contained in:
committed by
Dan Saunders
parent
b2f1fc109a
commit
1c74ab175f
@@ -1,6 +1,6 @@
|
||||
---
|
||||
title: Telemetry
|
||||
description: A description of the opt-out telemetry implementation in Axolotl.
|
||||
description: A description of the opt-in telemetry implementation in Axolotl.
|
||||
---
|
||||
|
||||
# Telemetry in Axolotl
|
||||
@@ -41,13 +41,13 @@ aware of data collection, unless telemetry is explicitly enabled or disabled.
|
||||
|
||||
## Opt-In Mechanism
|
||||
|
||||
Telemetry is **enabled by default** on an opt-out basis. To disable it, set either:
|
||||
Telemetry is **disabled by default** on an opt-in basis. To enable it, set: `AXOLOTL_DO_NOT_TRACK=0`.
|
||||
|
||||
- `AXOLOTL_DO_NOT_TRACK=1` (Axolotl-specific)
|
||||
- `DO_NOT_TRACK=1` (Global standard; see https://consoledonottrack.com/)
|
||||
To remove the telemetry warning message that is displayed at startup of commands such as train,
|
||||
explicitly set: `AXOLOTL_DO_NOT_TRACK=0` (enable telemetry) or `AXOLOTL_DO_NOT_TRACK=1`
|
||||
(explicitly disable telemetry).
|
||||
|
||||
To acknowledge and explicitly enable telemetry (and remove the warning message), set:
|
||||
`AXOLOTL_DO_NOT_TRACK=0`.
|
||||
**Note**: Telemetry will move to an opt-out model in a later release.
|
||||
|
||||
## Privacy
|
||||
|
||||
|
||||
@@ -20,21 +20,21 @@ LOG = logging.getLogger(__name__)
|
||||
POSTHOG_HOST = "https://app.posthog.com"
|
||||
POSTHOG_WRITE_KEY = "phc_1kUR0o04oJKKTTeSsIz2Mfm5mpiVsQEf2WOlzljMD7y"
|
||||
|
||||
ENABLED_WARNING_SLEEP_SECONDS = 15
|
||||
ENABLED_WARNING = (
|
||||
"\nTelemetry is enabled. This helps Axolotl's maintainers by providing insights into:\n"
|
||||
"- Which models and configurations are most commonly used\n"
|
||||
"- What hardware setups need to be supported\n"
|
||||
OPT_IN_WARNING_SLEEP_SECONDS = 15
|
||||
OPT_IN_INFO = (
|
||||
"\nTelemetry is currently disabled by default. If you'd like to help improve "
|
||||
"Axolotl, consider enabling it by setting:\n"
|
||||
"AXOLOTL_DO_NOT_TRACK=0\n\n"
|
||||
"Telemetry data helps us understand:\n"
|
||||
"- Which features are most used\n"
|
||||
"- What hardware configurations to prioritize\n"
|
||||
"- Where users encounter errors\n\n"
|
||||
"This data helps us prioritize features, optimize performance, and fix bugs.\n\n"
|
||||
"To disable telemetry, set either:\n"
|
||||
"- AXOLOTL_DO_NOT_TRACK=1 (Axolotl-specific)\n"
|
||||
"- DO_NOT_TRACK=1 (Global standard; see https://consoledonottrack.com/)\n\n"
|
||||
"To remove this warning and continue with telemetry enabled,"
|
||||
"explicitly set AXOLOTL_DO_NOT_TRACK=0 (and leave DO_NOT_TRACK unset / set to 0)\n\n"
|
||||
"No personally identifiable information is collected."
|
||||
"For details, see: https://axolotl-ai-cloud.github.io/axolotl/docs/telemetry.html\n\n"
|
||||
f"Sleeping for {ENABLED_WARNING_SLEEP_SECONDS}s..."
|
||||
"No personally identifiable information is collected.\n"
|
||||
"To remove this warning, explicitly set AXOLOTL_DO_NOT_TRACK=0 (enable telemetry) "
|
||||
"or AXOLOTL_DO_NOT_TRACK=1 (explicitly disable telemetry).\n\n"
|
||||
"NOTE: Telemetry will move to an opt-out in a later release.\n"
|
||||
"For details, see: https://axolotl-ai-cloud.github.io/axolotl/docs/telemetry.html\n"
|
||||
f"Sleeping for {OPT_IN_WARNING_SLEEP_SECONDS}s..."
|
||||
)
|
||||
|
||||
WHITELIST_PATH = str(Path(__file__).parent / "whitelist.yaml")
|
||||
@@ -134,7 +134,7 @@ class TelemetryManager:
|
||||
if self._initialized:
|
||||
return
|
||||
|
||||
self.enabled, self.explicit_enable = self._check_telemetry_enabled()
|
||||
self.enabled = self._check_telemetry_enabled()
|
||||
|
||||
if self.enabled:
|
||||
self.run_id = str(uuid.uuid4())
|
||||
@@ -160,30 +160,33 @@ class TelemetryManager:
|
||||
|
||||
return cls._instance
|
||||
|
||||
def _check_telemetry_enabled(self) -> tuple[bool, bool]:
|
||||
def _check_telemetry_enabled(self) -> bool:
|
||||
"""
|
||||
Check if telemetry is enabled based on environment variables. We also check
|
||||
whether this is the main process (for the distributed setting and to avoid
|
||||
sending duplicate PostHog events per GPU).
|
||||
|
||||
Note: This is enabled by default on an opt-out basis. Set either
|
||||
`AXOLOTL_DO_NOT_TRACK=1` or `DO_NOT_TRACK=1` to disable telemetry. For more
|
||||
details, see https://axolotl-ai-cloud.github.io/axolotl/docs/telemetry.html.
|
||||
Note: This is disabled by default on an opt-in basis. Set
|
||||
`AXOLOTL_DO_NOT_TRACK=0` to enable telemetry. We plan to move to an opt-out
|
||||
model in a later release. For more details, see
|
||||
https://axolotl-ai-cloud.github.io/axolotl/docs/telemetry.html.
|
||||
|
||||
Returns:
|
||||
Tuple containing:
|
||||
- Boolean denoting whether telemetry is enabled or disabled.
|
||||
- Boolean denoting whether telemetry is explicitly enabled or not.
|
||||
- Boolean denoting whether telemetry is enabled or not.
|
||||
"""
|
||||
# Parse relevant env vars and fill opt-in default values
|
||||
axolotl_do_not_track = os.getenv("AXOLOTL_DO_NOT_TRACK")
|
||||
do_not_track = os.getenv("DO_NOT_TRACK")
|
||||
|
||||
# If explicitly enabled, we'll disable the telemetry warning message
|
||||
explicit_enabled = axolotl_do_not_track in ["0", "false"]
|
||||
|
||||
# Default to disabled (opt-in model for initial release)
|
||||
if axolotl_do_not_track is None:
|
||||
axolotl_do_not_track = "0"
|
||||
# Print opt-in info message for main process only
|
||||
if is_main_process():
|
||||
LOG.info(OPT_IN_INFO)
|
||||
time.sleep(OPT_IN_WARNING_SLEEP_SECONDS)
|
||||
|
||||
return False
|
||||
|
||||
if do_not_track is None:
|
||||
do_not_track = "0"
|
||||
@@ -194,17 +197,11 @@ class TelemetryManager:
|
||||
"true",
|
||||
) and do_not_track.lower() not in ("1", "true")
|
||||
|
||||
# Show warning (and sleep on all ranks) unless explicitly enabled
|
||||
if enabled and not explicit_enabled:
|
||||
if is_main_process():
|
||||
LOG.warning(ENABLED_WARNING)
|
||||
time.sleep(ENABLED_WARNING_SLEEP_SECONDS)
|
||||
|
||||
# Only rank 0 will send telemetry
|
||||
if not is_main_process():
|
||||
return False, False
|
||||
return False
|
||||
|
||||
return enabled, explicit_enabled
|
||||
return enabled
|
||||
|
||||
def _load_whitelist(self) -> dict:
|
||||
"""Load HuggingFace Hub organization whitelist"""
|
||||
|
||||
@@ -58,68 +58,72 @@ def test_singleton_instance(telemetry_manager_class):
|
||||
assert telemetry_manager_class.get_instance() is first
|
||||
|
||||
|
||||
def test_telemetry_disabled_by_default(telemetry_manager_class):
|
||||
"""Test that telemetry is disabled by default (opt-in)"""
|
||||
with patch.dict(os.environ, {"RANK": "0"}, clear=True), patch("time.sleep"), patch(
|
||||
"logging.Logger.info"
|
||||
):
|
||||
manager = telemetry_manager_class()
|
||||
assert not manager.enabled
|
||||
|
||||
|
||||
def test_telemetry_enabled_with_explicit_opt_in(telemetry_manager_class):
|
||||
"""Test that telemetry is enabled when AXOLOTL_DO_NOT_TRACK=0"""
|
||||
with patch.dict(os.environ, {"AXOLOTL_DO_NOT_TRACK": "0", "RANK": "0"}), patch(
|
||||
"time.sleep"
|
||||
):
|
||||
manager = telemetry_manager_class()
|
||||
assert manager.enabled
|
||||
|
||||
|
||||
def test_telemetry_disabled_with_axolotl_do_not_track(telemetry_manager_class):
|
||||
"""Test that telemetry is disabled when AXOLOTL_DO_NOT_TRACK=1"""
|
||||
with patch.dict(os.environ, {"AXOLOTL_DO_NOT_TRACK": "1", "RANK": "0"}):
|
||||
with patch.dict(os.environ, {"AXOLOTL_DO_NOT_TRACK": "1", "RANK": "0"}), patch(
|
||||
"time.sleep"
|
||||
):
|
||||
manager = telemetry_manager_class()
|
||||
assert not manager.enabled
|
||||
|
||||
|
||||
def test_telemetry_disabled_with_do_not_track(telemetry_manager_class):
|
||||
"""Test that telemetry is disabled when DO_NOT_TRACK=1"""
|
||||
with patch.dict(os.environ, {"DO_NOT_TRACK": "1", "RANK": "0"}):
|
||||
with patch.dict(
|
||||
os.environ, {"AXOLOTL_DO_NOT_TRACK": "0", "DO_NOT_TRACK": "1", "RANK": "0"}
|
||||
), patch("time.sleep"):
|
||||
manager = telemetry_manager_class()
|
||||
assert not manager.enabled
|
||||
|
||||
|
||||
def test_telemetry_disabled_for_non_main_process(telemetry_manager_class):
|
||||
"""Test that telemetry is disabled for non-main processes"""
|
||||
with patch.dict(os.environ, {"AXOLOTL_DO_NOT_TRACK": "0", "RANK": "1"}):
|
||||
with patch.dict(os.environ, {"AXOLOTL_DO_NOT_TRACK": "0", "RANK": "1"}), patch(
|
||||
"time.sleep"
|
||||
):
|
||||
manager = telemetry_manager_class()
|
||||
assert not manager.enabled
|
||||
|
||||
|
||||
def test_telemetry_enabled_by_default(telemetry_manager_class):
|
||||
"""Test that telemetry is enabled by default"""
|
||||
with patch.dict(os.environ, {"RANK": "0"}, clear=True), patch("time.sleep"), patch(
|
||||
"logging.Logger.warning"
|
||||
def test_opt_in_info_displayed(telemetry_manager_class):
|
||||
"""Test that opt-in info is displayed when telemetry is not configured"""
|
||||
with patch.dict(os.environ, {"RANK": "0"}, clear=True), patch(
|
||||
"logging.Logger.info"
|
||||
) as mock_info, patch("time.sleep"):
|
||||
telemetry_manager_class()
|
||||
info_displayed = False
|
||||
for call in mock_info.call_args_list:
|
||||
if "Telemetry is currently disabled by default" in str(call):
|
||||
info_displayed = True
|
||||
break
|
||||
assert info_displayed
|
||||
|
||||
|
||||
def test_is_whitelisted(telemetry_manager_class, mock_whitelist):
|
||||
"""Test org whitelist functionality"""
|
||||
with patch("axolotl.telemetry.manager.WHITELIST_PATH", mock_whitelist), patch.dict(
|
||||
os.environ, {"AXOLOTL_DO_NOT_TRACK": "0"}
|
||||
):
|
||||
manager = telemetry_manager_class()
|
||||
assert manager.enabled
|
||||
assert not manager.explicit_enable
|
||||
|
||||
|
||||
def test_explicit_enable_disables_warning(telemetry_manager_class):
|
||||
"""Test that explicit enabling prevents warning"""
|
||||
with patch.dict(os.environ, {"AXOLOTL_DO_NOT_TRACK": "0", "RANK": "0"}), patch(
|
||||
"logging.Logger.warning"
|
||||
) as mock_warning, patch("time.sleep"):
|
||||
manager = telemetry_manager_class()
|
||||
assert manager.enabled
|
||||
assert manager.explicit_enable
|
||||
for call in mock_warning.call_args_list:
|
||||
assert "Telemetry is enabled" not in str(call)
|
||||
|
||||
|
||||
def test_warning_displayed_for_implicit_enable(telemetry_manager_class):
|
||||
"""Test that warning is displayed when telemetry is implicitly enabled"""
|
||||
with patch.dict(os.environ, {"RANK": "0"}, clear=True), patch(
|
||||
"logging.Logger.warning"
|
||||
) as mock_warning, patch("time.sleep"):
|
||||
manager = telemetry_manager_class()
|
||||
assert manager.enabled
|
||||
assert not manager.explicit_enable
|
||||
warning_displayed = False
|
||||
for call in mock_warning.call_args_list:
|
||||
if "Telemetry is enabled" in str(call):
|
||||
warning_displayed = True
|
||||
break
|
||||
assert warning_displayed
|
||||
|
||||
|
||||
def test_is_whitelisted(manager, mock_whitelist):
|
||||
"""Test org whitelist functionality"""
|
||||
with patch("axolotl.telemetry.manager.WHITELIST_PATH", mock_whitelist):
|
||||
# Should match organizations from the mock whitelist
|
||||
assert manager._is_whitelisted("meta-llama/llama-7b")
|
||||
assert manager._is_whitelisted("mistralai/mistral-7b-instruct")
|
||||
@@ -139,17 +143,18 @@ def test_system_info_collection(manager):
|
||||
# Check essential keys
|
||||
assert "os" in system_info
|
||||
assert "python_version" in system_info
|
||||
assert "torch_version" in system_info
|
||||
assert "transformers_version" in system_info
|
||||
assert "axolotl_version" in system_info
|
||||
assert "cpu_count" in system_info
|
||||
assert "memory_total" in system_info
|
||||
assert "accelerator_count" in system_info
|
||||
|
||||
|
||||
def test_send_event(manager):
|
||||
def test_send_event(telemetry_manager_class):
|
||||
"""Test basic event sending"""
|
||||
with patch("posthog.capture") as mock_capture:
|
||||
with patch("posthog.capture") as mock_capture, patch.dict(
|
||||
os.environ, {"AXOLOTL_DO_NOT_TRACK": "0"}
|
||||
):
|
||||
manager = telemetry_manager_class()
|
||||
|
||||
# Test with clean properties (no PII)
|
||||
manager.send_event("test_event", {"key": "value"})
|
||||
assert mock_capture.called
|
||||
@@ -164,18 +169,24 @@ def test_send_event(manager):
|
||||
assert mock_capture.call_args[1]["properties"] == {}
|
||||
|
||||
|
||||
def test_send_system_info(manager):
|
||||
def test_send_system_info(telemetry_manager_class):
|
||||
"""Test sending system info"""
|
||||
with patch("posthog.capture") as mock_capture:
|
||||
with patch("posthog.capture") as mock_capture, patch.dict(
|
||||
os.environ, {"AXOLOTL_DO_NOT_TRACK": "0"}
|
||||
):
|
||||
manager = telemetry_manager_class()
|
||||
manager.send_system_info()
|
||||
assert mock_capture.called
|
||||
assert mock_capture.call_args[1]["event"] == "system-info"
|
||||
assert mock_capture.call_args[1]["properties"] == manager.system_info
|
||||
|
||||
|
||||
def test_redacted_properties(manager):
|
||||
def test_redacted_properties(telemetry_manager_class):
|
||||
"""Test path redaction in send_event method"""
|
||||
with patch("posthog.capture") as mock_capture:
|
||||
with patch("posthog.capture") as mock_capture, patch.dict(
|
||||
os.environ, {"AXOLOTL_DO_NOT_TRACK": "0"}
|
||||
):
|
||||
manager = telemetry_manager_class()
|
||||
# Test with properties containing various paths and non-paths
|
||||
test_properties = {
|
||||
"filepath": "/home/user/sensitive/data.txt",
|
||||
|
||||
Reference in New Issue
Block a user