opt-in version of telemetry
This commit is contained in:
committed by
Dan Saunders
parent
b2f1fc109a
commit
1c74ab175f
@@ -1,6 +1,6 @@
|
|||||||
---
|
---
|
||||||
title: Telemetry
|
title: Telemetry
|
||||||
description: A description of the opt-out telemetry implementation in Axolotl.
|
description: A description of the opt-in telemetry implementation in Axolotl.
|
||||||
---
|
---
|
||||||
|
|
||||||
# Telemetry in Axolotl
|
# Telemetry in Axolotl
|
||||||
@@ -41,13 +41,13 @@ aware of data collection, unless telemetry is explicitly enabled or disabled.
|
|||||||
|
|
||||||
## Opt-Out Mechanism
|
## Opt-In Mechanism
|
||||||
|
|
||||||
Telemetry is **enabled by default** on an opt-out basis. To disable it, set either:
|
Telemetry is **disabled by default** on an opt-in basis. To enable it, set: `AXOLOTL_DO_NOT_TRACK=0`.
|
||||||
|
|
||||||
- `AXOLOTL_DO_NOT_TRACK=1` (Axolotl-specific)
|
To remove the telemetry warning message that is displayed at startup (of training, etc.),
|
||||||
- `DO_NOT_TRACK=1` (Global standard; see https://consoledonottrack.com/)
|
explicitly set: `AXOLOTL_DO_NOT_TRACK=0` (enable telemetry) or `AXOLOTL_DO_NOT_TRACK=1`
|
||||||
|
(explicitly disable telemetry).
|
||||||
|
|
||||||
To acknowledge and explicitly enable telemetry (and remove the warning message), set:
|
**Note**: Telemetry will move to an opt-out model in a later release.
|
||||||
`AXOLOTL_DO_NOT_TRACK=0`.
|
|
||||||
|
|
||||||
## Privacy
|
## Privacy
|
||||||
|
|
||||||
|
|||||||
@@ -20,21 +20,21 @@ LOG = logging.getLogger(__name__)
|
|||||||
POSTHOG_HOST = "https://app.posthog.com"
|
POSTHOG_HOST = "https://app.posthog.com"
|
||||||
POSTHOG_WRITE_KEY = "phc_1kUR0o04oJKKTTeSsIz2Mfm5mpiVsQEf2WOlzljMD7y"
|
POSTHOG_WRITE_KEY = "phc_1kUR0o04oJKKTTeSsIz2Mfm5mpiVsQEf2WOlzljMD7y"
|
||||||
|
|
||||||
ENABLED_WARNING_SLEEP_SECONDS = 15
|
OPT_IN_WARNING_SLEEP_SECONDS = 15
|
||||||
ENABLED_WARNING = (
|
OPT_IN_INFO = (
|
||||||
"\nTelemetry is enabled. This helps Axolotl's maintainers by providing insights into:\n"
|
"\nTelemetry is currently disabled by default. If you'd like to help improve "
|
||||||
"- Which models and configurations are most commonly used\n"
|
"Axolotl, consider enabling it by setting:\n"
|
||||||
"- What hardware setups need to be supported\n"
|
"AXOLOTL_DO_NOT_TRACK=0\n\n"
|
||||||
|
"Telemetry data helps us understand:\n"
|
||||||
|
"- Which features are most used\n"
|
||||||
|
"- What hardware configurations to prioritize\n"
|
||||||
"- Where users encounter errors\n\n"
|
"- Where users encounter errors\n\n"
|
||||||
"This data helps us prioritize features, optimize performance, and fix bugs.\n\n"
|
"No personally identifiable information is collected.\n"
|
||||||
"To disable telemetry, set either:\n"
|
"To remove this warning, explicitly set AXOLOTL_DO_NOT_TRACK=0 (enable telemetry) "
|
||||||
"- AXOLOTL_DO_NOT_TRACK=1 (Axolotl-specific)\n"
|
"or AXOLOTL_DO_NOT_TRACK=1 (explicitly disable telemetry).\n\n"
|
||||||
"- DO_NOT_TRACK=1 (Global standard; see https://consoledonottrack.com/)\n\n"
|
"NOTE: Telemetry will move to an opt-out in a later release.\n"
|
||||||
"To remove this warning and continue with telemetry enabled,"
|
"For details, see: https://axolotl-ai-cloud.github.io/axolotl/docs/telemetry.html\n"
|
||||||
"explicitly set AXOLOTL_DO_NOT_TRACK=0 (and leave DO_NOT_TRACK unset / set to 0)\n\n"
|
f"Sleeping for {OPT_IN_WARNING_SLEEP_SECONDS}s..."
|
||||||
"No personally identifiable information is collected."
|
|
||||||
"For details, see: https://axolotl-ai-cloud.github.io/axolotl/docs/telemetry.html\n\n"
|
|
||||||
f"Sleeping for {ENABLED_WARNING_SLEEP_SECONDS}s..."
|
|
||||||
)
|
)
|
||||||
|
|
||||||
WHITELIST_PATH = str(Path(__file__).parent / "whitelist.yaml")
|
WHITELIST_PATH = str(Path(__file__).parent / "whitelist.yaml")
|
||||||
@@ -134,7 +134,7 @@ class TelemetryManager:
|
|||||||
if self._initialized:
|
if self._initialized:
|
||||||
return
|
return
|
||||||
|
|
||||||
self.enabled, self.explicit_enable = self._check_telemetry_enabled()
|
self.enabled = self._check_telemetry_enabled()
|
||||||
|
|
||||||
if self.enabled:
|
if self.enabled:
|
||||||
self.run_id = str(uuid.uuid4())
|
self.run_id = str(uuid.uuid4())
|
||||||
@@ -160,30 +160,33 @@ class TelemetryManager:
|
|||||||
|
|
||||||
return cls._instance
|
return cls._instance
|
||||||
|
|
||||||
def _check_telemetry_enabled(self) -> tuple[bool, bool]:
|
def _check_telemetry_enabled(self) -> bool:
|
||||||
"""
|
"""
|
||||||
Check if telemetry is enabled based on environment variables. We also check
|
Check if telemetry is enabled based on environment variables. We also check
|
||||||
whether this is the main process (for the distributed setting and to avoid
|
whether this is the main process (for the distributed setting and to avoid
|
||||||
sending duplicate PostHog events per GPU).
|
sending duplicate PostHog events per GPU).
|
||||||
|
|
||||||
Note: This is enabled by default on an opt-out basis. Set either
|
Note: This is disabled by default on an opt-in basis. Set
|
||||||
`AXOLOTL_DO_NOT_TRACK=1` or `DO_NOT_TRACK=1` to disable telemetry. For more
|
`AXOLOTL_DO_NOT_TRACK=0` to enable telemetry. We plan to move to an opt-out
|
||||||
details, see https://axolotl-ai-cloud.github.io/axolotl/docs/telemetry.html.
|
model in a later release. For more details, see
|
||||||
|
https://axolotl-ai-cloud.github.io/axolotl/docs/telemetry.html.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple containing:
|
Tuple containing:
|
||||||
- Boolean denoting whether telemetry is enabled or disabled.
|
- Boolean denoting whether telemetry is enabled or not.
|
||||||
- Boolean denoting whether telemetry is explicitly enabled or not.
|
|
||||||
"""
|
"""
|
||||||
# Parse relevant env vars and fill opt-out default values
|
# Parse relevant env vars and fill opt-out default values
|
||||||
axolotl_do_not_track = os.getenv("AXOLOTL_DO_NOT_TRACK")
|
axolotl_do_not_track = os.getenv("AXOLOTL_DO_NOT_TRACK")
|
||||||
do_not_track = os.getenv("DO_NOT_TRACK")
|
do_not_track = os.getenv("DO_NOT_TRACK")
|
||||||
|
|
||||||
# If explicitly enabled, we'll disable the telemetry warning message
|
# Default to disabled (opt-in model for initial release)
|
||||||
explicit_enabled = axolotl_do_not_track in ["0", "false"]
|
|
||||||
|
|
||||||
if axolotl_do_not_track is None:
|
if axolotl_do_not_track is None:
|
||||||
axolotl_do_not_track = "0"
|
# Print opt-in info message for main process only
|
||||||
|
if is_main_process():
|
||||||
|
LOG.info(OPT_IN_INFO)
|
||||||
|
time.sleep(OPT_IN_WARNING_SLEEP_SECONDS)
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
if do_not_track is None:
|
if do_not_track is None:
|
||||||
do_not_track = "0"
|
do_not_track = "0"
|
||||||
@@ -194,17 +197,11 @@ class TelemetryManager:
|
|||||||
"true",
|
"true",
|
||||||
) and do_not_track.lower() not in ("1", "true")
|
) and do_not_track.lower() not in ("1", "true")
|
||||||
|
|
||||||
# Show warning (and sleep on all ranks) unless explicitly enabled
|
|
||||||
if enabled and not explicit_enabled:
|
|
||||||
if is_main_process():
|
|
||||||
LOG.warning(ENABLED_WARNING)
|
|
||||||
time.sleep(ENABLED_WARNING_SLEEP_SECONDS)
|
|
||||||
|
|
||||||
# Only rank 0 will send telemetry
|
# Only rank 0 will send telemetry
|
||||||
if not is_main_process():
|
if not is_main_process():
|
||||||
return False, False
|
return False
|
||||||
|
|
||||||
return enabled, explicit_enabled
|
return enabled
|
||||||
|
|
||||||
def _load_whitelist(self) -> dict:
|
def _load_whitelist(self) -> dict:
|
||||||
"""Load HuggingFace Hub organization whitelist"""
|
"""Load HuggingFace Hub organization whitelist"""
|
||||||
|
|||||||
@@ -58,68 +58,72 @@ def test_singleton_instance(telemetry_manager_class):
|
|||||||
assert telemetry_manager_class.get_instance() is first
|
assert telemetry_manager_class.get_instance() is first
|
||||||
|
|
||||||
|
|
||||||
|
def test_telemetry_disabled_by_default(telemetry_manager_class):
|
||||||
|
"""Test that telemetry is disabled by default (opt-in)"""
|
||||||
|
with patch.dict(os.environ, {"RANK": "0"}, clear=True), patch("time.sleep"), patch(
|
||||||
|
"logging.Logger.info"
|
||||||
|
):
|
||||||
|
manager = telemetry_manager_class()
|
||||||
|
assert not manager.enabled
|
||||||
|
|
||||||
|
|
||||||
|
def test_telemetry_enabled_with_explicit_opt_in(telemetry_manager_class):
|
||||||
|
"""Test that telemetry is enabled when AXOLOTL_DO_NOT_TRACK=0"""
|
||||||
|
with patch.dict(os.environ, {"AXOLOTL_DO_NOT_TRACK": "0", "RANK": "0"}), patch(
|
||||||
|
"time.sleep"
|
||||||
|
):
|
||||||
|
manager = telemetry_manager_class()
|
||||||
|
assert manager.enabled
|
||||||
|
|
||||||
|
|
||||||
def test_telemetry_disabled_with_axolotl_do_not_track(telemetry_manager_class):
|
def test_telemetry_disabled_with_axolotl_do_not_track(telemetry_manager_class):
|
||||||
"""Test that telemetry is disabled when AXOLOTL_DO_NOT_TRACK=1"""
|
"""Test that telemetry is disabled when AXOLOTL_DO_NOT_TRACK=1"""
|
||||||
with patch.dict(os.environ, {"AXOLOTL_DO_NOT_TRACK": "1", "RANK": "0"}):
|
with patch.dict(os.environ, {"AXOLOTL_DO_NOT_TRACK": "1", "RANK": "0"}), patch(
|
||||||
|
"time.sleep"
|
||||||
|
):
|
||||||
manager = telemetry_manager_class()
|
manager = telemetry_manager_class()
|
||||||
assert not manager.enabled
|
assert not manager.enabled
|
||||||
|
|
||||||
|
|
||||||
def test_telemetry_disabled_with_do_not_track(telemetry_manager_class):
|
def test_telemetry_disabled_with_do_not_track(telemetry_manager_class):
|
||||||
"""Test that telemetry is disabled when DO_NOT_TRACK=1"""
|
"""Test that telemetry is disabled when DO_NOT_TRACK=1"""
|
||||||
with patch.dict(os.environ, {"DO_NOT_TRACK": "1", "RANK": "0"}):
|
with patch.dict(
|
||||||
|
os.environ, {"AXOLOTL_DO_NOT_TRACK": "0", "DO_NOT_TRACK": "1", "RANK": "0"}
|
||||||
|
), patch("time.sleep"):
|
||||||
manager = telemetry_manager_class()
|
manager = telemetry_manager_class()
|
||||||
assert not manager.enabled
|
assert not manager.enabled
|
||||||
|
|
||||||
|
|
||||||
def test_telemetry_disabled_for_non_main_process(telemetry_manager_class):
|
def test_telemetry_disabled_for_non_main_process(telemetry_manager_class):
|
||||||
"""Test that telemetry is disabled for non-main processes"""
|
"""Test that telemetry is disabled for non-main processes"""
|
||||||
with patch.dict(os.environ, {"AXOLOTL_DO_NOT_TRACK": "0", "RANK": "1"}):
|
with patch.dict(os.environ, {"AXOLOTL_DO_NOT_TRACK": "0", "RANK": "1"}), patch(
|
||||||
|
"time.sleep"
|
||||||
|
):
|
||||||
manager = telemetry_manager_class()
|
manager = telemetry_manager_class()
|
||||||
assert not manager.enabled
|
assert not manager.enabled
|
||||||
|
|
||||||
|
|
||||||
def test_telemetry_enabled_by_default(telemetry_manager_class):
|
def test_opt_in_info_displayed(telemetry_manager_class):
|
||||||
"""Test that telemetry is enabled by default"""
|
"""Test that opt-in info is displayed when telemetry is not configured"""
|
||||||
with patch.dict(os.environ, {"RANK": "0"}, clear=True), patch("time.sleep"), patch(
|
with patch.dict(os.environ, {"RANK": "0"}, clear=True), patch(
|
||||||
"logging.Logger.warning"
|
"logging.Logger.info"
|
||||||
|
) as mock_info, patch("time.sleep"):
|
||||||
|
telemetry_manager_class()
|
||||||
|
info_displayed = False
|
||||||
|
for call in mock_info.call_args_list:
|
||||||
|
if "Telemetry is currently disabled by default" in str(call):
|
||||||
|
info_displayed = True
|
||||||
|
break
|
||||||
|
assert info_displayed
|
||||||
|
|
||||||
|
|
||||||
|
def test_is_whitelisted(telemetry_manager_class, mock_whitelist):
|
||||||
|
"""Test org whitelist functionality"""
|
||||||
|
with patch("axolotl.telemetry.manager.WHITELIST_PATH", mock_whitelist), patch.dict(
|
||||||
|
os.environ, {"AXOLOTL_DO_NOT_TRACK": "0"}
|
||||||
):
|
):
|
||||||
manager = telemetry_manager_class()
|
manager = telemetry_manager_class()
|
||||||
assert manager.enabled
|
|
||||||
assert not manager.explicit_enable
|
|
||||||
|
|
||||||
|
|
||||||
def test_explicit_enable_disables_warning(telemetry_manager_class):
|
|
||||||
"""Test that explicit enabling prevents warning"""
|
|
||||||
with patch.dict(os.environ, {"AXOLOTL_DO_NOT_TRACK": "0", "RANK": "0"}), patch(
|
|
||||||
"logging.Logger.warning"
|
|
||||||
) as mock_warning, patch("time.sleep"):
|
|
||||||
manager = telemetry_manager_class()
|
|
||||||
assert manager.enabled
|
|
||||||
assert manager.explicit_enable
|
|
||||||
for call in mock_warning.call_args_list:
|
|
||||||
assert "Telemetry is enabled" not in str(call)
|
|
||||||
|
|
||||||
|
|
||||||
def test_warning_displayed_for_implicit_enable(telemetry_manager_class):
|
|
||||||
"""Test that warning is displayed when telemetry is implicitly enabled"""
|
|
||||||
with patch.dict(os.environ, {"RANK": "0"}, clear=True), patch(
|
|
||||||
"logging.Logger.warning"
|
|
||||||
) as mock_warning, patch("time.sleep"):
|
|
||||||
manager = telemetry_manager_class()
|
|
||||||
assert manager.enabled
|
|
||||||
assert not manager.explicit_enable
|
|
||||||
warning_displayed = False
|
|
||||||
for call in mock_warning.call_args_list:
|
|
||||||
if "Telemetry is enabled" in str(call):
|
|
||||||
warning_displayed = True
|
|
||||||
break
|
|
||||||
assert warning_displayed
|
|
||||||
|
|
||||||
|
|
||||||
def test_is_whitelisted(manager, mock_whitelist):
|
|
||||||
"""Test org whitelist functionality"""
|
|
||||||
with patch("axolotl.telemetry.manager.WHITELIST_PATH", mock_whitelist):
|
|
||||||
# Should match organizations from the mock whitelist
|
# Should match organizations from the mock whitelist
|
||||||
assert manager._is_whitelisted("meta-llama/llama-7b")
|
assert manager._is_whitelisted("meta-llama/llama-7b")
|
||||||
assert manager._is_whitelisted("mistralai/mistral-7b-instruct")
|
assert manager._is_whitelisted("mistralai/mistral-7b-instruct")
|
||||||
@@ -139,17 +143,18 @@ def test_system_info_collection(manager):
|
|||||||
# Check essential keys
|
# Check essential keys
|
||||||
assert "os" in system_info
|
assert "os" in system_info
|
||||||
assert "python_version" in system_info
|
assert "python_version" in system_info
|
||||||
assert "torch_version" in system_info
|
|
||||||
assert "transformers_version" in system_info
|
|
||||||
assert "axolotl_version" in system_info
|
|
||||||
assert "cpu_count" in system_info
|
assert "cpu_count" in system_info
|
||||||
assert "memory_total" in system_info
|
assert "memory_total" in system_info
|
||||||
assert "accelerator_count" in system_info
|
assert "accelerator_count" in system_info
|
||||||
|
|
||||||
|
|
||||||
def test_send_event(manager):
|
def test_send_event(telemetry_manager_class):
|
||||||
"""Test basic event sending"""
|
"""Test basic event sending"""
|
||||||
with patch("posthog.capture") as mock_capture:
|
with patch("posthog.capture") as mock_capture, patch.dict(
|
||||||
|
os.environ, {"AXOLOTL_DO_NOT_TRACK": "0"}
|
||||||
|
):
|
||||||
|
manager = telemetry_manager_class()
|
||||||
|
|
||||||
# Test with clean properties (no PII)
|
# Test with clean properties (no PII)
|
||||||
manager.send_event("test_event", {"key": "value"})
|
manager.send_event("test_event", {"key": "value"})
|
||||||
assert mock_capture.called
|
assert mock_capture.called
|
||||||
@@ -164,18 +169,24 @@ def test_send_event(manager):
|
|||||||
assert mock_capture.call_args[1]["properties"] == {}
|
assert mock_capture.call_args[1]["properties"] == {}
|
||||||
|
|
||||||
|
|
||||||
def test_send_system_info(manager):
|
def test_send_system_info(telemetry_manager_class):
|
||||||
"""Test sending system info"""
|
"""Test sending system info"""
|
||||||
with patch("posthog.capture") as mock_capture:
|
with patch("posthog.capture") as mock_capture, patch.dict(
|
||||||
|
os.environ, {"AXOLOTL_DO_NOT_TRACK": "0"}
|
||||||
|
):
|
||||||
|
manager = telemetry_manager_class()
|
||||||
manager.send_system_info()
|
manager.send_system_info()
|
||||||
assert mock_capture.called
|
assert mock_capture.called
|
||||||
assert mock_capture.call_args[1]["event"] == "system-info"
|
assert mock_capture.call_args[1]["event"] == "system-info"
|
||||||
assert mock_capture.call_args[1]["properties"] == manager.system_info
|
assert mock_capture.call_args[1]["properties"] == manager.system_info
|
||||||
|
|
||||||
|
|
||||||
def test_redacted_properties(manager):
|
def test_redacted_properties(telemetry_manager_class):
|
||||||
"""Test path redaction in send_event method"""
|
"""Test path redaction in send_event method"""
|
||||||
with patch("posthog.capture") as mock_capture:
|
with patch("posthog.capture") as mock_capture, patch.dict(
|
||||||
|
os.environ, {"AXOLOTL_DO_NOT_TRACK": "0"}
|
||||||
|
):
|
||||||
|
manager = telemetry_manager_class()
|
||||||
# Test with properties containing various paths and non-paths
|
# Test with properties containing various paths and non-paths
|
||||||
test_properties = {
|
test_properties = {
|
||||||
"filepath": "/home/user/sensitive/data.txt",
|
"filepath": "/home/user/sensitive/data.txt",
|
||||||
|
|||||||
Reference in New Issue
Block a user