diff --git a/docs/telemetry.qmd b/docs/telemetry.qmd index aab3ff529..b7c4cb11d 100644 --- a/docs/telemetry.qmd +++ b/docs/telemetry.qmd @@ -1,6 +1,6 @@ --- title: Telemetry -description: A description of the opt-out telemetry implementation in Axolotl. +description: A description of the opt-in telemetry implementation in Axolotl. --- # Telemetry in Axolotl @@ -41,13 +41,13 @@ aware of data collection, unless telemetry is explicitly enabled or disabled. ## Opt-Out Mechanism -Telemetry is **enabled by default** on an opt-out basis. To disable it, set either: +Telemetry is **disabled by default** on an opt-in basis. To enable it, set: `AXOLOTL_DO_NOT_TRACK=0`. -- `AXOLOTL_DO_NOT_TRACK=1` (Axolotl-specific) -- `DO_NOT_TRACK=1` (Global standard; see https://consoledonottrack.com/) +To remove the warning message about telemetry that is displayed on train, etc. startup, +explicitly set: `AXOLOTL_DO_NOT_TRACK=0` (enable telemetry) or `AXOLOTL_DO_NOT_TRACK=1` +(explicitly disable telemetry). -To acknowledge and explicitly enable telemetry (and remove the warning message), set: -`AXOLOTL_DO_NOT_TRACK=0`. +**Note**: Telemetry will move to an opt-out model in a later release. ## Privacy diff --git a/src/axolotl/telemetry/manager.py b/src/axolotl/telemetry/manager.py index 71bb5ca2a..ae756d0e3 100644 --- a/src/axolotl/telemetry/manager.py +++ b/src/axolotl/telemetry/manager.py @@ -20,21 +20,21 @@ LOG = logging.getLogger(__name__) POSTHOG_HOST = "https://app.posthog.com" POSTHOG_WRITE_KEY = "phc_1kUR0o04oJKKTTeSsIz2Mfm5mpiVsQEf2WOlzljMD7y" -ENABLED_WARNING_SLEEP_SECONDS = 15 -ENABLED_WARNING = ( - "\nTelemetry is enabled. This helps Axolotl's maintainers by providing insights into:\n" - "- Which models and configurations are most commonly used\n" - "- What hardware setups need to be supported\n" +OPT_IN_WARNING_SLEEP_SECONDS = 15 +OPT_IN_INFO = ( + "\nTelemetry is currently disabled by default. 
If you'd like to help improve " + "Axolotl, consider enabling it by setting:\n" + "AXOLOTL_DO_NOT_TRACK=0\n\n" + "Telemetry data helps us understand:\n" + "- Which features are most used\n" + "- What hardware configurations to prioritize\n" "- Where users encounter errors\n\n" - "This data helps us prioritize features, optimize performance, and fix bugs.\n\n" - "To disable telemetry, set either:\n" - "- AXOLOTL_DO_NOT_TRACK=1 (Axolotl-specific)\n" - "- DO_NOT_TRACK=1 (Global standard; see https://consoledonottrack.com/)\n\n" - "To remove this warning and continue with telemetry enabled," - "explicitly set AXOLOTL_DO_NOT_TRACK=0 (and leave DO_NOT_TRACK unset / set to 0)\n\n" - "No personally identifiable information is collected." - "For details, see: https://axolotl-ai-cloud.github.io/axolotl/docs/telemetry.html\n\n" - f"Sleeping for {ENABLED_WARNING_SLEEP_SECONDS}s..." + "No personally identifiable information is collected.\n" + "To remove this warning, explicitly set AXOLOTL_DO_NOT_TRACK=0 (enable telemetry) " + "or AXOLOTL_DO_NOT_TRACK=1 (explicitly disable telemetry).\n\n" + "NOTE: Telemetry will move to an opt-out in a later release.\n" + "For details, see: https://axolotl-ai-cloud.github.io/axolotl/docs/telemetry.html\n" + f"Sleeping for {OPT_IN_WARNING_SLEEP_SECONDS}s..." ) WHITELIST_PATH = str(Path(__file__).parent / "whitelist.yaml") @@ -134,7 +134,7 @@ class TelemetryManager: if self._initialized: return - self.enabled, self.explicit_enable = self._check_telemetry_enabled() + self.enabled = self._check_telemetry_enabled() if self.enabled: self.run_id = str(uuid.uuid4()) @@ -160,30 +160,33 @@ class TelemetryManager: return cls._instance - def _check_telemetry_enabled(self) -> tuple[bool, bool]: + def _check_telemetry_enabled(self) -> bool: """ Check if telemetry is enabled based on environment variables. We also check whether this is the main process (for the distributed setting and to avoid sending duplicate PostHog events per GPU). 
- Note: This is enabled by default on an opt-out basis. Set either - `AXOLOTL_DO_NOT_TRACK=1` or `DO_NOT_TRACK=1` to disable telemetry. For more - details, see https://axolotl-ai-cloud.github.io/axolotl/docs/telemetry.html. + Note: This is disabled by default on an opt-in basis. Set + `AXOLOTL_DO_NOT_TRACK=0` to enable telemetry. We plan to move to an opt-out + model in a later release. For more details, see + https://axolotl-ai-cloud.github.io/axolotl/docs/telemetry.html. Returns: Tuple containing: - - Boolean denoting whether telemetry is enabled or disabled. - - Boolean denoting whether telemetry is explicitly enabled or not. + - Boolean denoting whether telemetry is enabled or not. """ # Parse relevant env vars and fill opt-out default values axolotl_do_not_track = os.getenv("AXOLOTL_DO_NOT_TRACK") do_not_track = os.getenv("DO_NOT_TRACK") - # If explicitly enabled, we'll disable the telemetry warning message - explicit_enabled = axolotl_do_not_track in ["0", "false"] - + # Default to disabled (opt-in model for initial release) if axolotl_do_not_track is None: - axolotl_do_not_track = "0" + # Print opt-in info message for main process only + if is_main_process(): + LOG.info(OPT_IN_INFO) + time.sleep(OPT_IN_WARNING_SLEEP_SECONDS) + + return False if do_not_track is None: do_not_track = "0" @@ -194,17 +197,11 @@ class TelemetryManager: "true", ) and do_not_track.lower() not in ("1", "true") - # Show warning (and sleep on all ranks) unless explicitly enabled - if enabled and not explicit_enabled: - if is_main_process(): - LOG.warning(ENABLED_WARNING) - time.sleep(ENABLED_WARNING_SLEEP_SECONDS) - # Only rank 0 will send telemetry if not is_main_process(): - return False, False + return False - return enabled, explicit_enabled + return enabled def _load_whitelist(self) -> dict: """Load HuggingFace Hub organization whitelist""" diff --git a/tests/telemetry/test_manager.py b/tests/telemetry/test_manager.py index e01ab9339..3e8391393 100644 --- 
a/tests/telemetry/test_manager.py +++ b/tests/telemetry/test_manager.py @@ -58,68 +58,72 @@ def test_singleton_instance(telemetry_manager_class): assert telemetry_manager_class.get_instance() is first +def test_telemetry_disabled_by_default(telemetry_manager_class): + """Test that telemetry is disabled by default (opt-in)""" + with patch.dict(os.environ, {"RANK": "0"}, clear=True), patch("time.sleep"), patch( + "logging.Logger.info" + ): + manager = telemetry_manager_class() + assert not manager.enabled + + +def test_telemetry_enabled_with_explicit_opt_in(telemetry_manager_class): + """Test that telemetry is enabled when AXOLOTL_DO_NOT_TRACK=0""" + with patch.dict(os.environ, {"AXOLOTL_DO_NOT_TRACK": "0", "RANK": "0"}), patch( + "time.sleep" + ): + manager = telemetry_manager_class() + assert manager.enabled + + def test_telemetry_disabled_with_axolotl_do_not_track(telemetry_manager_class): """Test that telemetry is disabled when AXOLOTL_DO_NOT_TRACK=1""" - with patch.dict(os.environ, {"AXOLOTL_DO_NOT_TRACK": "1", "RANK": "0"}): + with patch.dict(os.environ, {"AXOLOTL_DO_NOT_TRACK": "1", "RANK": "0"}), patch( + "time.sleep" + ): manager = telemetry_manager_class() assert not manager.enabled def test_telemetry_disabled_with_do_not_track(telemetry_manager_class): """Test that telemetry is disabled when DO_NOT_TRACK=1""" - with patch.dict(os.environ, {"DO_NOT_TRACK": "1", "RANK": "0"}): + with patch.dict( + os.environ, {"AXOLOTL_DO_NOT_TRACK": "0", "DO_NOT_TRACK": "1", "RANK": "0"} + ), patch("time.sleep"): manager = telemetry_manager_class() assert not manager.enabled def test_telemetry_disabled_for_non_main_process(telemetry_manager_class): """Test that telemetry is disabled for non-main processes""" - with patch.dict(os.environ, {"AXOLOTL_DO_NOT_TRACK": "0", "RANK": "1"}): + with patch.dict(os.environ, {"AXOLOTL_DO_NOT_TRACK": "0", "RANK": "1"}), patch( + "time.sleep" + ): manager = telemetry_manager_class() assert not manager.enabled -def 
test_telemetry_enabled_by_default(telemetry_manager_class): - """Test that telemetry is enabled by default""" - with patch.dict(os.environ, {"RANK": "0"}, clear=True), patch("time.sleep"), patch( - "logging.Logger.warning" +def test_opt_in_info_displayed(telemetry_manager_class): + """Test that opt-in info is displayed when telemetry is not configured""" + with patch.dict(os.environ, {"RANK": "0"}, clear=True), patch( + "logging.Logger.info" + ) as mock_info, patch("time.sleep"): + telemetry_manager_class() + info_displayed = False + for call in mock_info.call_args_list: + if "Telemetry is currently disabled by default" in str(call): + info_displayed = True + break + assert info_displayed + + +def test_is_whitelisted(telemetry_manager_class, mock_whitelist): + """Test org whitelist functionality""" + with patch("axolotl.telemetry.manager.WHITELIST_PATH", mock_whitelist), patch.dict( + os.environ, {"AXOLOTL_DO_NOT_TRACK": "0"} ): manager = telemetry_manager_class() - assert manager.enabled - assert not manager.explicit_enable - -def test_explicit_enable_disables_warning(telemetry_manager_class): - """Test that explicit enabling prevents warning""" - with patch.dict(os.environ, {"AXOLOTL_DO_NOT_TRACK": "0", "RANK": "0"}), patch( - "logging.Logger.warning" - ) as mock_warning, patch("time.sleep"): - manager = telemetry_manager_class() - assert manager.enabled - assert manager.explicit_enable - for call in mock_warning.call_args_list: - assert "Telemetry is enabled" not in str(call) - - -def test_warning_displayed_for_implicit_enable(telemetry_manager_class): - """Test that warning is displayed when telemetry is implicitly enabled""" - with patch.dict(os.environ, {"RANK": "0"}, clear=True), patch( - "logging.Logger.warning" - ) as mock_warning, patch("time.sleep"): - manager = telemetry_manager_class() - assert manager.enabled - assert not manager.explicit_enable - warning_displayed = False - for call in mock_warning.call_args_list: - if "Telemetry is enabled" in 
str(call): - warning_displayed = True - break - assert warning_displayed - - -def test_is_whitelisted(manager, mock_whitelist): - """Test org whitelist functionality""" - with patch("axolotl.telemetry.manager.WHITELIST_PATH", mock_whitelist): # Should match organizations from the mock whitelist assert manager._is_whitelisted("meta-llama/llama-7b") assert manager._is_whitelisted("mistralai/mistral-7b-instruct") @@ -139,17 +143,18 @@ def test_system_info_collection(manager): # Check essential keys assert "os" in system_info assert "python_version" in system_info - assert "torch_version" in system_info - assert "transformers_version" in system_info - assert "axolotl_version" in system_info assert "cpu_count" in system_info assert "memory_total" in system_info assert "accelerator_count" in system_info -def test_send_event(manager): +def test_send_event(telemetry_manager_class): """Test basic event sending""" - with patch("posthog.capture") as mock_capture: + with patch("posthog.capture") as mock_capture, patch.dict( + os.environ, {"AXOLOTL_DO_NOT_TRACK": "0"} + ): + manager = telemetry_manager_class() + # Test with clean properties (no PII) manager.send_event("test_event", {"key": "value"}) assert mock_capture.called @@ -164,18 +169,24 @@ def test_send_event(manager): assert mock_capture.call_args[1]["properties"] == {} -def test_send_system_info(manager): +def test_send_system_info(telemetry_manager_class): """Test sending system info""" - with patch("posthog.capture") as mock_capture: + with patch("posthog.capture") as mock_capture, patch.dict( + os.environ, {"AXOLOTL_DO_NOT_TRACK": "0"} + ): + manager = telemetry_manager_class() manager.send_system_info() assert mock_capture.called assert mock_capture.call_args[1]["event"] == "system-info" assert mock_capture.call_args[1]["properties"] == manager.system_info -def test_redacted_properties(manager): +def test_redacted_properties(telemetry_manager_class): """Test path redaction in send_event method""" - with 
patch("posthog.capture") as mock_capture: + with patch("posthog.capture") as mock_capture, patch.dict( + os.environ, {"AXOLOTL_DO_NOT_TRACK": "0"} + ): + manager = telemetry_manager_class() # Test with properties containing various paths and non-paths test_properties = { "filepath": "/home/user/sensitive/data.txt",