Fix DO_NOT_TRACK not being correctly handled (#3580)

* Fix DO_NOT_TRACK not being correctly handled

* add unit tests and lint

---------

Co-authored-by: Wing Lian <wing@axolotl.ai>
This commit is contained in:
Maxime
2026-04-04 05:16:58 -04:00
committed by GitHub
parent 08fc7de87e
commit 900eec7988
2 changed files with 58 additions and 61 deletions

View File

@@ -160,29 +160,16 @@ class TelemetryManager:
if not is_main_process():
return False
# Parse relevant env vars
axolotl_do_not_track = os.getenv("AXOLOTL_DO_NOT_TRACK")
do_not_track = os.getenv("DO_NOT_TRACK")
def is_truthy_env(var_name: str) -> bool:
    """Report whether the environment variable ``var_name`` is set to a truthy value.

    The value is stripped of surrounding whitespace and lower-cased before
    being compared against ``"1"`` and ``"true"``. An unset variable, or any
    other value, is treated as not truthy.

    Args:
        var_name: Name of the environment variable to inspect.

    Returns:
        ``True`` if the normalized value is ``"1"`` or ``"true"``, else ``False``.
    """
    raw = os.environ.get(var_name)
    # Map "unset" to the empty string so a single membership test covers both cases.
    normalized = "" if raw is None else raw.strip().lower()
    return normalized in {"1", "true"}
# Default to enabled (opt-out model)
if axolotl_do_not_track is None or axolotl_do_not_track.lower() not in (
"0",
"1",
"false",
"true",
):
return True
if do_not_track is None:
do_not_track = "0"
# Respect AXOLOTL_DO_NOT_TRACK, DO_NOT_TRACK if enabled
enabled = axolotl_do_not_track.lower() not in (
"1",
"true",
) and do_not_track.lower() not in ("1", "true")
return enabled
# Telemetry is enabled by default unless either opt-out var is set
return not (
is_truthy_env("AXOLOTL_DO_NOT_TRACK") or is_truthy_env("DO_NOT_TRACK")
)
def _load_whitelist(self) -> dict:
"""Load HuggingFace Hub organization whitelist"""

View File

@@ -65,47 +65,57 @@ def test_singleton_instance(telemetry_manager_class):
assert telemetry_manager_class.get_instance() is first
def test_telemetry_enabled_by_default(telemetry_manager_class):
"""Test that telemetry is enabled by default (opt-out)"""
with (
patch.dict(os.environ, {"RANK": "0"}, clear=True),
patch("time.sleep"),
patch("logging.Logger.info"),
class TestTelemetryOptOut:
"""
Telemetry is opt-out: enabled by default, disabled by AXOLOTL_DO_NOT_TRACK
or DO_NOT_TRACK. Each env var is checked independently — setting either one
to a truthy value ("1" or "true") disables telemetry.
The parametrized table below is the source of truth for expected behavior.
"""
# fmt: off
# AXOLOTL_DO_NOT_TRACK DO_NOT_TRACK expected
@pytest.mark.parametrize("axolotl_dnt, dnt, expected", [
# --- Neither var set: telemetry ON ---
(None, None, True),
# --- Only AXOLOTL_DO_NOT_TRACK set ---
("0", None, True), # explicit opt-in
("false", None, True), # explicit opt-in
("1", None, False), # opt-out
("true", None, False), # opt-out
(" 1 ", None, False), # whitespace-padded opt-out
# --- Only DO_NOT_TRACK set (was broken before fix) ---
(None, "0", True), # explicit opt-in
(None, "false", True), # explicit opt-in
(None, "1", False), # opt-out
(None, "true", False), # opt-out
# --- Both set: either truthy → disabled ---
("0", "1", False), # DO_NOT_TRACK wins
("1", "0", False), # AXOLOTL_DO_NOT_TRACK wins
("1", "1", False), # both opt-out
("0", "0", True), # both opt-in
])
# fmt: on
def test_do_not_track_env_vars(
self, telemetry_manager_class, axolotl_dnt, dnt, expected
):
manager = telemetry_manager_class()
assert manager.enabled
env = {"RANK": "0"}
if axolotl_dnt is not None:
env["AXOLOTL_DO_NOT_TRACK"] = axolotl_dnt
if dnt is not None:
env["DO_NOT_TRACK"] = dnt
def test_telemetry_enabled_with_explicit_opt_in(telemetry_manager_class):
"""Test that telemetry is enabled when AXOLOTL_DO_NOT_TRACK=0"""
with (
patch.dict(os.environ, {"AXOLOTL_DO_NOT_TRACK": "0", "RANK": "0"}),
patch("time.sleep"),
):
manager = telemetry_manager_class()
assert manager.enabled
def test_telemetry_disabled_with_axolotl_do_not_track(telemetry_manager_class):
"""Test that telemetry is disabled when AXOLOTL_DO_NOT_TRACK=1"""
with (
patch.dict(os.environ, {"AXOLOTL_DO_NOT_TRACK": "1", "RANK": "0"}),
patch("time.sleep"),
):
manager = telemetry_manager_class()
assert not manager.enabled
def test_telemetry_disabled_with_do_not_track(telemetry_manager_class):
"""Test that telemetry is disabled when DO_NOT_TRACK=1"""
with (
patch.dict(
os.environ, {"AXOLOTL_DO_NOT_TRACK": "0", "DO_NOT_TRACK": "1", "RANK": "0"}
),
patch("time.sleep"),
):
manager = telemetry_manager_class()
assert not manager.enabled
with (
patch.dict(os.environ, env, clear=True),
patch("time.sleep"),
patch("logging.Logger.info"),
):
manager = telemetry_manager_class()
assert manager.enabled is expected
def test_telemetry_disabled_for_non_main_process(telemetry_manager_class):