simplifying
This commit is contained in:
committed by
Dan Saunders
parent
2d36c11264
commit
035e7a2f4c
@@ -21,7 +21,7 @@ POSTHOG_HOST = "https://app.posthog.com"
|
||||
POSTHOG_WRITE_KEY = "phc_1kUR0o04oJKKTTeSsIz2Mfm5mpiVsQEf2WOlzljMD7y"
|
||||
|
||||
OPT_IN_WARNING_SLEEP_SECONDS = 10
|
||||
OPT_IN_INFO = (
|
||||
OPT_IN_WARNING = (
|
||||
"\nTelemetry is currently disabled by default. If you'd like to help improve "
|
||||
"Axolotl, consider enabling it by setting AXOLOTL_DO_NOT_TRACK=0 in your environment.\n\n"
|
||||
"Telemetry data helps us understand:\n"
|
||||
@@ -38,14 +38,15 @@ OPT_IN_INFO = (
|
||||
|
||||
WHITELIST_PATH = str(Path(__file__).parent / "whitelist.yaml")
|
||||
|
||||
# NOTE: Keep these up to date with any config schema changes
|
||||
FIELDS_WITH_ORGS = {
|
||||
# NOTE: Need to keep these up to date with any config schema changes
|
||||
FIELDS_TO_REDACT = {
|
||||
"base_model",
|
||||
"tokenizer_config",
|
||||
"base_model_config",
|
||||
"pretraining_dataset", # NOTE: this field may be a string or a dictionary
|
||||
"resume_from_checkpoint",
|
||||
"hub_model_id",
|
||||
}
|
||||
FIELDS_TO_REDACT = {"resume_from_checkpoint", "hub_model_id"}
|
||||
PREFIXES_TO_REDACT = {"wandb_", "comet_", "mlflow_", "gradio_"}
|
||||
PATH_INDICATORS = {"path", "dir"}
|
||||
|
||||
@@ -187,7 +188,7 @@ class TelemetryManager:
|
||||
):
|
||||
# Print opt-in info message for main process only
|
||||
if is_main_process():
|
||||
LOG.info(OPT_IN_INFO)
|
||||
LOG.warning(OPT_IN_WARNING)
|
||||
time.sleep(OPT_IN_WARNING_SLEEP_SECONDS)
|
||||
|
||||
return False
|
||||
@@ -224,7 +225,8 @@ class TelemetryManager:
|
||||
Check if model / dataset / etc. org is in whitelist.
|
||||
|
||||
Args:
|
||||
value: Value for one of FIELDS_WITH_ORGS ("base_model", etc.).
|
||||
value: Value for one of `axolotl.telemetry.manager.FIELDS_WITH_ORGS`
|
||||
("base_model", etc.).
|
||||
|
||||
Returns:
|
||||
Boolean indicating whitelist membership.
|
||||
@@ -259,20 +261,17 @@ class TelemetryManager:
|
||||
def redact_value(value: Any, key: str = "") -> Any:
|
||||
"""Recursively sanitize values, redacting those with path-like keys"""
|
||||
if isinstance(key, str) and isinstance(value, str):
|
||||
# Fields that should be redacted if org is not whitelisted
|
||||
if key in FIELDS_WITH_ORGS:
|
||||
if not self._is_whitelisted(value):
|
||||
return "[REDACTED]"
|
||||
|
||||
# Other redaction special cases
|
||||
if (
|
||||
key in FIELDS_TO_REDACT
|
||||
or any(prefix in key for prefix in PREFIXES_TO_REDACT)
|
||||
or any(indicator in key.lower() for indicator in PATH_INDICATORS)
|
||||
):
|
||||
return "[REDACTED]"
|
||||
# Fields with whitelisted orgs don't need to be redacted
|
||||
if not self._is_whitelisted(value):
|
||||
return "[REDACTED]"
|
||||
|
||||
# Handle nested structures
|
||||
# Handle nested values
|
||||
if isinstance(value, dict):
|
||||
return {k: redact_value(v, k) for k, v in value.items()}
|
||||
if isinstance(value, list):
|
||||
|
||||
@@ -106,11 +106,12 @@ def test_telemetry_disabled_for_non_main_process(telemetry_manager_class):
|
||||
def test_opt_in_info_displayed(telemetry_manager_class):
|
||||
"""Test that opt-in info is displayed when telemetry is not configured"""
|
||||
with patch.dict(os.environ, {"RANK": "0"}, clear=True), patch(
|
||||
"logging.Logger.info"
|
||||
) as mock_info, patch("time.sleep"):
|
||||
"logging.Logger.warning"
|
||||
) as mock_warning, patch("time.sleep"):
|
||||
telemetry_manager_class()
|
||||
info_displayed = False
|
||||
for call in mock_info.call_args_list:
|
||||
for call in mock_warning.call_args_list:
|
||||
print(f"call: {call}")
|
||||
if "Telemetry is currently disabled by default" in str(call):
|
||||
info_displayed = True
|
||||
break
|
||||
|
||||
Reference in New Issue
Block a user