simplifying
This commit is contained in:
committed by
Dan Saunders
parent
2d36c11264
commit
035e7a2f4c
@@ -21,7 +21,7 @@ POSTHOG_HOST = "https://app.posthog.com"
|
|||||||
POSTHOG_WRITE_KEY = "phc_1kUR0o04oJKKTTeSsIz2Mfm5mpiVsQEf2WOlzljMD7y"
|
POSTHOG_WRITE_KEY = "phc_1kUR0o04oJKKTTeSsIz2Mfm5mpiVsQEf2WOlzljMD7y"
|
||||||
|
|
||||||
OPT_IN_WARNING_SLEEP_SECONDS = 10
|
OPT_IN_WARNING_SLEEP_SECONDS = 10
|
||||||
OPT_IN_INFO = (
|
OPT_IN_WARNING = (
|
||||||
"\nTelemetry is currently disabled by default. If you'd like to help improve "
|
"\nTelemetry is currently disabled by default. If you'd like to help improve "
|
||||||
"Axolotl, consider enabling it by setting AXOLOTL_DO_NOT_TRACK=0 in your environment.\n\n"
|
"Axolotl, consider enabling it by setting AXOLOTL_DO_NOT_TRACK=0 in your environment.\n\n"
|
||||||
"Telemetry data helps us understand:\n"
|
"Telemetry data helps us understand:\n"
|
||||||
@@ -38,14 +38,15 @@ OPT_IN_INFO = (
|
|||||||
|
|
||||||
WHITELIST_PATH = str(Path(__file__).parent / "whitelist.yaml")
|
WHITELIST_PATH = str(Path(__file__).parent / "whitelist.yaml")
|
||||||
|
|
||||||
# NOTE: Keep these up to date with any config schema changes
|
# NOTE: Need to keep these up to date with any config schema changes
|
||||||
FIELDS_WITH_ORGS = {
|
FIELDS_TO_REDACT = {
|
||||||
"base_model",
|
"base_model",
|
||||||
"tokenizer_config",
|
"tokenizer_config",
|
||||||
"base_model_config",
|
"base_model_config",
|
||||||
"pretraining_dataset", # NOTE: this field may be a string or a dictionary
|
"pretraining_dataset", # NOTE: this field may be a string or a dictionary
|
||||||
|
"resume_from_checkpoint",
|
||||||
|
"hub_model_id",
|
||||||
}
|
}
|
||||||
FIELDS_TO_REDACT = {"resume_from_checkpoint", "hub_model_id"}
|
|
||||||
PREFIXES_TO_REDACT = {"wandb_", "comet_", "mlflow_", "gradio_"}
|
PREFIXES_TO_REDACT = {"wandb_", "comet_", "mlflow_", "gradio_"}
|
||||||
PATH_INDICATORS = {"path", "dir"}
|
PATH_INDICATORS = {"path", "dir"}
|
||||||
|
|
||||||
@@ -187,7 +188,7 @@ class TelemetryManager:
|
|||||||
):
|
):
|
||||||
# Print opt-in info message for main process only
|
# Print opt-in info message for main process only
|
||||||
if is_main_process():
|
if is_main_process():
|
||||||
LOG.info(OPT_IN_INFO)
|
LOG.warning(OPT_IN_WARNING)
|
||||||
time.sleep(OPT_IN_WARNING_SLEEP_SECONDS)
|
time.sleep(OPT_IN_WARNING_SLEEP_SECONDS)
|
||||||
|
|
||||||
return False
|
return False
|
||||||
@@ -224,7 +225,8 @@ class TelemetryManager:
|
|||||||
Check if model / dataset / etc. org is in whitelist.
|
Check if model / dataset / etc. org is in whitelist.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
value: Value for one of FIELDS_WITH_ORGS ("base_model", etc.).
|
value: Value for one of `axolotl.telemetry.manager.FIELDS_WITH_ORGS`
|
||||||
|
("base_model", etc.).
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Boolean indicating whitelist membership.
|
Boolean indicating whitelist membership.
|
||||||
@@ -259,20 +261,17 @@ class TelemetryManager:
|
|||||||
def redact_value(value: Any, key: str = "") -> Any:
|
def redact_value(value: Any, key: str = "") -> Any:
|
||||||
"""Recursively sanitize values, redacting those with path-like keys"""
|
"""Recursively sanitize values, redacting those with path-like keys"""
|
||||||
if isinstance(key, str) and isinstance(value, str):
|
if isinstance(key, str) and isinstance(value, str):
|
||||||
# Fields that should be redacted if org is not whitelisted
|
|
||||||
if key in FIELDS_WITH_ORGS:
|
|
||||||
if not self._is_whitelisted(value):
|
|
||||||
return "[REDACTED]"
|
|
||||||
|
|
||||||
# Other redaction special cases
|
# Other redaction special cases
|
||||||
if (
|
if (
|
||||||
key in FIELDS_TO_REDACT
|
key in FIELDS_TO_REDACT
|
||||||
or any(prefix in key for prefix in PREFIXES_TO_REDACT)
|
or any(prefix in key for prefix in PREFIXES_TO_REDACT)
|
||||||
or any(indicator in key.lower() for indicator in PATH_INDICATORS)
|
or any(indicator in key.lower() for indicator in PATH_INDICATORS)
|
||||||
):
|
):
|
||||||
return "[REDACTED]"
|
# Fields with whitelisted orgs don't need to be redacted
|
||||||
|
if not self._is_whitelisted(value):
|
||||||
|
return "[REDACTED]"
|
||||||
|
|
||||||
# Handle nested structures
|
# Handle nested values
|
||||||
if isinstance(value, dict):
|
if isinstance(value, dict):
|
||||||
return {k: redact_value(v, k) for k, v in value.items()}
|
return {k: redact_value(v, k) for k, v in value.items()}
|
||||||
if isinstance(value, list):
|
if isinstance(value, list):
|
||||||
|
|||||||
@@ -106,11 +106,12 @@ def test_telemetry_disabled_for_non_main_process(telemetry_manager_class):
|
|||||||
def test_opt_in_info_displayed(telemetry_manager_class):
|
def test_opt_in_info_displayed(telemetry_manager_class):
|
||||||
"""Test that opt-in info is displayed when telemetry is not configured"""
|
"""Test that opt-in info is displayed when telemetry is not configured"""
|
||||||
with patch.dict(os.environ, {"RANK": "0"}, clear=True), patch(
|
with patch.dict(os.environ, {"RANK": "0"}, clear=True), patch(
|
||||||
"logging.Logger.info"
|
"logging.Logger.warning"
|
||||||
) as mock_info, patch("time.sleep"):
|
) as mock_warning, patch("time.sleep"):
|
||||||
telemetry_manager_class()
|
telemetry_manager_class()
|
||||||
info_displayed = False
|
info_displayed = False
|
||||||
for call in mock_info.call_args_list:
|
for call in mock_warning.call_args_list:
|
||||||
|
print(f"call: {call}")
|
||||||
if "Telemetry is currently disabled by default" in str(call):
|
if "Telemetry is currently disabled by default" in str(call):
|
||||||
info_displayed = True
|
info_displayed = True
|
||||||
break
|
break
|
||||||
|
|||||||
Reference in New Issue
Block a user