minor fixes

This commit is contained in:
Dan Saunders
2025-02-27 15:37:00 -05:00
committed by Dan Saunders
parent b8ec5bdccf
commit 2d36c11264
4 changed files with 27 additions and 16 deletions

View File

@@ -21,7 +21,7 @@ version, etc.
- Error tracking: Stack traces and error messages (sanitized to remove personal
information)
No personally identifiable information (PII) is collected.
Personally identifiable information (PII) is not collected.
## Implementation

View File

@@ -20,7 +20,7 @@ LOG = logging.getLogger(__name__)
POSTHOG_HOST = "https://app.posthog.com"
POSTHOG_WRITE_KEY = "phc_1kUR0o04oJKKTTeSsIz2Mfm5mpiVsQEf2WOlzljMD7y"
OPT_IN_WARNING_SLEEP_SECONDS = 15
OPT_IN_WARNING_SLEEP_SECONDS = 10
OPT_IN_INFO = (
"\nTelemetry is currently disabled by default. If you'd like to help improve "
"Axolotl, consider enabling it by setting AXOLOTL_DO_NOT_TRACK=0 in your environment.\n\n"
@@ -28,7 +28,7 @@ OPT_IN_INFO = (
"- Which features are most used\n"
"- What hardware configurations to prioritize\n"
"- Where users encounter errors\n\n"
"No personally identifiable information is collected.\n\n"
"Personally identifiable information (PII) is not collected.\n\n"
"To remove this warning, explicitly set AXOLOTL_DO_NOT_TRACK=0 (enable telemetry) "
"or AXOLOTL_DO_NOT_TRACK=1 (explicitly disable telemetry).\n\n"
"Note: Telemetry will move to an opt-out in a later release.\n\n"
@@ -210,17 +210,31 @@ class TelemetryManager:
def _load_whitelist(self) -> dict:
"""Load HuggingFace Hub organization whitelist"""
with open(WHITELIST_PATH, encoding="utf-8") as f:
return yaml.safe_load(f)
whitelist = yaml.safe_load(f)
def _is_whitelisted(self, base_model: str) -> bool:
"""Check if model org is in whitelist"""
if not base_model:
return False
# Convert org strings to lowercase since model names are case-insensitive
whitelist["organizations"] = {
org.lower() for org in whitelist["organizations"]
}
base_model = base_model.lower()
return any(
org.lower() in base_model for org in self.whitelist.get("organizations", [])
)
return whitelist
def _is_whitelisted(self, value: str) -> bool:
"""
Check if model / dataset / etc. org is in whitelist.
Args:
value: Value for one of FIELDS_WITH_ORGS ("base_model", etc.).
Returns:
Boolean indicating whitelist membership.
"""
# NOTE: This membership-checking logic can be improved.
# What happens when a local model path matches a whitelisted org?
org = value.split("/")[0]
whitelisted = org.lower() in self.whitelist["organizations"]
return whitelisted
def _init_posthog(self):
"""Initialize PostHog client"""
@@ -247,8 +261,7 @@ class TelemetryManager:
if isinstance(key, str) and isinstance(value, str):
# Fields that should be redacted if org is not whitelisted
if key in FIELDS_WITH_ORGS:
org = value.split("/")[0]
if org not in self.whitelist["organizations"]:
if not self._is_whitelisted(value):
return "[REDACTED]"
# Other redaction special cases

View File

@@ -10,7 +10,6 @@ organizations:
- "HuggingFaceTB"
- "mistralai"
- "Qwen"
- "briaai"
- "unsloth"
- "NousResearch"
- "allenai"

View File

@@ -133,7 +133,6 @@ def test_is_whitelisted(telemetry_manager_class, mock_whitelist):
assert manager._is_whitelisted("META-LLAMA/Llama-7B")
# Should handle empty input
assert not manager._is_whitelisted("")
assert not manager._is_whitelisted(None)
def test_system_info_collection(manager):