minor fixes
This commit is contained in:
committed by
Dan Saunders
parent
b8ec5bdccf
commit
2d36c11264
@@ -21,7 +21,7 @@ version, etc.
|
|||||||
- Error tracking: Stack traces and error messages (sanitized to remove personal
|
- Error tracking: Stack traces and error messages (sanitized to remove personal
|
||||||
information)
|
information)
|
||||||
|
|
||||||
No personally identifiable information (PII) is collected.
|
Personally identifiable information (PII) is not collected.
|
||||||
|
|
||||||
## Implementation
|
## Implementation
|
||||||
|
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ LOG = logging.getLogger(__name__)
|
|||||||
POSTHOG_HOST = "https://app.posthog.com"
|
POSTHOG_HOST = "https://app.posthog.com"
|
||||||
POSTHOG_WRITE_KEY = "phc_1kUR0o04oJKKTTeSsIz2Mfm5mpiVsQEf2WOlzljMD7y"
|
POSTHOG_WRITE_KEY = "phc_1kUR0o04oJKKTTeSsIz2Mfm5mpiVsQEf2WOlzljMD7y"
|
||||||
|
|
||||||
OPT_IN_WARNING_SLEEP_SECONDS = 15
|
OPT_IN_WARNING_SLEEP_SECONDS = 10
|
||||||
OPT_IN_INFO = (
|
OPT_IN_INFO = (
|
||||||
"\nTelemetry is currently disabled by default. If you'd like to help improve "
|
"\nTelemetry is currently disabled by default. If you'd like to help improve "
|
||||||
"Axolotl, consider enabling it by setting AXOLOTL_DO_NOT_TRACK=0 in your environment.\n\n"
|
"Axolotl, consider enabling it by setting AXOLOTL_DO_NOT_TRACK=0 in your environment.\n\n"
|
||||||
@@ -28,7 +28,7 @@ OPT_IN_INFO = (
|
|||||||
"- Which features are most used\n"
|
"- Which features are most used\n"
|
||||||
"- What hardware configurations to prioritize\n"
|
"- What hardware configurations to prioritize\n"
|
||||||
"- Where users encounter errors\n\n"
|
"- Where users encounter errors\n\n"
|
||||||
"No personally identifiable information is collected.\n\n"
|
"Personally identifiable information (PII) is not collected.\n\n"
|
||||||
"To remove this warning, explicitly set AXOLOTL_DO_NOT_TRACK=0 (enable telemetry) "
|
"To remove this warning, explicitly set AXOLOTL_DO_NOT_TRACK=0 (enable telemetry) "
|
||||||
"or AXOLOTL_DO_NOT_TRACK=1 (explicitly disable telemetry).\n\n"
|
"or AXOLOTL_DO_NOT_TRACK=1 (explicitly disable telemetry).\n\n"
|
||||||
"Note: Telemetry will move to an opt-out in a later release.\n\n"
|
"Note: Telemetry will move to an opt-out in a later release.\n\n"
|
||||||
@@ -210,17 +210,31 @@ class TelemetryManager:
|
|||||||
def _load_whitelist(self) -> dict:
|
def _load_whitelist(self) -> dict:
|
||||||
"""Load HuggingFace Hub organization whitelist"""
|
"""Load HuggingFace Hub organization whitelist"""
|
||||||
with open(WHITELIST_PATH, encoding="utf-8") as f:
|
with open(WHITELIST_PATH, encoding="utf-8") as f:
|
||||||
return yaml.safe_load(f)
|
whitelist = yaml.safe_load(f)
|
||||||
|
|
||||||
def _is_whitelisted(self, base_model: str) -> bool:
|
# Normalize org names to lowercase, since model names are case-insensitive
|
||||||
"""Check if model org is in whitelist"""
|
whitelist["organizations"] = {
|
||||||
if not base_model:
|
org.lower() for org in whitelist["organizations"]
|
||||||
return False
|
}
|
||||||
|
|
||||||
base_model = base_model.lower()
|
return whitelist
|
||||||
return any(
|
|
||||||
org.lower() in base_model for org in self.whitelist.get("organizations", [])
|
def _is_whitelisted(self, value: str) -> bool:
|
||||||
)
|
"""
|
||||||
|
Check if model / dataset / etc. org is in whitelist.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
value: Value for one of FIELDS_WITH_ORGS ("base_model", etc.).
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Boolean indicating whitelist membership.
|
||||||
|
"""
|
||||||
|
# NOTE: This membership-checking logic can be improved.
|
||||||
|
# What happens when a local model path matches a whitelisted org?
|
||||||
|
org = value.split("/")[0]
|
||||||
|
whitelisted = org.lower() in self.whitelist["organizations"]
|
||||||
|
|
||||||
|
return whitelisted
|
||||||
|
|
||||||
def _init_posthog(self):
|
def _init_posthog(self):
|
||||||
"""Initialize PostHog client"""
|
"""Initialize PostHog client"""
|
||||||
@@ -247,8 +261,7 @@ class TelemetryManager:
|
|||||||
if isinstance(key, str) and isinstance(value, str):
|
if isinstance(key, str) and isinstance(value, str):
|
||||||
# Fields that should be redacted if org is not whitelisted
|
# Fields that should be redacted if org is not whitelisted
|
||||||
if key in FIELDS_WITH_ORGS:
|
if key in FIELDS_WITH_ORGS:
|
||||||
org = value.split("/")[0]
|
if not self._is_whitelisted(value):
|
||||||
if org not in self.whitelist["organizations"]:
|
|
||||||
return "[REDACTED]"
|
return "[REDACTED]"
|
||||||
|
|
||||||
# Other redaction special cases
|
# Other redaction special cases
|
||||||
|
|||||||
@@ -10,7 +10,6 @@ organizations:
|
|||||||
- "HuggingFaceTB"
|
- "HuggingFaceTB"
|
||||||
- "mistralai"
|
- "mistralai"
|
||||||
- "Qwen"
|
- "Qwen"
|
||||||
- "briaai"
|
|
||||||
- "unsloth"
|
- "unsloth"
|
||||||
- "NousResearch"
|
- "NousResearch"
|
||||||
- "allenai"
|
- "allenai"
|
||||||
|
|||||||
@@ -133,7 +133,6 @@ def test_is_whitelisted(telemetry_manager_class, mock_whitelist):
|
|||||||
assert manager._is_whitelisted("META-LLAMA/Llama-7B")
|
assert manager._is_whitelisted("META-LLAMA/Llama-7B")
|
||||||
# Should handle empty input
|
# Should handle empty input
|
||||||
assert not manager._is_whitelisted("")
|
assert not manager._is_whitelisted("")
|
||||||
assert not manager._is_whitelisted(None)
|
|
||||||
|
|
||||||
|
|
||||||
def test_system_info_collection(manager):
|
def test_system_info_collection(manager):
|
||||||
|
|||||||
Reference in New Issue
Block a user