update error file path sanitization function; adding more error tracking
This commit is contained in:
@@ -3,4 +3,44 @@ title: Telemetry
|
|||||||
description: A description of the opt-out telemetry implementation in Axolotl.
|
description: A description of the opt-out telemetry implementation in Axolotl.
|
||||||
---
|
---
|
||||||
|
|
||||||
TODO.
|
# Telemetry in Axolotl
|
||||||
|
|
||||||
|
Axolotl implements anonymous telemetry to help maintainers understand how the library
|
||||||
|
is used and where users encounter issues. This data helps prioritize features, optimize
|
||||||
|
performance, and fix bugs.
|
||||||
|
|
||||||
|
## Data Collection
|
||||||
|
|
||||||
|
We collect:
|
||||||
|
|
||||||
|
- **System info**: OS, Python version, PyTorch version, Transformers version, Axolotl version
|
||||||
|
- **Hardware info**: CPU count, memory, GPU count and models
|
||||||
|
- **Usage patterns**: Models (from a whitelist) and configurations used
|
||||||
|
- **Error tracking**: Stack traces and error messages (sanitized to remove personal information)
|
||||||
|
|
||||||
|
No personally identifiable information (PII) is collected.
|
||||||
|
|
||||||
|
## Implementation
|
||||||
|
|
||||||
|
Telemetry is implemented using PostHog and consists of:
|
||||||
|
|
||||||
|
1. `axolotl.telemetry.TelemetryManager`: A singleton class that initializes the telemetry system and provides methods for tracking events.
|
||||||
|
2. `axolotl.telemetry.errors.track_errors`: A decorator that captures exceptions and sends sanitized stack traces.
|
||||||
|
|
||||||
|
## Opt-Out Mechanism
|
||||||
|
|
||||||
|
Telemetry is **enabled by default** on an opt-out basis. To disable it, set either:
|
||||||
|
|
||||||
|
- `AXOLOTL_DO_NOT_TRACK=1` (Axolotl-specific)
|
||||||
|
- `DO_NOT_TRACK=1` (Global standard)
|
||||||
|
|
||||||
|
To acknowledge and explicitly enable telemetry (and remove the warning message), set:
|
||||||
|
`AXOLOTL_DO_NOT_TRACK=0`
|
||||||
|
|
||||||
|
## Privacy
|
||||||
|
|
||||||
|
- Stack traces are sanitized to remove personal file paths, keeping only the Axolotl code paths
|
||||||
|
- Each run generates a unique anonymous ID
|
||||||
|
- Only whitelisted organization information is tracked
|
||||||
|
- See `axolotl/telemetry/whitelist.yaml` for the set of whitelisted organizations
|
||||||
|
- Telemetry is only sent from the main process to avoid duplicate events
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ from transformers.utils import is_torch_bf16_gpu_available
|
|||||||
|
|
||||||
from axolotl.integrations.base import PluginManager
|
from axolotl.integrations.base import PluginManager
|
||||||
from axolotl.telemetry import TelemetryManager
|
from axolotl.telemetry import TelemetryManager
|
||||||
from axolotl.telemetry.manager import track_errors
|
from axolotl.telemetry.errors import send_errors
|
||||||
from axolotl.utils.comet_ import setup_comet_env_vars
|
from axolotl.utils.comet_ import setup_comet_env_vars
|
||||||
from axolotl.utils.config import (
|
from axolotl.utils.config import (
|
||||||
normalize_cfg_datasets,
|
normalize_cfg_datasets,
|
||||||
@@ -156,7 +156,7 @@ def prepare_plugins(cfg: DictDefault):
|
|||||||
plugin_manager.register(plugin_name)
|
plugin_manager.register(plugin_name)
|
||||||
|
|
||||||
|
|
||||||
@track_errors
|
@send_errors
|
||||||
def load_cfg(config: Union[str, Path] = Path("examples/"), **kwargs) -> DictDefault:
|
def load_cfg(config: Union[str, Path] = Path("examples/"), **kwargs) -> DictDefault:
|
||||||
"""
|
"""
|
||||||
Loads the `axolotl` configuration stored at `config`, validates it, and performs
|
Loads the `axolotl` configuration stored at `config`, validates it, and performs
|
||||||
@@ -177,7 +177,7 @@ def load_cfg(config: Union[str, Path] = Path("examples/"), **kwargs) -> DictDefa
|
|||||||
with open(config, encoding="utf-8") as file:
|
with open(config, encoding="utf-8") as file:
|
||||||
cfg: DictDefault = DictDefault(yaml.safe_load(file))
|
cfg: DictDefault = DictDefault(yaml.safe_load(file))
|
||||||
|
|
||||||
TELEMETRY_MANAGER.track_event(event_type="config-loaded", properties=cfg)
|
TELEMETRY_MANAGER.send_event(event_type="config-loaded", properties=cfg)
|
||||||
|
|
||||||
# If there are any options passed in the cli, if it is something that seems valid
|
# If there are any options passed in the cli, if it is something that seems valid
|
||||||
# from the yaml, then overwrite the value
|
# from the yaml, then overwrite the value
|
||||||
@@ -221,6 +221,6 @@ def load_cfg(config: Union[str, Path] = Path("examples/"), **kwargs) -> DictDefa
|
|||||||
setup_mlflow_env_vars(cfg)
|
setup_mlflow_env_vars(cfg)
|
||||||
setup_comet_env_vars(cfg)
|
setup_comet_env_vars(cfg)
|
||||||
|
|
||||||
TELEMETRY_MANAGER.track_event(event_type="config-processed", properties=cfg)
|
TELEMETRY_MANAGER.send_event(event_type="config-processed", properties=cfg)
|
||||||
|
|
||||||
return cfg
|
return cfg
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ from axolotl.cli.args import InferenceCliArgs
|
|||||||
from axolotl.cli.art import print_axolotl_text_art
|
from axolotl.cli.art import print_axolotl_text_art
|
||||||
from axolotl.cli.config import load_cfg
|
from axolotl.cli.config import load_cfg
|
||||||
from axolotl.cli.utils import load_model_and_tokenizer
|
from axolotl.cli.utils import load_model_and_tokenizer
|
||||||
|
from axolotl.telemetry.errors import send_errors
|
||||||
from axolotl.utils.chat_templates import (
|
from axolotl.utils.chat_templates import (
|
||||||
get_chat_template,
|
get_chat_template,
|
||||||
get_chat_template_from_config,
|
get_chat_template_from_config,
|
||||||
@@ -42,6 +43,7 @@ def get_multi_line_input() -> str:
|
|||||||
return instruction
|
return instruction
|
||||||
|
|
||||||
|
|
||||||
|
@send_errors
|
||||||
def do_inference(
|
def do_inference(
|
||||||
*,
|
*,
|
||||||
cfg: DictDefault,
|
cfg: DictDefault,
|
||||||
@@ -135,6 +137,7 @@ def do_inference(
|
|||||||
print(tokenizer.decode(generated["sequences"].cpu().tolist()[0]))
|
print(tokenizer.decode(generated["sequences"].cpu().tolist()[0]))
|
||||||
|
|
||||||
|
|
||||||
|
@send_errors
|
||||||
def do_inference_gradio(
|
def do_inference_gradio(
|
||||||
*,
|
*,
|
||||||
cfg: DictDefault,
|
cfg: DictDefault,
|
||||||
|
|||||||
@@ -12,11 +12,13 @@ from axolotl.cli.args import TrainerCliArgs
|
|||||||
from axolotl.cli.art import print_axolotl_text_art
|
from axolotl.cli.art import print_axolotl_text_art
|
||||||
from axolotl.cli.config import load_cfg
|
from axolotl.cli.config import load_cfg
|
||||||
from axolotl.cli.utils import load_model_and_tokenizer
|
from axolotl.cli.utils import load_model_and_tokenizer
|
||||||
|
from axolotl.telemetry.errors import send_errors
|
||||||
from axolotl.utils.dict import DictDefault
|
from axolotl.utils.dict import DictDefault
|
||||||
|
|
||||||
LOG = logging.getLogger(__name__)
|
LOG = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@send_errors
|
||||||
def do_merge_lora(*, cfg: DictDefault) -> None:
|
def do_merge_lora(*, cfg: DictDefault) -> None:
|
||||||
"""
|
"""
|
||||||
Calls `transformers`' `merge_and_unload` on the model given in the `axolotl` config
|
Calls `transformers`' `merge_and_unload` on the model given in the `axolotl` config
|
||||||
|
|||||||
@@ -27,6 +27,7 @@ from torch.distributed.checkpoint.format_utils import _EmptyStateDictLoadPlanner
|
|||||||
from axolotl.cli.args import TrainerCliArgs
|
from axolotl.cli.args import TrainerCliArgs
|
||||||
from axolotl.cli.art import print_axolotl_text_art
|
from axolotl.cli.art import print_axolotl_text_art
|
||||||
from axolotl.cli.config import load_cfg
|
from axolotl.cli.config import load_cfg
|
||||||
|
from axolotl.telemetry.errors import send_errors
|
||||||
|
|
||||||
LOG = logging.getLogger(__name__)
|
LOG = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -120,6 +121,7 @@ def _distributed_checkpoint_to_merged_weights(
|
|||||||
return save_path_
|
return save_path_
|
||||||
|
|
||||||
|
|
||||||
|
@send_errors
|
||||||
def merge_fsdp_weights(
|
def merge_fsdp_weights(
|
||||||
checkpoint_dir: str,
|
checkpoint_dir: str,
|
||||||
output_path: str,
|
output_path: str,
|
||||||
|
|||||||
@@ -18,12 +18,14 @@ from axolotl.cli.checks import check_accelerate_default_config, check_user_token
|
|||||||
from axolotl.cli.config import load_cfg
|
from axolotl.cli.config import load_cfg
|
||||||
from axolotl.common.const import DEFAULT_DATASET_PREPARED_PATH
|
from axolotl.common.const import DEFAULT_DATASET_PREPARED_PATH
|
||||||
from axolotl.common.datasets import load_datasets, load_preference_datasets
|
from axolotl.common.datasets import load_datasets, load_preference_datasets
|
||||||
|
from axolotl.telemetry.errors import send_errors
|
||||||
from axolotl.utils.dict import DictDefault
|
from axolotl.utils.dict import DictDefault
|
||||||
from axolotl.utils.trainer import disable_datasets_caching
|
from axolotl.utils.trainer import disable_datasets_caching
|
||||||
|
|
||||||
LOG = logging.getLogger(__name__)
|
LOG = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@send_errors
|
||||||
def do_preprocess(cfg: DictDefault, cli_args: PreprocessCliArgs) -> None:
|
def do_preprocess(cfg: DictDefault, cli_args: PreprocessCliArgs) -> None:
|
||||||
"""
|
"""
|
||||||
Preprocesses dataset specified in axolotl config.
|
Preprocesses dataset specified in axolotl config.
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ from datasets import Dataset
|
|||||||
|
|
||||||
import axolotl.monkeypatch.data.batch_dataset_fetcher # pylint: disable=unused-import # noqa: F401
|
import axolotl.monkeypatch.data.batch_dataset_fetcher # pylint: disable=unused-import # noqa: F401
|
||||||
from axolotl.cli.args import PreprocessCliArgs, TrainerCliArgs
|
from axolotl.cli.args import PreprocessCliArgs, TrainerCliArgs
|
||||||
|
from axolotl.telemetry.errors import send_errors
|
||||||
from axolotl.utils.data import prepare_dataset
|
from axolotl.utils.data import prepare_dataset
|
||||||
from axolotl.utils.data.rl import load_prepare_preference_datasets
|
from axolotl.utils.data.rl import load_prepare_preference_datasets
|
||||||
from axolotl.utils.dict import DictDefault
|
from axolotl.utils.dict import DictDefault
|
||||||
@@ -44,6 +45,7 @@ def sample_dataset(dataset: Dataset, num_samples: int) -> Dataset:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@send_errors
|
||||||
def load_datasets(
|
def load_datasets(
|
||||||
*,
|
*,
|
||||||
cfg: DictDefault,
|
cfg: DictDefault,
|
||||||
@@ -103,6 +105,7 @@ def load_datasets(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@send_errors
|
||||||
def load_preference_datasets(
|
def load_preference_datasets(
|
||||||
*,
|
*,
|
||||||
cfg: DictDefault,
|
cfg: DictDefault,
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ import torch
|
|||||||
from accelerate.logging import get_logger
|
from accelerate.logging import get_logger
|
||||||
|
|
||||||
from axolotl.logging_config import configure_logging
|
from axolotl.logging_config import configure_logging
|
||||||
|
from axolotl.telemetry.errors import send_errors
|
||||||
from axolotl.train import TrainDatasetMeta
|
from axolotl.train import TrainDatasetMeta
|
||||||
from axolotl.utils import set_pytorch_cuda_alloc_conf
|
from axolotl.utils import set_pytorch_cuda_alloc_conf
|
||||||
from axolotl.utils.dict import DictDefault
|
from axolotl.utils.dict import DictDefault
|
||||||
@@ -61,6 +62,7 @@ def evaluate_dataset(
|
|||||||
return metrics
|
return metrics
|
||||||
|
|
||||||
|
|
||||||
|
@send_errors
|
||||||
def evaluate(*, cfg: DictDefault, dataset_meta: TrainDatasetMeta) -> Dict[str, float]:
|
def evaluate(*, cfg: DictDefault, dataset_meta: TrainDatasetMeta) -> Dict[str, float]:
|
||||||
"""
|
"""
|
||||||
Evaluate a model on training and validation datasets
|
Evaluate a model on training and validation datasets
|
||||||
|
|||||||
112
src/axolotl/telemetry/errors.py
Normal file
112
src/axolotl/telemetry/errors.py
Normal file
@@ -0,0 +1,112 @@
|
|||||||
|
"""Telemetry utilities for exception and traceback information."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
import traceback
|
||||||
|
from functools import wraps
|
||||||
|
from inspect import getmodule
|
||||||
|
from typing import Any, Callable
|
||||||
|
|
||||||
|
from axolotl.telemetry.manager import TelemetryManager
|
||||||
|
|
||||||
|
LOG = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
ERROR_HANDLED = False
|
||||||
|
|
||||||
|
|
||||||
|
def sanitize_stack_trace(stack_trace: str) -> str:
|
||||||
|
"""
|
||||||
|
Remove personal information from stack trace messages while keeping Axolotl codepaths.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
stack_trace: The original stack trace string.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A sanitized version of the stack trace with only axolotl paths preserved.
|
||||||
|
"""
|
||||||
|
# Split the stack trace into lines to process each file path separately
|
||||||
|
lines = stack_trace.split("\n")
|
||||||
|
sanitized_lines = []
|
||||||
|
|
||||||
|
# Regular expression to find file paths in the stack trace
|
||||||
|
path_pattern = re.compile(r'(?:File ")(.*?)(?:")')
|
||||||
|
|
||||||
|
for line in lines:
|
||||||
|
# Check if this line contains a file path
|
||||||
|
path_match = path_pattern.search(line)
|
||||||
|
|
||||||
|
if path_match:
|
||||||
|
full_path = path_match.group(1)
|
||||||
|
|
||||||
|
if "axolotl/" in full_path:
|
||||||
|
# Keep only the 'axolotl' part and onward
|
||||||
|
axolotl_idx = full_path.rfind("axolotl/")
|
||||||
|
if axolotl_idx >= 0:
|
||||||
|
# Replace the original path with the sanitized one
|
||||||
|
sanitized_path = full_path[axolotl_idx:]
|
||||||
|
line = line.replace(full_path, sanitized_path)
|
||||||
|
else:
|
||||||
|
# For non-axolotl paths, replace with an empty string or a placeholder
|
||||||
|
line = line.replace(full_path, "")
|
||||||
|
|
||||||
|
sanitized_lines.append(line)
|
||||||
|
|
||||||
|
return "\n".join(sanitized_lines)
|
||||||
|
|
||||||
|
|
||||||
|
def send_errors(func: Callable) -> Callable:
|
||||||
|
"""
|
||||||
|
Decorator to send exception info in a function. If an exception is raised, we send
|
||||||
|
telemetry containing the stack trace and error message.
|
||||||
|
|
||||||
|
If an error occurs in a decorated function that is called by another decorated
|
||||||
|
function, we'll only send telemetry corresponding to the lower-level function.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
func: Function to decorate.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Decorated function.
|
||||||
|
"""
|
||||||
|
|
||||||
|
@wraps(func)
|
||||||
|
def wrapper(*args, **kwargs) -> Any:
|
||||||
|
telemetry_manager = TelemetryManager.get_instance()
|
||||||
|
if not telemetry_manager.enabled:
|
||||||
|
return func(*args, **kwargs)
|
||||||
|
|
||||||
|
try:
|
||||||
|
return func(*args, **kwargs)
|
||||||
|
except Exception as exception:
|
||||||
|
# Only track if we're not already handling an error. This prevents us from
|
||||||
|
# capturing an error more than once in nested decorated function calls.
|
||||||
|
global ERROR_HANDLED # pylint: disable=global-statement
|
||||||
|
if not ERROR_HANDLED:
|
||||||
|
ERROR_HANDLED = True
|
||||||
|
|
||||||
|
# Get function module path
|
||||||
|
module = getmodule(func)
|
||||||
|
module_path = (
|
||||||
|
f"{module.__name__}.{func.__name__}" if module else func.__name__
|
||||||
|
)
|
||||||
|
|
||||||
|
# Get stack trace
|
||||||
|
stack_trace = "".join(
|
||||||
|
traceback.format_exception(
|
||||||
|
type(exception), exception, exception.__traceback__
|
||||||
|
)
|
||||||
|
)
|
||||||
|
stack_trace = sanitize_stack_trace(stack_trace)
|
||||||
|
|
||||||
|
# Send error telemetry
|
||||||
|
telemetry_manager.send_event(
|
||||||
|
event_type=f"{module_path}-error",
|
||||||
|
properties={
|
||||||
|
"exception": str(exception),
|
||||||
|
"stack_trace": stack_trace,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
raise
|
||||||
|
|
||||||
|
return wrapper
|
||||||
@@ -5,13 +5,10 @@ import logging
|
|||||||
import os
|
import os
|
||||||
import platform
|
import platform
|
||||||
import time
|
import time
|
||||||
import traceback
|
|
||||||
import uuid
|
import uuid
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from functools import wraps
|
|
||||||
from inspect import getmodule
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Callable
|
from typing import Any
|
||||||
|
|
||||||
import posthog
|
import posthog
|
||||||
import psutil
|
import psutil
|
||||||
@@ -24,8 +21,8 @@ from axolotl.utils.distributed import is_main_process
|
|||||||
|
|
||||||
LOG = logging.getLogger(__name__)
|
LOG = logging.getLogger(__name__)
|
||||||
|
|
||||||
POSTHOG_WRITE_KEY = "phc_RbAa7Bxu6TLIN9xd8gbg1PLemrStaymi8pxQbRbIwfC"
|
POSTHOG_WRITE_KEY = "phc_1kUR0o04oJKKTTeSsIz2Mfm5mpiVsQEf2WOlzljMD7y"
|
||||||
ENABLED_WARNING_SLEEP_SECONDS = 10
|
ENABLED_WARNING_SLEEP_SECONDS = 15
|
||||||
ENABLED_WARNING = (
|
ENABLED_WARNING = (
|
||||||
"\nTelemetry is enabled. This helps Axolotl's maintainers by providing insights into:\n"
|
"\nTelemetry is enabled. This helps Axolotl's maintainers by providing insights into:\n"
|
||||||
"- Which models and configurations are most commonly used\n"
|
"- Which models and configurations are most commonly used\n"
|
||||||
@@ -166,18 +163,6 @@ class TelemetryManager:
|
|||||||
"""Remove personal information from file paths"""
|
"""Remove personal information from file paths"""
|
||||||
return Path(path).name
|
return Path(path).name
|
||||||
|
|
||||||
def _sanitize_error(self, error: str) -> str:
|
|
||||||
"""Remove personal information from error messages"""
|
|
||||||
# Replace file paths with just filename
|
|
||||||
sanitized = error
|
|
||||||
try:
|
|
||||||
for path in Path(error).parents:
|
|
||||||
sanitized = sanitized.replace(str(path), "")
|
|
||||||
except (ValueError, RuntimeError) as e:
|
|
||||||
LOG.debug(f"Could not parse path in error message: {e}")
|
|
||||||
|
|
||||||
return sanitized
|
|
||||||
|
|
||||||
def _get_system_info(self) -> dict[str, Any]:
|
def _get_system_info(self) -> dict[str, Any]:
|
||||||
"""Collect system information"""
|
"""Collect system information"""
|
||||||
gpu_info = []
|
gpu_info = []
|
||||||
@@ -202,8 +187,8 @@ class TelemetryManager:
|
|||||||
"gpu_info": gpu_info,
|
"gpu_info": gpu_info,
|
||||||
}
|
}
|
||||||
|
|
||||||
def track_event(self, event_type: str, properties: dict[str, Any] | None = None):
|
def send_event(self, event_type: str, properties: dict[str, Any] | None = None):
|
||||||
"""Track a telemetry event"""
|
"""Send a telemetry event"""
|
||||||
if not self.enabled:
|
if not self.enabled:
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -218,63 +203,16 @@ class TelemetryManager:
|
|||||||
posthog.capture(
|
posthog.capture(
|
||||||
distinct_id=self.run_id,
|
distinct_id=self.run_id,
|
||||||
event=event_type,
|
event=event_type,
|
||||||
properties={
|
properties=properties,
|
||||||
"system_info": self.system_info,
|
|
||||||
**properties,
|
|
||||||
},
|
|
||||||
)
|
)
|
||||||
except Exception as e: # pylint: disable=broad-exception-caught
|
except Exception as e: # pylint: disable=broad-exception-caught
|
||||||
LOG.warning(f"Failed to send telemetry event: {e}")
|
LOG.warning(f"Failed to send telemetry event: {e}")
|
||||||
|
|
||||||
|
def send_system_info(self):
|
||||||
|
"""Helper method for sending system info"""
|
||||||
|
self.send_event(event_type="system-info", properties=self.system_info)
|
||||||
|
|
||||||
def shutdown(self):
|
def shutdown(self):
|
||||||
"""Ensure all queued events are processed before shutdown"""
|
"""Ensure all queued events are processed before shutdown"""
|
||||||
if self.enabled:
|
if self.enabled:
|
||||||
posthog.flush()
|
posthog.flush()
|
||||||
|
|
||||||
|
|
||||||
ERROR_HANDLED = False
|
|
||||||
|
|
||||||
|
|
||||||
def track_errors(func: Callable) -> Callable:
|
|
||||||
"""Decorator to track errors in a function"""
|
|
||||||
|
|
||||||
@wraps(func)
|
|
||||||
def wrapper(*args, **kwargs) -> Any:
|
|
||||||
telemetry_manager = TelemetryManager.get_instance()
|
|
||||||
if not telemetry_manager.enabled:
|
|
||||||
return func(*args, **kwargs)
|
|
||||||
|
|
||||||
try:
|
|
||||||
return func(*args, **kwargs)
|
|
||||||
except Exception as exception:
|
|
||||||
# Only track if we're not already handling an error. This prevents us from
|
|
||||||
# capturing an error more than once in nested decorated function calls.
|
|
||||||
global ERROR_HANDLED # pylint: disable=global-statement
|
|
||||||
if not ERROR_HANDLED:
|
|
||||||
ERROR_HANDLED = True
|
|
||||||
|
|
||||||
# Get function module path
|
|
||||||
module = getmodule(func)
|
|
||||||
module_path = (
|
|
||||||
f"{module.__name__}.{func.__name__}" if module else func.__name__
|
|
||||||
)
|
|
||||||
|
|
||||||
# Get stack trace
|
|
||||||
stack_trace = "".join(
|
|
||||||
traceback.format_exception(
|
|
||||||
type(exception), exception, exception.__traceback__
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
# Send error telemetry
|
|
||||||
telemetry_manager.track_event(
|
|
||||||
event_type=f"{module_path}-error",
|
|
||||||
properties={
|
|
||||||
"exception": str(exception),
|
|
||||||
"stack_trace": stack_trace,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
raise
|
|
||||||
|
|
||||||
return wrapper
|
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ from axolotl.contribs.lgpl.unsloth import ( # pylint: disable = no-name-in-modu
|
|||||||
)
|
)
|
||||||
from axolotl.logging_config import configure_logging
|
from axolotl.logging_config import configure_logging
|
||||||
from axolotl.telemetry import TelemetryManager
|
from axolotl.telemetry import TelemetryManager
|
||||||
from axolotl.telemetry.manager import track_errors
|
from axolotl.telemetry.errors import send_errors
|
||||||
from axolotl.utils.dict import DictDefault
|
from axolotl.utils.dict import DictDefault
|
||||||
from axolotl.utils.freeze import freeze_layers_except
|
from axolotl.utils.freeze import freeze_layers_except
|
||||||
from axolotl.utils.models import load_model, load_processor, load_tokenizer
|
from axolotl.utils.models import load_model, load_processor, load_tokenizer
|
||||||
@@ -44,7 +44,7 @@ LOG = get_logger(__name__)
|
|||||||
TELEMETRY_MANAGER = TelemetryManager.get_instance()
|
TELEMETRY_MANAGER = TelemetryManager.get_instance()
|
||||||
|
|
||||||
|
|
||||||
@track_errors
|
@send_errors
|
||||||
def train(
|
def train(
|
||||||
*, cfg: DictDefault, dataset_meta: TrainDatasetMeta
|
*, cfg: DictDefault, dataset_meta: TrainDatasetMeta
|
||||||
) -> Tuple[Union[PeftModel, PreTrainedModel], PreTrainedTokenizer]:
|
) -> Tuple[Union[PeftModel, PreTrainedModel], PreTrainedTokenizer]:
|
||||||
@@ -89,11 +89,11 @@ def train(
|
|||||||
if model.generation_config is not None:
|
if model.generation_config is not None:
|
||||||
model.generation_config.do_sample = True
|
model.generation_config.do_sample = True
|
||||||
|
|
||||||
TELEMETRY_MANAGER.track_event(
|
TELEMETRY_MANAGER.send_event(
|
||||||
event_type="model-load", properties=model.config.to_dict()
|
event_type="model-load", properties=model.config.to_dict()
|
||||||
)
|
)
|
||||||
if peft_config:
|
if peft_config:
|
||||||
TELEMETRY_MANAGER.track_event(
|
TELEMETRY_MANAGER.send_event(
|
||||||
event_type="peft-config-load", properties=peft_config.to_dict()
|
event_type="peft-config-load", properties=peft_config.to_dict()
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -187,7 +187,7 @@ def train(
|
|||||||
if cfg.group_by_length:
|
if cfg.group_by_length:
|
||||||
LOG.info("hang tight... sorting dataset for group_by_length")
|
LOG.info("hang tight... sorting dataset for group_by_length")
|
||||||
|
|
||||||
TELEMETRY_MANAGER.track_event(event_type="train-start")
|
TELEMETRY_MANAGER.send_event(event_type="train-start")
|
||||||
|
|
||||||
pretrain_hooks(cfg, trainer)
|
pretrain_hooks(cfg, trainer)
|
||||||
|
|
||||||
@@ -204,7 +204,7 @@ def train(
|
|||||||
|
|
||||||
post_train_hooks(cfg, trainer)
|
post_train_hooks(cfg, trainer)
|
||||||
|
|
||||||
TELEMETRY_MANAGER.track_event(event_type="train-end")
|
TELEMETRY_MANAGER.send_event(event_type="train-end")
|
||||||
|
|
||||||
LOG.info(f"Training Completed!!! Saving pre-trained model to {cfg.output_dir}")
|
LOG.info(f"Training Completed!!! Saving pre-trained model to {cfg.output_dir}")
|
||||||
|
|
||||||
|
|||||||
@@ -54,7 +54,7 @@ from axolotl.monkeypatch.multipack import (
|
|||||||
patch_for_multipack,
|
patch_for_multipack,
|
||||||
)
|
)
|
||||||
from axolotl.prompt_tokenizers import LLAMA_DEFAULT_EOS_TOKEN
|
from axolotl.prompt_tokenizers import LLAMA_DEFAULT_EOS_TOKEN
|
||||||
from axolotl.telemetry.manager import track_errors
|
from axolotl.telemetry.errors import send_errors
|
||||||
from axolotl.utils.bench import log_gpu_memory_usage
|
from axolotl.utils.bench import log_gpu_memory_usage
|
||||||
from axolotl.utils.chat_templates import get_chat_template_from_config
|
from axolotl.utils.chat_templates import get_chat_template_from_config
|
||||||
from axolotl.utils.dict import DictDefault
|
from axolotl.utils.dict import DictDefault
|
||||||
@@ -166,7 +166,7 @@ def load_model_config(cfg):
|
|||||||
return model_config
|
return model_config
|
||||||
|
|
||||||
|
|
||||||
@track_errors
|
@send_errors
|
||||||
def load_tokenizer(cfg):
|
def load_tokenizer(cfg):
|
||||||
model_config = load_model_config(cfg)
|
model_config = load_model_config(cfg)
|
||||||
tokenizer_kwargs = {}
|
tokenizer_kwargs = {}
|
||||||
@@ -320,7 +320,7 @@ def load_tokenizer(cfg):
|
|||||||
return tokenizer
|
return tokenizer
|
||||||
|
|
||||||
|
|
||||||
@track_errors
|
@send_errors
|
||||||
def load_processor(cfg: DictDefault, tokenizer: PreTrainedTokenizerBase):
|
def load_processor(cfg: DictDefault, tokenizer: PreTrainedTokenizerBase):
|
||||||
processor_kwargs: Dict[str, Any] = {} # do we actually need this?
|
processor_kwargs: Dict[str, Any] = {} # do we actually need this?
|
||||||
|
|
||||||
@@ -1195,7 +1195,7 @@ class ModelLoader:
|
|||||||
return self.model, lora_config
|
return self.model, lora_config
|
||||||
|
|
||||||
|
|
||||||
@track_errors
|
@send_errors
|
||||||
def load_model(
|
def load_model(
|
||||||
cfg: DictDefault,
|
cfg: DictDefault,
|
||||||
tokenizer: PreTrainedTokenizerBase,
|
tokenizer: PreTrainedTokenizerBase,
|
||||||
@@ -1217,7 +1217,7 @@ def load_model(
|
|||||||
return loader.load_model()
|
return loader.load_model()
|
||||||
|
|
||||||
|
|
||||||
@track_errors
|
@send_errors
|
||||||
def load_adapter(model, cfg, adapter, inference=False):
|
def load_adapter(model, cfg, adapter, inference=False):
|
||||||
# type: (PreTrainedModel, DictDefault, Optional[str], bool) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
|
# type: (PreTrainedModel, DictDefault, Optional[str], bool) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user