lint
This commit is contained in:
@@ -11,12 +11,11 @@ from accelerate.logging import get_logger
|
||||
from datasets import Dataset
|
||||
from transformers.trainer import Trainer
|
||||
|
||||
from axolotl.telemetry.errors import send_errors
|
||||
from axolotl.train import (
|
||||
TrainDatasetMeta,
|
||||
setup_model_and_tokenizer,
|
||||
)
|
||||
from axolotl.telemetry.errors import send_errors
|
||||
from axolotl.train import TrainDatasetMeta
|
||||
from axolotl.utils.dict import DictDefault
|
||||
from axolotl.utils.distributed import cleanup_distributed
|
||||
from axolotl.utils.trainer import setup_trainer
|
||||
|
||||
@@ -19,6 +19,7 @@ from peft import (
|
||||
from transformers import PreTrainedModel
|
||||
|
||||
from axolotl.loaders.utils import get_linear_embedding_layers
|
||||
from axolotl.telemetry.errors import send_errors
|
||||
from axolotl.utils.dict import DictDefault
|
||||
from axolotl.utils.logging import get_logger
|
||||
|
||||
|
||||
@@ -46,6 +46,7 @@ from axolotl.loaders.utils import (
|
||||
load_model_config,
|
||||
)
|
||||
from axolotl.models.mamba import fix_mamba_attn_for_loss
|
||||
from axolotl.telemetry.errors import send_errors
|
||||
from axolotl.utils.bench import log_gpu_memory_usage
|
||||
from axolotl.utils.dict import DictDefault
|
||||
from axolotl.utils.distributed import (
|
||||
|
||||
@@ -8,6 +8,7 @@ from transformers import (
|
||||
PreTrainedTokenizerBase,
|
||||
)
|
||||
|
||||
from axolotl.telemetry.errors import send_errors
|
||||
from axolotl.utils.dict import DictDefault
|
||||
from axolotl.utils.logging import get_logger
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@ from transformers import (
|
||||
from axolotl.integrations.base import PluginManager
|
||||
from axolotl.loaders.utils import get_linear_embedding_layers, load_model_config
|
||||
from axolotl.prompt_tokenizers import LLAMA_DEFAULT_EOS_TOKEN
|
||||
from axolotl.telemetry.errors import send_errors
|
||||
from axolotl.utils.chat_templates import get_chat_template_from_config
|
||||
from axolotl.utils.distributed import (
|
||||
barrier,
|
||||
|
||||
@@ -59,12 +59,14 @@ class TelemetryCallback(TrainerCallback):
|
||||
self.telemetry_manager.send_event(
|
||||
event_type="train-end",
|
||||
properties={
|
||||
"loss": state.log_history[-1].get("loss", 0)
|
||||
if state.log_history
|
||||
else None,
|
||||
"learning_rate": state.log_history[-1].get("learning_rate", 0)
|
||||
if state.log_history
|
||||
else None,
|
||||
"loss": (
|
||||
state.log_history[-1].get("loss", 0) if state.log_history else None
|
||||
),
|
||||
"learning_rate": (
|
||||
state.log_history[-1].get("learning_rate", 0)
|
||||
if state.log_history
|
||||
else None
|
||||
),
|
||||
}
|
||||
| self.tracker.metrics.to_dict(),
|
||||
)
|
||||
|
||||
@@ -307,9 +307,11 @@ class TelemetryManager:
|
||||
gpu_info.append(
|
||||
{
|
||||
"name": torch.hip.get_device_name(i),
|
||||
"memory": torch.hip.get_device_properties(i).total_memory
|
||||
if hasattr(torch.hip, "get_device_properties")
|
||||
else None,
|
||||
"memory": (
|
||||
torch.hip.get_device_properties(i).total_memory
|
||||
if hasattr(torch.hip, "get_device_properties")
|
||||
else None
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@@ -202,8 +202,8 @@ class RuntimeMetricsTracker:
|
||||
memory_used = self._get_allocated_memory()
|
||||
for i, memory in memory_used.items():
|
||||
memory_metrics[f"gpu_{i}_memory_bytes"] = memory
|
||||
memory_metrics[
|
||||
f"gpu_{i}_peak_memory_bytes"
|
||||
] = self.metrics.peak_gpu_memory.get(i, 0)
|
||||
memory_metrics[f"gpu_{i}_peak_memory_bytes"] = (
|
||||
self.metrics.peak_gpu_memory.get(i, 0)
|
||||
)
|
||||
|
||||
return memory_metrics
|
||||
|
||||
@@ -33,7 +33,6 @@ from axolotl.loaders import (
|
||||
load_tokenizer,
|
||||
)
|
||||
from axolotl.telemetry.errors import send_errors
|
||||
from axolotl.telemetry.errors import send_errors
|
||||
from axolotl.telemetry.manager import TelemetryManager
|
||||
from axolotl.utils.ctx_managers.sequence_parallel import SequenceParallelContextManager
|
||||
from axolotl.utils.dict import DictDefault
|
||||
@@ -91,11 +90,11 @@ def setup_model_and_tokenizer(
|
||||
if model.generation_config is not None:
|
||||
model.generation_config.do_sample = True
|
||||
|
||||
TELEMETRY_MANAGER.track_event(
|
||||
TELEMETRY_MANAGER.send_event(
|
||||
event_type="model-load", properties=model.config.to_dict()
|
||||
)
|
||||
if peft_config:
|
||||
TELEMETRY_MANAGER.track_event(
|
||||
TELEMETRY_MANAGER.send_event(
|
||||
event_type="peft-config-load", properties=peft_config.to_dict()
|
||||
)
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user