chore: logging cleanup (#3482) [skip ci]

NanoCode012 authored 2026-03-16 09:10:57 +07:00, committed by GitHub
parent a806704e94
commit d8a646c80d
9 changed files with 1 addition and 17 deletions
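
Every hunk below drops an explicit main_process_only=True argument from a LOG call. As a hedged illustration of why that argument can be redundant — assuming LOG is (or wraps) an accelerate multi-process logger, which this commit does not state and is only an assumption here — accelerate's logger adapter already restricts output to the main process by default:

# Sketch only: assumes the project logger comes from accelerate.logging;
# the repo's actual logger setup may differ.
from accelerate.logging import get_logger

LOG = get_logger(__name__)

# MultiProcessAdapter.log() defaults to main_process_only=True, so these two
# calls behave the same; the explicit kwarg adds noise without changing behavior.
LOG.info("FSDP weights merged")
LOG.info("FSDP weights merged", main_process_only=True)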


@@ -196,12 +196,10 @@ def do_cli(config: Union[Path, str] = Path("examples/"), **kwargs):
     state.wait_for_everyone()
     LOG.info(
         f"FSDP SHARDED_STATE_DICT weights successfully merged to: {output_path}",
-        main_process_only=True,
     )
     LOG.info(
         "Merged weights are only the safetensors and doesn't include the model configuration "
         f"or tokenizer which may be found in {parsed_cfg.output_dir}.",
-        main_process_only=True,
     )


@@ -19,5 +19,4 @@ class CheckpointSaveMixin(Trainer):
f"Trainer does not support saving optimizer and scheduler: {exc}\n"
"Optimizer and scheduler states were not saved - resuming from checkpoints "
"for this training run will not be possible.",
main_process_only=True,
)


@@ -64,15 +64,12 @@ def patch_flex_wrapper(**flex_attn_compile_kwargs):
         LOG.info(
             "Compiling flex attention with kwargs: %s. This may take a while...",
             flex_attn_compile_kwargs,
-            main_process_only=True,
         )
         self._compiled_flex_attention = torch.compile(
             flex_attention,
             **flex_attn_compile_kwargs,
         )
-        LOG.info(
-            "Flex attention compiled successfully.", main_process_only=True
-        )
+        LOG.info("Flex attention compiled successfully.")
         self._is_flex_compiled = True
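
For context on the hunk above: the wrapper compiles flex attention lazily and caches the compiled callable on the instance, so later calls skip recompilation. A rough sketch of that compile-once pattern, with hypothetical class and method names that mirror the diff rather than the repo's exact wrapper (requires a PyTorch version that ships flex attention):

import torch
from torch.nn.attention.flex_attention import flex_attention

class FlexAttentionWrapper:
    # Sketch: lazily compile flex attention once and reuse the cached callable.
    def __init__(self, **flex_attn_compile_kwargs):
        self._flex_attn_compile_kwargs = flex_attn_compile_kwargs
        self._compiled_flex_attention = None
        self._is_flex_compiled = False

    def _get_compiled(self):
        if not self._is_flex_compiled:
            # First use: pay the compile cost once, then cache the result.
            self._compiled_flex_attention = torch.compile(
                flex_attention, **self._flex_attn_compile_kwargs
            )
            self._is_flex_compiled = True
        return self._compiled_flex_attention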


@@ -154,7 +154,6 @@ def register_ring_attn_from_device_mesh(
     LOG.info(
         f"Enabling ring attention sequence parallelism using DeviceMesh "
         f"dimension '{context_parallel_dim}'",
-        main_process_only=True,
     )
     # Extract the sequence parallel submesh


@@ -85,7 +85,6 @@ def patch_tiled_mlp(model_type, use_original_mlp=True, cfg_num_shards=None):
         mlp_cls._tiled_mlp_dist_impl = None
         LOG.info(
             f"Successfully monkey-patched TiledMLP for model_type: {model_type}",
-            main_process_only=True,
         )
     except (ImportError, AttributeError) as e:
         raise RuntimeError(


@@ -69,7 +69,6 @@ def setup_model_and_tokenizer(
     # Load tokenizer
     LOG.debug(
         f"loading tokenizer... {cfg.tokenizer_config or cfg.base_model_config}",
-        main_process_only=True,
     )
     tokenizer = load_tokenizer(cfg)


@@ -59,7 +59,6 @@ class DynamicCheckpointCallback(TrainerCallback):
f"Dynamic checkpoint enabled. To trigger checkpoint save:\n"
f" • File: touch {cfg.output_dir}/{self.trigger_filename}\n"
f" • Check interval: every {self.check_interval} steps",
main_process_only=True,
)
def on_step_end(
@@ -89,12 +88,10 @@ class DynamicCheckpointCallback(TrainerCallback):
                 LOG.info(
                     f"Dynamic checkpoint triggered via file '{self.trigger_filename}' "
                     f"at step {state.global_step}",
-                    main_process_only=True,
                 )
             except OSError as exc:
                 LOG.warning(
                     f"Failed to delete trigger file: {exc}",
-                    main_process_only=True,
                 )

         if self.should_save_checkpoint:
@@ -127,6 +124,5 @@ class DynamicCheckpointCallback(TrainerCallback):
             control.should_save = True
             LOG.info(
                 f"Saving dynamic checkpoint at step {state.global_step}",
-                main_process_only=True,
             )
         return control
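
The hunks above belong to a trigger-file pattern: every check_interval steps the callback looks for a marker file in the output directory, deletes it if found, and asks the Trainer to save via control.should_save. A condensed sketch of that flow — the class, attribute, and file names here are illustrative, not the repo's exact implementation:

import os
from transformers import TrainerCallback, TrainerControl, TrainerState, TrainingArguments

class TriggerFileCheckpointCallback(TrainerCallback):
    # Sketch: request a checkpoint when a trigger file appears
    # (e.g. `touch <output_dir>/save_now`).
    def __init__(self, output_dir: str, trigger_filename: str = "save_now", check_interval: int = 10):
        self.trigger_path = os.path.join(output_dir, trigger_filename)
        self.check_interval = check_interval

    def on_step_end(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs):
        if state.global_step % self.check_interval == 0 and os.path.exists(self.trigger_path):
            try:
                os.remove(self.trigger_path)  # consume the trigger so it fires only once
            except OSError:
                pass  # the save still proceeds even if the file could not be removed
            control.should_save = True  # the Trainer saves a checkpoint at this step
        return control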


@@ -474,13 +474,11 @@ def load_preprocessed_dataset(cfg: DictDefault, dataset_hash: str) -> Dataset |
     ):
         LOG.info(
             f"Loading prepared dataset from disk at {prepared_ds_path}...",
-            main_process_only=True,
         )
         return load_from_disk(str(prepared_ds_path))
     LOG.info(
         f"Unable to find prepared dataset in {prepared_ds_path}",
-        main_process_only=True,
     )
     return None
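
The hunk above follows a simple cache-lookup pattern: if a prepared dataset exists at the expected path, load it from disk; otherwise return None so the caller falls back to preprocessing. A minimal sketch under the assumption that datasets' load_from_disk is used as shown (the function and path names here are illustrative):

from pathlib import Path
from datasets import Dataset, load_from_disk

def maybe_load_prepared(prepared_ds_path: Path) -> Dataset | None:
    # Reuse a previously prepared dataset when a cached copy exists on disk.
    if prepared_ds_path.exists():
        return load_from_disk(str(prepared_ds_path))
    # No cached copy: signal the caller to run preprocessing.
    return None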


@@ -128,7 +128,6 @@ class DatasetValidationMixin:
         ):
             LOG.info(
                 "explicitly setting `eval_sample_packing` to match `sample_packing`",
-                main_process_only=True,
             )
             data["eval_sample_packing"] = True