chore: logging cleanup (#3482) [skip ci]

This commit is contained in:
NanoCode012
2026-03-16 09:10:57 +07:00
committed by GitHub
parent a806704e94
commit d8a646c80d
9 changed files with 1 addition and 17 deletions

View File

@@ -196,12 +196,10 @@ def do_cli(config: Union[Path, str] = Path("examples/"), **kwargs):
state.wait_for_everyone() state.wait_for_everyone()
LOG.info( LOG.info(
f"FSDP SHARDED_STATE_DICT weights successfully merged to: {output_path}", f"FSDP SHARDED_STATE_DICT weights successfully merged to: {output_path}",
main_process_only=True,
) )
LOG.info( LOG.info(
"Merged weights are only the safetensors and doesn't include the model configuration " "Merged weights are only the safetensors and doesn't include the model configuration "
f"or tokenizer which may be found in {parsed_cfg.output_dir}.", f"or tokenizer which may be found in {parsed_cfg.output_dir}.",
main_process_only=True,
) )

View File

@@ -19,5 +19,4 @@ class CheckpointSaveMixin(Trainer):
f"Trainer does not support saving optimizer and scheduler: {exc}\n" f"Trainer does not support saving optimizer and scheduler: {exc}\n"
"Optimizer and scheduler states were not saved - resuming from checkpoints " "Optimizer and scheduler states were not saved - resuming from checkpoints "
"for this training run will not be possible.", "for this training run will not be possible.",
main_process_only=True,
) )

View File

@@ -64,15 +64,12 @@ def patch_flex_wrapper(**flex_attn_compile_kwargs):
LOG.info( LOG.info(
"Compiling flex attention with kwargs: %s. This may take a while...", "Compiling flex attention with kwargs: %s. This may take a while...",
flex_attn_compile_kwargs, flex_attn_compile_kwargs,
main_process_only=True,
) )
self._compiled_flex_attention = torch.compile( self._compiled_flex_attention = torch.compile(
flex_attention, flex_attention,
**flex_attn_compile_kwargs, **flex_attn_compile_kwargs,
) )
LOG.info( LOG.info("Flex attention compiled successfully.")
"Flex attention compiled successfully.", main_process_only=True
)
self._is_flex_compiled = True self._is_flex_compiled = True

View File

@@ -154,7 +154,6 @@ def register_ring_attn_from_device_mesh(
LOG.info( LOG.info(
f"Enabling ring attention sequence parallelism using DeviceMesh " f"Enabling ring attention sequence parallelism using DeviceMesh "
f"dimension '{context_parallel_dim}'", f"dimension '{context_parallel_dim}'",
main_process_only=True,
) )
# Extract the sequence parallel submesh # Extract the sequence parallel submesh

View File

@@ -85,7 +85,6 @@ def patch_tiled_mlp(model_type, use_original_mlp=True, cfg_num_shards=None):
mlp_cls._tiled_mlp_dist_impl = None mlp_cls._tiled_mlp_dist_impl = None
LOG.info( LOG.info(
f"Successfully monkey-patched TiledMLP for model_type: {model_type}", f"Successfully monkey-patched TiledMLP for model_type: {model_type}",
main_process_only=True,
) )
except (ImportError, AttributeError) as e: except (ImportError, AttributeError) as e:
raise RuntimeError( raise RuntimeError(

View File

@@ -69,7 +69,6 @@ def setup_model_and_tokenizer(
# Load tokenizer # Load tokenizer
LOG.debug( LOG.debug(
f"loading tokenizer... {cfg.tokenizer_config or cfg.base_model_config}", f"loading tokenizer... {cfg.tokenizer_config or cfg.base_model_config}",
main_process_only=True,
) )
tokenizer = load_tokenizer(cfg) tokenizer = load_tokenizer(cfg)

View File

@@ -59,7 +59,6 @@ class DynamicCheckpointCallback(TrainerCallback):
f"Dynamic checkpoint enabled. To trigger checkpoint save:\n" f"Dynamic checkpoint enabled. To trigger checkpoint save:\n"
f" • File: touch {cfg.output_dir}/{self.trigger_filename}\n" f" • File: touch {cfg.output_dir}/{self.trigger_filename}\n"
f" • Check interval: every {self.check_interval} steps", f" • Check interval: every {self.check_interval} steps",
main_process_only=True,
) )
def on_step_end( def on_step_end(
@@ -89,12 +88,10 @@ class DynamicCheckpointCallback(TrainerCallback):
LOG.info( LOG.info(
f"Dynamic checkpoint triggered via file '{self.trigger_filename}' " f"Dynamic checkpoint triggered via file '{self.trigger_filename}' "
f"at step {state.global_step}", f"at step {state.global_step}",
main_process_only=True,
) )
except OSError as exc: except OSError as exc:
LOG.warning( LOG.warning(
f"Failed to delete trigger file: {exc}", f"Failed to delete trigger file: {exc}",
main_process_only=True,
) )
if self.should_save_checkpoint: if self.should_save_checkpoint:
@@ -127,6 +124,5 @@ class DynamicCheckpointCallback(TrainerCallback):
control.should_save = True control.should_save = True
LOG.info( LOG.info(
f"Saving dynamic checkpoint at step {state.global_step}", f"Saving dynamic checkpoint at step {state.global_step}",
main_process_only=True,
) )
return control return control

View File

@@ -474,13 +474,11 @@ def load_preprocessed_dataset(cfg: DictDefault, dataset_hash: str) -> Dataset |
): ):
LOG.info( LOG.info(
f"Loading prepared dataset from disk at {prepared_ds_path}...", f"Loading prepared dataset from disk at {prepared_ds_path}...",
main_process_only=True,
) )
return load_from_disk(str(prepared_ds_path)) return load_from_disk(str(prepared_ds_path))
LOG.info( LOG.info(
f"Unable to find prepared dataset in {prepared_ds_path}", f"Unable to find prepared dataset in {prepared_ds_path}",
main_process_only=True,
) )
return None return None

View File

@@ -128,7 +128,6 @@ class DatasetValidationMixin:
): ):
LOG.info( LOG.info(
"explicitly setting `eval_sample_packing` to match `sample_packing`", "explicitly setting `eval_sample_packing` to match `sample_packing`",
main_process_only=True,
) )
data["eval_sample_packing"] = True data["eval_sample_packing"] = True