chore: logging cleanup (#3482) [skip ci]
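
All hunks below make the same mechanical cleanup: the explicit `main_process_only=True` keyword is dropped from `LOG` calls (in `patch_flex_wrapper`, the multi-line call is also collapsed to a single line), leaving the logger's default behavior in place.
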
@@ -196,12 +196,10 @@ def do_cli(config: Union[Path, str] = Path("examples/"), **kwargs):
     state.wait_for_everyone()
     LOG.info(
         f"FSDP SHARDED_STATE_DICT weights successfully merged to: {output_path}",
-        main_process_only=True,
     )
     LOG.info(
         "Merged weights are only the safetensors and doesn't include the model configuration "
         f"or tokenizer which may be found in {parsed_cfg.output_dir}.",
-        main_process_only=True,
     )

@@ -19,5 +19,4 @@ class CheckpointSaveMixin(Trainer):
             f"Trainer does not support saving optimizer and scheduler: {exc}\n"
             "Optimizer and scheduler states were not saved - resuming from checkpoints "
             "for this training run will not be possible.",
-            main_process_only=True,
         )

@@ -64,15 +64,12 @@ def patch_flex_wrapper(**flex_attn_compile_kwargs):
         LOG.info(
             "Compiling flex attention with kwargs: %s. This may take a while...",
             flex_attn_compile_kwargs,
-            main_process_only=True,
         )
         self._compiled_flex_attention = torch.compile(
             flex_attention,
             **flex_attn_compile_kwargs,
         )
-        LOG.info(
-            "Flex attention compiled successfully.", main_process_only=True
-        )
+        LOG.info("Flex attention compiled successfully.")

         self._is_flex_compiled = True

@@ -154,7 +154,6 @@ def register_ring_attn_from_device_mesh(
     LOG.info(
         f"Enabling ring attention sequence parallelism using DeviceMesh "
         f"dimension '{context_parallel_dim}'",
-        main_process_only=True,
     )

     # Extract the sequence parallel submesh

@@ -85,7 +85,6 @@ def patch_tiled_mlp(model_type, use_original_mlp=True, cfg_num_shards=None):
         mlp_cls._tiled_mlp_dist_impl = None
         LOG.info(
             f"Successfully monkey-patched TiledMLP for model_type: {model_type}",
-            main_process_only=True,
         )
     except (ImportError, AttributeError) as e:
         raise RuntimeError(

@@ -69,7 +69,6 @@ def setup_model_and_tokenizer(
     # Load tokenizer
     LOG.debug(
         f"loading tokenizer... {cfg.tokenizer_config or cfg.base_model_config}",
-        main_process_only=True,
     )
     tokenizer = load_tokenizer(cfg)

@@ -59,7 +59,6 @@ class DynamicCheckpointCallback(TrainerCallback):
             f"Dynamic checkpoint enabled. To trigger checkpoint save:\n"
             f"  • File: touch {cfg.output_dir}/{self.trigger_filename}\n"
             f"  • Check interval: every {self.check_interval} steps",
-            main_process_only=True,
         )

     def on_step_end(

@@ -89,12 +88,10 @@ class DynamicCheckpointCallback(TrainerCallback):
                 LOG.info(
                     f"Dynamic checkpoint triggered via file '{self.trigger_filename}' "
                     f"at step {state.global_step}",
-                    main_process_only=True,
                 )
             except OSError as exc:
                 LOG.warning(
                     f"Failed to delete trigger file: {exc}",
-                    main_process_only=True,
                 )

         if self.should_save_checkpoint:

@@ -127,6 +124,5 @@ class DynamicCheckpointCallback(TrainerCallback):
             control.should_save = True
             LOG.info(
                 f"Saving dynamic checkpoint at step {state.global_step}",
-                main_process_only=True,
             )
         return control

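The `DynamicCheckpointCallback` hunks above describe a file-based trigger protocol: touching `{output_dir}/{trigger_filename}` requests a checkpoint, and the callback polls for that file every `check_interval` steps, deleting it once consumed. A minimal sketch of that polling check, with hypothetical names modeled on the fields visible in the diff (not axolotl's actual implementation):

import os

def dynamic_checkpoint_requested(output_dir: str, trigger_filename: str,
                                 global_step: int, check_interval: int) -> bool:
    # Touch the filesystem only every `check_interval` steps to keep
    # the per-step overhead negligible.
    if global_step % check_interval != 0:
        return False
    trigger_path = os.path.join(output_dir, trigger_filename)
    if not os.path.exists(trigger_path):
        return False
    # Consume the trigger so one touch yields exactly one checkpoint;
    # the diff shows the real callback also deletes the file and logs
    # a warning when the delete raises OSError.
    try:
        os.remove(trigger_path)
    except OSError:
        pass  # the real callback logs a warning here
    return True
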
@@ -474,13 +474,11 @@ def load_preprocessed_dataset(cfg: DictDefault, dataset_hash: str) -> Dataset | None:
     ):
         LOG.info(
             f"Loading prepared dataset from disk at {prepared_ds_path}...",
-            main_process_only=True,
         )
         return load_from_disk(str(prepared_ds_path))

     LOG.info(
         f"Unable to find prepared dataset in {prepared_ds_path}",
-        main_process_only=True,
     )
     return None

@@ -128,7 +128,6 @@ class DatasetValidationMixin:
         ):
             LOG.info(
                 "explicitly setting `eval_sample_packing` to match `sample_packing`",
-                main_process_only=True,
             )
             data["eval_sample_packing"] = True

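For context on the keyword being removed throughout: `LOG.info(..., main_process_only=True)` is the calling convention of a rank-aware logger in the style of `accelerate.logging.MultiProcessAdapter`, where `main_process_only` already defaults to True, so every explicit occurrence above is redundant. A minimal sketch of such an adapter, an illustration under that assumption rather than axolotl's actual logger:

import logging
import os


class MainProcessAdapter(logging.LoggerAdapter):
    """Drop records on non-main ranks; `main_process_only` defaults to True."""

    @staticmethod
    def _is_main_process() -> bool:
        # RANK is set by torchrun/accelerate launchers; a single-process
        # run has no RANK and counts as the main process.
        return int(os.environ.get("RANK", "0")) == 0

    def log(self, level, msg, *args, main_process_only: bool = True, **kwargs):
        if main_process_only and not self._is_main_process():
            return  # silently skip on worker ranks
        super().log(level, msg, *args, **kwargs)


logging.basicConfig(level=logging.INFO)
LOG = MainProcessAdapter(logging.getLogger(__name__), {})

LOG.info("visible on rank 0 only")                          # default behavior
LOG.info("visible on every rank", main_process_only=False)  # opt out per call

Under that default, `LOG.info(msg)` and `LOG.info(msg, main_process_only=True)` behave identically, which is exactly the redundancy this commit removes.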