From 7b78a315934ec8915c256efe8c277a764c219ab9 Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Wed, 13 Nov 2024 22:06:00 +0700 Subject: [PATCH] feat: print out dataset length even if not preprocess (#2034) [skip ci] --- src/axolotl/utils/trainer.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/axolotl/utils/trainer.py b/src/axolotl/utils/trainer.py index a552905f7..2d3a6944f 100644 --- a/src/axolotl/utils/trainer.py +++ b/src/axolotl/utils/trainer.py @@ -185,11 +185,10 @@ def process_datasets_for_packing(cfg, train_dataset, eval_dataset): min_sequence_len=cfg.min_sample_len or 2, ) - if cfg.is_preprocess: - min_input_len = np.min(get_dataset_lengths(train_dataset)) - LOG.debug(f"min_input_len: {min_input_len}", main_process_only=True) - max_input_len = np.max(get_dataset_lengths(train_dataset)) - LOG.debug(f"max_input_len: {max_input_len}", main_process_only=True) + min_input_len = np.min(get_dataset_lengths(train_dataset)) + LOG.debug(f"min_input_len: {min_input_len}", main_process_only=True) + max_input_len = np.max(get_dataset_lengths(train_dataset)) + LOG.debug(f"max_input_len: {max_input_len}", main_process_only=True) if cfg.model_config_type == "mamba": LOG.info("dropping attention_mask column")