diff --git a/scripts/finetune.py b/scripts/finetune.py index c23f9bfbc..0530f486f 100644 --- a/scripts/finetune.py +++ b/scripts/finetune.py @@ -242,6 +242,24 @@ def train( model.save_pretrained(cfg.output_dir) return + if cfg.debug: + logging.info("check_dataset_labels...") + check_dataset_labels( + train_dataset.select( + [random.randrange(0, len(train_dataset) - 1) for i in range(5)] + ), + tokenizer, + ) + + if prepare_ds_only: + logging.info("Finished preparing dataset. Exiting...") + return + + try: + model.train() + except: + pass + trainer = setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer) model.config.use_cache = False @@ -284,7 +302,12 @@ def train( logging.info( f"Using Auto-resume functionality to start with checkpoint at {resume_from_checkpoint}" ) - trainer.train(resume_from_checkpoint=resume_from_checkpoint) + + if cfg.flash_optimum: + with torch.backends.cuda.sdp_kernel(enable_flash=True, enable_math=True, enable_mem_efficient=True): + trainer.train(resume_from_checkpoint=resume_from_checkpoint) + else: + trainer.train(resume_from_checkpoint=resume_from_checkpoint) logging.info(f"Training Completed!!! Saving pre-trained model to {cfg.output_dir}")