Lint finetune.py

This commit is contained in:
NanoCode012
2023-05-29 13:40:12 +09:00
parent f4e5d86268
commit 82971e1565

View File

@@ -34,14 +34,16 @@ DEFAULT_DATASET_PREPARED_PATH = "last_run_prepared"
def choose_device(cfg): def choose_device(cfg):
def get_device(): def get_device():
if torch.cuda.is_available(): try:
return f"cuda:{cfg.local_rank}" if torch.cuda.is_available():
else: return f"cuda:{cfg.local_rank}"
try:
if torch.backends.mps.is_available(): if torch.backends.mps.is_available():
return "mps" return "mps"
except Exception: # pylint: disable=broad-exception-caught
return "cpu" raise SystemError("No CUDA/mps device found")
except Exception: # pylint: disable=broad-exception-caught
return "cpu"
cfg.device = get_device() cfg.device = get_device()
if cfg.device == "cuda": if cfg.device == "cuda":
@@ -54,7 +56,7 @@ def get_multi_line_input() -> Optional[str]:
print("Give me an instruction (Ctrl + D to finish): ") print("Give me an instruction (Ctrl + D to finish): ")
instruction = "" instruction = ""
for line in sys.stdin: for line in sys.stdin:
instruction += line instruction += line # pylint: disable=consider-using-join
# instruction = pathlib.Path("/proc/self/fd/0").read_text() # instruction = pathlib.Path("/proc/self/fd/0").read_text()
return instruction return instruction
@@ -76,7 +78,7 @@ def do_inference(cfg, model, tokenizer, prompter="AlpacaPrompter"):
model.eval() model.eval()
with torch.no_grad(): with torch.no_grad():
# gc = GenerationConfig() # TODO swap out and use this # pylint: disable=fixme # gc = GenerationConfig() # TODO swap out and use this
generated = model.generate( generated = model.generate(
inputs=batch["input_ids"].to(cfg.device), inputs=batch["input_ids"].to(cfg.device),
do_sample=True, do_sample=True,
@@ -95,7 +97,7 @@ def do_inference(cfg, model, tokenizer, prompter="AlpacaPrompter"):
def choose_config(path: Path): def choose_config(path: Path):
yaml_files = [file for file in path.glob("*.yml")] yaml_files = list(path.glob("*.yml"))
if not yaml_files: if not yaml_files:
raise ValueError( raise ValueError(
@@ -240,7 +242,7 @@ def train(
if cfg.local_rank == 0: if cfg.local_rank == 0:
signal.signal( signal.signal(
signal.SIGINT, signal.SIGINT,
lambda signal, frame: (model.save_pretrained(cfg.output_dir), exit(0)), lambda signal, frame: (model.save_pretrained(cfg.output_dir), sys.exit(0)),
) )
logging.info("Starting trainer...") logging.info("Starting trainer...")
@@ -263,13 +265,11 @@ def train(
logging.info(f"Training Completed!!! Saving pre-trained model to {cfg.output_dir}") logging.info(f"Training Completed!!! Saving pre-trained model to {cfg.output_dir}")
# pylint: disable=fixme
# TODO do we need this fix? https://huggingface.co/docs/accelerate/usage_guides/fsdp#saving-and-loading # TODO do we need this fix? https://huggingface.co/docs/accelerate/usage_guides/fsdp#saving-and-loading
# only save on rank 0, otherwise it corrupts output on multi-GPU when multiple processes attempt to write the same file # only save on rank 0, otherwise it corrupts output on multi-GPU when multiple processes attempt to write the same file
if cfg.local_rank == 0: if cfg.local_rank == 0:
model.save_pretrained(cfg.output_dir) model.save_pretrained(cfg.output_dir)
# pylint: disable=fixme
# trainer.save_model(cfg.output_dir) # TODO this may be needed for deepspeed to work? need to review another time # trainer.save_model(cfg.output_dir) # TODO this may be needed for deepspeed to work? need to review another time