Merge pull request #178 from PocketDocLabs/main

Update README.md to reflect current gradient checkpointing support
Author: NanoCode012 · Date: 2023-06-10 08:26:48 +09:00 · Committed by: GitHub


@@ -387,7 +387,7 @@ train_on_inputs: false
 # don't use this, leads to wonky training (according to someone on the internet)
 group_by_length: false
-# does not work with current implementation of 4-bit LoRA
+# Whether to use gradient checkpointing https://huggingface.co/docs/transformers/v4.18.0/en/performance#gradient-checkpointing
 gradient_checkpointing: false
 # stop training after this many evaluation losses have increased in a row
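For context, the Transformers docs linked in the new comment describe gradient checkpointing as trading extra forward-pass compute for lower activation memory. A minimal, illustrative Python sketch of enabling it directly on a Transformers model (the model name is a placeholder and not part of this commit):

from transformers import AutoModelForCausalLM

# Placeholder model, for illustration only.
model = AutoModelForCausalLM.from_pretrained("gpt2")

# Recompute activations during the backward pass instead of caching them,
# reducing activation memory at the cost of extra forward compute.
model.gradient_checkpointing_enable()

In a Trainer-driven setup the same behavior can be requested via TrainingArguments(gradient_checkpointing=True), which is what a config flag like the one in this diff would typically map to.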