diff --git a/README.md b/README.md
index 2787c7a13..2b14fe94b 100644
--- a/README.md
+++ b/README.md
@@ -387,7 +387,7 @@ train_on_inputs: false
 # don't use this, leads to wonky training (according to someone on the internet)
 group_by_length: false
 
-# does not work with current implementation of 4-bit LoRA
+# Whether to use gradient checkpointing; see https://huggingface.co/docs/transformers/v4.18.0/en/performance#gradient-checkpointing
 gradient_checkpointing: false
 
 # stop training after this many evaluation losses have increased in a row