Deprecate max packed sequence len (#1141)

2024-01-20 05:11:50 -05:00
parent 3db5f2fd17
commit 2ce5c0d68a
6 changed files with 38 additions and 170 deletions
--- a/README.md
+++ b/README.md
@@ -642,10 +642,6 @@ sequence_len: 2048
 # Pad inputs so each step uses constant-sized buffers.
 # This reduces memory fragmentation and may prevent OOMs by reusing memory more efficiently.
 pad_to_sequence_len:
-# Max sequence length to concatenate training samples together up to
-# Inspired by StackLLaMA. see https://huggingface.co/blog/stackllama#supervised-fine-tuning
-# FutureWarning: This will soon be DEPRECATED
-max_packed_sequence_len: 1024
 # Use efficient multi-packing with block-diagonal attention and per-sequence position_ids. Recommended: set to 'true'.
 sample_packing:
 # Set to 'false' if getting errors during eval with sample_packing on.