diff --git a/examples/llama-3/fft-1b-fsdp.yml b/examples/llama-3/fft-1b-fsdp.yml
index 66b305420..84b4db48f 100644
--- a/examples/llama-3/fft-1b-fsdp.yml
+++ b/examples/llama-3/fft-1b-fsdp.yml
@@ -44,4 +44,8 @@ fsdp_config:
   fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer
   fsdp_state_dict_type: FULL_STATE_DICT
   fsdp_sharding_strategy: FULL_SHARD
-  fsdp_backward_prefetch: BACKWARD_PRE
\ No newline at end of file
+  fsdp_backward_prefetch: BACKWARD_PRE
+
+# Pad sequences with the end-of-text token.
+special_tokens:
+  pad_token: "<|end_of_text|>"