From 9a8073e73d21f44fc35963b8ef46ab7d96a4472a Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Sat, 12 Jul 2025 11:41:34 -0400
Subject: [PATCH] Liquid Foundation Model 2 support (#2905)

* LFM2 support

* docs

* packing seems to work

* update install to force install in case already on dev version

* default to use chunked cross entropy
---
 examples/lfm2/README.md          |  7 +++++
 examples/lfm2/lfm2-350m-fft.yaml | 48 ++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+)
 create mode 100644 examples/lfm2/README.md
 create mode 100644 examples/lfm2/lfm2-350m-fft.yaml

diff --git a/examples/lfm2/README.md b/examples/lfm2/README.md
new file mode 100644
index 000000000..eb9ca911f
--- /dev/null
+++ b/examples/lfm2/README.md
@@ -0,0 +1,7 @@
+# Liquid Foundation Models 2
+
+LFM2 support exists on the transformers main branch but is not yet included in a release, so install transformers from source:
+
+```bash
+pip install --upgrade --no-deps --force-reinstall git+https://github.com/huggingface/transformers.git
+```
diff --git a/examples/lfm2/lfm2-350m-fft.yaml b/examples/lfm2/lfm2-350m-fft.yaml
new file mode 100644
index 000000000..95961557e
--- /dev/null
+++ b/examples/lfm2/lfm2-350m-fft.yaml
@@ -0,0 +1,48 @@
+base_model: LiquidAI/LFM2-350M
+
+chunked_cross_entropy: true
+
+chat_template: tokenizer_default
+eot_tokens:
+  - "<|im_end|>"
+datasets:
+  - path: mlabonne/FineTome-100k
+    type: chat_template
+    split: train[:20%]
+    field_messages: conversations
+    message_field_role: from
+    message_field_content: value
+dataset_prepared_path: last_run_prepared
+val_set_size: 0.05
+output_dir: ./outputs/out
+
+sequence_len: 4096
+sample_packing: true
+pad_to_sequence_len: true
+
+wandb_project:
+wandb_entity:
+wandb_watch:
+wandb_name:
+wandb_log_model:
+
+gradient_accumulation_steps: 2
+micro_batch_size: 4
+num_epochs: 1
+optimizer: adamw_torch_fused
+lr_scheduler: cosine
+learning_rate: 5e-5
+
+bf16: true
+tf32: true
+
+gradient_checkpointing: false
+resume_from_checkpoint:
+logging_steps: 1
+flash_attention: true
+
+warmup_ratio: 0.1
+evals_per_epoch: 2
+saves_per_epoch: 1
+
+weight_decay: 0.0
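
A quick way to sanity-check the source install is to confirm the LFM2 architecture is registered with transformers. This is a minimal sketch, assuming the model type string is `lfm2` as on the transformers main branch:

```bash
# Prints the default LFM2 config if the installed transformers build
# registers the "lfm2" model type; raises a ValueError otherwise.
python -c "from transformers import AutoConfig; print(AutoConfig.for_model('lfm2'))"
```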
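
Once transformers is installed, training with the example config would typically be launched through the axolotl CLI. A sketch, assuming the `axolotl` entry point from recent releases (older versions use `accelerate launch -m axolotl.cli.train` instead):

```bash
# Tokenize and pack the dataset ahead of time, then run the full fine-tune.
axolotl preprocess examples/lfm2/lfm2-350m-fft.yaml
axolotl train examples/lfm2/lfm2-350m-fft.yaml
```

Note that the config enables `chunked_cross_entropy: true`, which the commit message says is now used by default, alongside `sample_packing: true` with `pad_to_sequence_len: true`.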