From 9a8073e73d21f44fc35963b8ef46ab7d96a4472a Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Sat, 12 Jul 2025 11:41:34 -0400
Subject: [PATCH] Liquid Foundation Model 2 support (#2905)

* LFM2 support

* docs

* packing seems to work

* update install to force install in case already on dev version

* default to use chunked cross entropy
---
 examples/lfm2/README.md          |  7 +++++
 examples/lfm2/lfm2-350m-fft.yaml | 48 ++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+)
 create mode 100644 examples/lfm2/README.md
 create mode 100644 examples/lfm2/lfm2-350m-fft.yaml

diff --git a/examples/lfm2/README.md b/examples/lfm2/README.md
new file mode 100644
index 000000000..eb9ca911f
--- /dev/null
+++ b/examples/lfm2/README.md
@@ -0,0 +1,7 @@
+# Liquid Foundation Models 2
+
+LFM2 support exists on the transformers main branch but is not yet included in a release, so install transformers from source:
+
+```bash
+pip install --upgrade --no-deps --force-reinstall git+https://github.com/huggingface/transformers.git
+```
diff --git a/examples/lfm2/lfm2-350m-fft.yaml b/examples/lfm2/lfm2-350m-fft.yaml
new file mode 100644
index 000000000..95961557e
--- /dev/null
+++ b/examples/lfm2/lfm2-350m-fft.yaml
@@ -0,0 +1,48 @@
+base_model: LiquidAI/LFM2-350M
+
+chunked_cross_entropy: true
+
+chat_template: tokenizer_default
+eot_tokens:
+  - "<|im_end|>"
+datasets:
+  - path: mlabonne/FineTome-100k
+    type: chat_template
+    split: train[:20%]
+    field_messages: conversations
+    message_field_role: from
+    message_field_content: value
+dataset_prepared_path: last_run_prepared
+val_set_size: 0.05
+output_dir: ./outputs/out
+
+sequence_len: 4096
+sample_packing: true
+pad_to_sequence_len: true
+
+wandb_project:
+wandb_entity:
+wandb_watch:
+wandb_name:
+wandb_log_model:
+
+gradient_accumulation_steps: 2
+micro_batch_size: 4
+num_epochs: 1
+optimizer: adamw_torch_fused
+lr_scheduler: cosine
+learning_rate: 5e-5
+
+bf16: true
+tf32: true
+
+gradient_checkpointing: false
+resume_from_checkpoint:
+logging_steps: 1
+flash_attention: true
+
+warmup_ratio: 0.1
+evals_per_epoch: 2
+saves_per_epoch: 1
+
+weight_decay: 0.0
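
A quick way to sanity-check the source install is to confirm the LFM2 architecture is registered with transformers. This is a minimal sketch, assuming the model type string is `lfm2` as on the transformers main branch:

```bash
# Prints the default LFM2 config if the installed transformers build
# registers the "lfm2" model type; raises a ValueError otherwise.
python -c "from transformers import AutoConfig; print(AutoConfig.for_model('lfm2'))"
```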
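
Once transformers is installed, training with the example config would typically be launched through the axolotl CLI. A sketch, assuming the `axolotl` entry point from recent releases (older versions use `accelerate launch -m axolotl.cli.train` instead):

```bash
# Tokenize and pack the dataset ahead of time, then run the full fine-tune.
axolotl preprocess examples/lfm2/lfm2-350m-fft.yaml
axolotl train examples/lfm2/lfm2-350m-fft.yaml
```

Note that the config enables `chunked_cross_entropy: true`, which the commit message says is now used by default, alongside `sample_packing: true` with `pad_to_sequence_len: true`.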