From e662972a2912b395da4817bf13d73b7d4a6d9137 Mon Sep 17 00:00:00 2001
From: Younes B <49240599+younesbelkada@users.noreply.github.com>
Date: Thu, 30 Apr 2026 19:25:02 +0400
Subject: [PATCH] Feat: Add bitnet integration (#3634)

* add bitnet

* switch to uv

* chore: lint

---------

Co-authored-by: Wing Lian
---
 _quarto.yml                             |   1 +
 docs/1_58bit_finetuning.qmd             | 107 ++++++++++++++++++++++++++++++
 src/axolotl/loaders/model.py            |  11 ++++
 src/axolotl/utils/schemas/model.py      |   6 +++
 src/axolotl/utils/schemas/validation.py |   6 +++
 5 files changed, 131 insertions(+)
 create mode 100644 docs/1_58bit_finetuning.qmd

diff --git a/_quarto.yml b/_quarto.yml
index e8263a971..5b008bf99 100644
--- a/_quarto.yml
+++ b/_quarto.yml
@@ -311,6 +311,7 @@ website:
       - docs/dataset_loading.qmd
       - docs/qat.qmd
       - docs/quantize.qmd
+      - docs/1_58bit_finetuning.qmd
       - docs/optimizations.qmd
 
     - section: "Core Concepts"
diff --git a/docs/1_58bit_finetuning.qmd b/docs/1_58bit_finetuning.qmd
new file mode 100644
index 000000000..02bc3a6f1
--- /dev/null
+++ b/docs/1_58bit_finetuning.qmd
@@ -0,0 +1,107 @@
+---
+title: "1.58-bit Finetuning"
+back-to-top-navigation: true
+toc: true
+toc-expand: 2
+toc-depth: 4
+---
+
+## Overview
+
+1.58-bit finetuning lets you finetune BitNet models from their prequantized weights. In principle, any LLM could be finetuned in the 1.58-bit format, but the performance degradation would be dramatic.
+
+Axolotl supports 1.58-bit finetuning via the [`onebitllms`](https://github.com/tiiuae/onebitllms) library, which replaces standard linear layers with BitNet-compatible counterparts that are ready for training (a sketch of this replacement is shown at the end of this page).
+
+::: {.callout-note}
+LoRA is not supported for BitNet models.
+:::
+
+## Installation
+
+Install the `onebitllms` package before using this feature:
+
+```bash
+uv pip install onebitllms
+```
+
+Or from source:
+
+```bash
+uv pip install git+https://github.com/tiiuae/onebitllms
+```
+
+## Supported models
+
+For now, only the `Falcon-E` series of models is supported. Make sure to use the `-prequantized` variants:
+
+```bash
+tiiuae/Falcon-E-3B-Base-prequantized
+tiiuae/Falcon-E-1B-Base-prequantized
+```
+
+In theory, any other model would 'work', but the performance degradation would be severe; this remains an area of exploration.
+
+## Configuration
+
+To enable 1.58-bit finetuning, set the following in your configuration file:
+
+```yaml
+base_model: tiiuae/Falcon-E-3B-Base-prequantized # A BitNet-compatible model
+
+use_onebitllms: true
+```
+
+::: {.callout-note}
+For BitNet models, it is recommended to use a higher learning rate than for standard models (usually around 10x higher).
+:::
+
+## Considerations after training
+
+Once your model has been trained with 1.58-bit finetuning, you can convert it to ternary format using the `onebitllms` CLI:
+
+```bash
+onebitllms quantize_to_1bit INPUT_PATH OUTPUT_PATH
+```
+
+After that, you can run the trained model with supported packages such as `llama.cpp` or Apple's MLX.
+
+## Example configurations
+
+You can find example configurations in `examples/falcon-e`, which contains one configuration for SFT and one for DPO.
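+
+A minimal sketch of what such an SFT configuration might look like is shown below. The dataset and hyperparameter values are illustrative assumptions rather than a copy of the shipped examples; note the roughly 10x-higher learning rate discussed above:
+
+```yaml
+base_model: tiiuae/Falcon-E-1B-Base-prequantized
+use_onebitllms: true
+
+datasets:
+  - path: tatsu-lab/alpaca # assumed example dataset
+    type: alpaca
+
+sequence_len: 2048
+micro_batch_size: 1
+gradient_accumulation_steps: 4
+num_epochs: 1
+
+optimizer: adamw_torch
+learning_rate: 2e-4 # roughly 10x a typical full-finetune learning rate
+lr_scheduler: cosine
+
+output_dir: ./outputs/falcon-e-sft-1_58bit
+```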
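+
+Under the hood, setting `use_onebitllms: true` has Axolotl swap the model's linear layers for BitNet-compatible ones right after the model is loaded. A rough sketch of the equivalent manual steps, assuming a prequantized Falcon-E checkpoint (only `replace_linear_with_bitnet_linear` is taken from the actual integration):
+
+```python
+from transformers import AutoModelForCausalLM
+
+from onebitllms import replace_linear_with_bitnet_linear
+
+# Load the prequantized BitNet checkpoint as a regular Hugging Face model
+model = AutoModelForCausalLM.from_pretrained("tiiuae/Falcon-E-1B-Base-prequantized")
+
+# Swap nn.Linear layers for BitNet-compatible linear layers; this mirrors
+# what Axolotl does after loading when `use_onebitllms: true` is set
+model = replace_linear_with_bitnet_linear(model)
+```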
diff --git a/src/axolotl/loaders/model.py b/src/axolotl/loaders/model.py
index 4f5779327..57aabfbab 100644
--- a/src/axolotl/loaders/model.py
+++ b/src/axolotl/loaders/model.py
@@ -846,6 +846,17 @@ class ModelLoader:
         else:
             self.model = self._load_model_from_pretrained(model_loader_class)
 
+        if self.cfg.use_onebitllms:
+            try:
+                from onebitllms import replace_linear_with_bitnet_linear
+            except ImportError as exc:
+                raise ImportError(
+                    "The 'onebitllms' package is required for use_onebitllms. "
+                    "Install it with: `uv pip install onebitllms`"
+                ) from exc
+
+            self.model = replace_linear_with_bitnet_linear(self.model)
+
         if is_deepspeed_zero3_enabled():
             skip_move_to_device = True
diff --git a/src/axolotl/utils/schemas/model.py b/src/axolotl/utils/schemas/model.py
index f54958b33..30202efe0 100644
--- a/src/axolotl/utils/schemas/model.py
+++ b/src/axolotl/utils/schemas/model.py
@@ -103,6 +103,12 @@ class ModelInputConfig(BaseModel):
         default=None,
         json_schema_extra={"description": "kwargs for model quantization config"},
     )
+    use_onebitllms: bool | None = Field(
+        default=None,
+        json_schema_extra={
+            "description": "Whether to use `onebitllms` for 1.58bit training (only for bitnet models)."
+        },
+    )
 
     @field_validator("trust_remote_code")
     @classmethod
diff --git a/src/axolotl/utils/schemas/validation.py b/src/axolotl/utils/schemas/validation.py
index fff69de26..76b09bfdb 100644
--- a/src/axolotl/utils/schemas/validation.py
+++ b/src/axolotl/utils/schemas/validation.py
@@ -638,6 +638,12 @@ class LoRAValidationMixin:
             raise ValueError("Fused modules are not supported with LoRA/QLoRA")
         return self
 
+    @model_validator(mode="after")
+    def check_onebitllms_lora(self):
+        if self.use_onebitllms and self.adapter in ["lora", "qlora"]:
+            raise ValueError("LoRA/QLoRA is not supported with use_onebitllms")
+        return self
+
     @model_validator(mode="before")
     @classmethod
     def warn_qlora_zero3_w_use_reentrant(cls, data):