From 79103b01ca1c914103d888d88fdb903e29840d4f Mon Sep 17 00:00:00 2001
From: NanoCode012
Date: Wed, 10 Sep 2025 09:01:02 +0700
Subject: [PATCH] Feat: add seedoss (#3104) [skip ci]

* feat: add seedoss cce

* feat: add seedoss config and docs

* fix: shouldn't have target modules with target linear

* feat: add vram numbers

* fix: hf link

* fix: name

* fix: support multipack seedoss

* fix: merge error

* feat: update seedoss instructions for transformers release
---
 examples/seed-oss/README.md                  | 54 ++++++++++++++++++
 examples/seed-oss/seed-oss-36b-qlora.yaml    | 56 +++++++++++++++++++
 .../integrations/cut_cross_entropy/README.md |  3 +
 src/axolotl/monkeypatch/multipack.py         |  1 +
 4 files changed, 114 insertions(+)
 create mode 100644 examples/seed-oss/README.md
 create mode 100644 examples/seed-oss/seed-oss-36b-qlora.yaml

diff --git a/examples/seed-oss/README.md b/examples/seed-oss/README.md
new file mode 100644
index 000000000..5610c1316
--- /dev/null
+++ b/examples/seed-oss/README.md
@@ -0,0 +1,54 @@
+# Finetune ByteDance's Seed-OSS with Axolotl
+
+[Seed-OSS](https://huggingface.co/collections/ByteDance-Seed/seed-oss-68a609f4201e788db05b5dcd) are a series of 36B parameter open source models trained by ByteDance's Seed Team.
+
+This guide shows how to fine-tune it with Axolotl with multi-turn conversations and proper masking.
+
+## Getting started
+
+1. Install Axolotl following the [installation guide](https://docs.axolotl.ai/docs/installation.html). You need to install from main as Seed-OSS is only on nightly or use our latest [Docker images](https://docs.axolotl.ai/docs/docker.html).
+
+    Here is an example of how to install from main for pip:
+
+```bash
+# Ensure you have Pytorch installed (Pytorch 2.6.0 min)
+git clone https://github.com/axolotl-ai-cloud/axolotl.git
+cd axolotl
+
+pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
+pip3 install --no-build-isolation -e '.[flash-attn]'
+
+# Install Cut Cross Entropy
+python scripts/cutcrossentropy_install.py | sh
+```
+
+2. Run the finetuning example:
+
+```bash
+axolotl train examples/seed-oss/seed-oss-36b-qlora.yaml
+```
+
+This config uses about 27.7 GiB VRAM.
+
+Let us know how it goes. Happy finetuning! 🚀
+
+### TIPS
+
+- For inference, the official Seed Team recommends `top_p=0.95` and `temperature=1.1`.
+- You can run a full finetuning by removing the `adapter: qlora` and `load_in_4bit: true` from the config.
+- Read more on how to load your own dataset at [docs](https://docs.axolotl.ai/docs/dataset_loading.html).
+- The dataset format follows the OpenAI Messages format as seen [here](https://docs.axolotl.ai/docs/dataset-formats/conversation.html#chat_template).
+
+## Optimization Guides
+
+- [Multi-GPU Training](https://docs.axolotl.ai/docs/multi-gpu.html)
+- [Multi-Node Training](https://docs.axolotl.ai/docs/multi-node.html)
+- [LoRA Optimizations](https://docs.axolotl.ai/docs/lora_optims.html)
+
+## Related Resources
+
+- [ByteDance Seed Website](https://seed.bytedance.com/)
+- [Axolotl Docs](https://docs.axolotl.ai)
+- [Axolotl Website](https://axolotl.ai)
+- [Axolotl GitHub](https://github.com/axolotl-ai-cloud/axolotl)
+- [Axolotl Discord](https://discord.gg/7m9sfhzaf3)
diff --git a/examples/seed-oss/seed-oss-36b-qlora.yaml b/examples/seed-oss/seed-oss-36b-qlora.yaml
new file mode 100644
index 000000000..00e7cf3eb
--- /dev/null
+++ b/examples/seed-oss/seed-oss-36b-qlora.yaml
@@ -0,0 +1,56 @@
+base_model: ByteDance-Seed/Seed-OSS-36B-Instruct
+
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
+plugins:
+  - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
+
+load_in_8bit: false
+load_in_4bit: true
+
+datasets:
+  - path: fozziethebeat/alpaca_messages_2k_test
+    type: chat_template
+
+dataset_prepared_path: last_run_prepared
+val_set_size: 0.1
+output_dir: ./outputs/lora-out
+
+adapter: qlora
+lora_model_dir:
+
+sequence_len: 2048
+sample_packing: true
+
+lora_r: 32
+lora_alpha: 16
+lora_dropout: 0.05
+lora_target_linear: true
+
+wandb_project:
+wandb_entity:
+wandb_watch:
+wandb_name:
+wandb_log_model:
+
+gradient_accumulation_steps: 4
+micro_batch_size: 2
+num_epochs: 1
+optimizer: adamw_bnb_8bit
+lr_scheduler: cosine
+learning_rate: 0.0002
+
+bf16: auto
+tf32: false
+
+gradient_checkpointing: true
+resume_from_checkpoint:
+logging_steps: 1
+flash_attention: true
+
+warmup_ratio: 0.1
+evals_per_epoch: 1
+saves_per_epoch: 1
+
+# save_first_step: true # uncomment this to validate checkpoint saving works with your config
diff --git a/src/axolotl/integrations/cut_cross_entropy/README.md b/src/axolotl/integrations/cut_cross_entropy/README.md
index a64bdd054..393412f64 100644
--- a/src/axolotl/integrations/cut_cross_entropy/README.md
+++ b/src/axolotl/integrations/cut_cross_entropy/README.md
@@ -34,6 +34,7 @@ plugins:
 - arcee
 - cohere
 - cohere2
+- deepseek_v3
 - gemma
 - gemma2
 - gemma3
@@ -42,6 +43,7 @@ plugins:
 - gemma3n_text
 - glm
 - glm4
+- glm4_moe
 - gpt_oss
 - granite
 - granitemoe
@@ -64,6 +66,7 @@ plugins:
 - qwen3
 - qwen3_moe
 - smollm3
+- seed_oss
 - voxtral
 
 ## Citation
diff --git a/src/axolotl/monkeypatch/multipack.py b/src/axolotl/monkeypatch/multipack.py
index e4f9ca2be..cbc546877 100644
--- a/src/axolotl/monkeypatch/multipack.py
+++ b/src/axolotl/monkeypatch/multipack.py
@@ -38,6 +38,7 @@ SUPPORTED_MULTIPACK_MODEL_TYPES = [
     "smollm3",
     "gpt_oss",
     "arcee",
+    "seed_oss",
 ]