delete config

2025-09-23 15:34:07 +00:00
parent 3277d44d71
commit 1640cd4006
2 changed files with 0 additions and 67 deletions
--- a/examples/deepseek-v3/full-ft.yaml
+++ b/examples/deepseek-v3/full-ft.yaml
@@ -1,66 +0,0 @@
-# Example full fine-tuning config for a DeepSeek-V3 MoE model using Axolotl's
-# vendored Triton contiguous grouped GEMM kernels.
-# Replace the `base_model` value below with the name of the model you uploaded to HF.
-
-base_model: axolotl-ai-co/deepseek-v3-8b
-model_config_type: deepseek_v3
-trust_remote_code: true
-moe_kernels: true
-
-# --- Data ------------------------------------------------------------------
-datasets:
-  - path: tatsu-lab/alpaca
-    type: alpaca
-
-val_set_size: 0.0
-output_dir: ./outputs/deepseek-v3/full-ft
-
-sequence_len: 4096
-sample_packing: true
-
-# --- Optimisation ----------------------------------------------------------
-num_epochs: 1
-micro_batch_size: 1
-gradient_accumulation_steps: 8
-optimizer: adamw_torch_fused
-learning_rate: 2e-5
-lr_scheduler: cosine
-warmup_ratio: 0.1
-weight_decay: 0.01
-
-# --- Precision & Performance -----------------------------------------------
-bf16: auto
-flash_attention: true
-
-# Enable gradient checkpointing to keep activation memory manageable for the MoE blocks.
-gradient_checkpointing: true
-gradient_checkpointing_kwargs:
-  use_reentrant: false
-
-# Axolotl automatically applies the DeepSeek-V3 MoE monkeypatch when
-# model_config_type is set to `deepseek_v3`, routing matmuls through the
-# vendored Triton kernels.
-
-# --- Logging & Saving ------------------------------------------------------
-logging_steps: 1
-evals_per_epoch: 2
-saves_per_epoch: 1
-
-# Uncomment the section below for multi-GPU training with FSDP
-# fsdp:
-#   - full_shard
-#   - auto_wrap
-# fsdp_config:
-#   fsdp_limit_all_gathers: true
-#   fsdp_sync_module_states: true
-#   fsdp_offload_params: true
-#   fsdp_use_orig_params: false
-#   fsdp_cpu_ram_efficient_loading: true
-#   fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
-#   fsdp_transformer_layer_cls_to_wrap: DeepseekV3MoE
-#   fsdp_state_dict_type: FULL_STATE_DICT
-#   fsdp_sharding_strategy: FULL_SHARD
-
-# wandb_project:
-# wandb_entity:
-# wandb_name: