From ec4ebfd99787577bff10d85beb7174aba3489236 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Wed, 28 May 2025 16:20:19 -0400 Subject: [PATCH] Add a few items to faq (#2734) * Add a few items to faq * formatting * chore: lint --- docs/faq.qmd | 14 ++++++++++++++ src/axolotl/utils/schemas/config.py | 12 ------------ 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/docs/faq.qmd b/docs/faq.qmd index f586099e7..f2744caba 100644 --- a/docs/faq.qmd +++ b/docs/faq.qmd @@ -110,3 +110,17 @@ description: Frequently asked questions > A: If `eot_tokens: ` is not provided, the default behavior is the same as before. EOS tokens used to delimit turns are masked/unmasked depending on whether the turn is trainable. > Internally, `eot_tokens: tokenizer.eos_token` and `train_on_eot: train_on_eos` (which defaults to `turn`). This transition helps clarify the naming and behavior of EOT/EOS tokens. + +**Q: `Data processing error: CAS service error`** + +> A: Try disabling XET with `export HF_HUB_DISABLE_XET=1` + +**Q: `torch._inductor.exc.LoweringException: NoValidChoicesError: No choices to select, please consider adding ATEN into max_autotune_gemm_backends config (defined in torch/_inductor/config.py) to allow at least one choice. `** + +> A: Depending on the version of torch, you may need to include this in your YAML: + +> ```yaml +> flex_attn_compile_kwargs: +> dynamic: false +> mode: max-autotune-no-cudagraphs +> ``` diff --git a/src/axolotl/utils/schemas/config.py b/src/axolotl/utils/schemas/config.py index 75551085b..698befa19 100644 --- a/src/axolotl/utils/schemas/config.py +++ b/src/axolotl/utils/schemas/config.py @@ -1175,18 +1175,6 @@ class AxolotlInputConfig( return data - # @model_validator(mode="before") - # @classmethod - # def check_grpo_peft_liger(cls, data): - # if ( - # data.get("rl") == "grpo" - # and data.get("trl", {}) - # and data.get("trl").get("use_liger_loss") - # and data.get("adapter") - # ): - # raise ValueError("PEFT + GRPO + Liger is not yet supported") - # return data - # @model_validator(mode="before") @classmethod def check_grpo_liger_sequence_parallel(cls, data):