From 6132a30cdabd877080bb02bff033d5ac14139ffd Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Wed, 28 Jan 2026 06:45:01 -0500 Subject: [PATCH] handle warnings from v5 upgrade (#3376) --- requirements.txt | 2 +- src/axolotl/cli/__init__.py | 2 +- src/axolotl/core/trainers/trl.py | 12 +++++------- src/axolotl/core/training_args.py | 6 +++++- tests/prompt_strategies/conftest.py | 2 ++ 5 files changed, 14 insertions(+), 10 deletions(-) diff --git a/requirements.txt b/requirements.txt index 21fdda226..565224e92 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,7 +16,7 @@ transformers==5.0.0 accelerate==1.12.0 datasets==4.5.0 deepspeed>=0.18.3 -trl==0.27.0 +trl==0.27.1 hf_xet==1.2.0 kernels==0.11.5 diff --git a/src/axolotl/cli/__init__.py b/src/axolotl/cli/__init__.py index fa647be65..6d0754806 100644 --- a/src/axolotl/cli/__init__.py +++ b/src/axolotl/cli/__init__.py @@ -5,6 +5,6 @@ import os from axolotl.logging_config import configure_logging os.environ.setdefault("TOKENIZERS_PARALLELISM", "false") -os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1") +os.environ.setdefault("HF_XET_HIGH_PERFORMANCE", "1") configure_logging() diff --git a/src/axolotl/core/trainers/trl.py b/src/axolotl/core/trainers/trl.py index c5f19a6fe..bc49754be 100644 --- a/src/axolotl/core/trainers/trl.py +++ b/src/axolotl/core/trainers/trl.py @@ -1,12 +1,10 @@ """Module for TRL RL trainers""" -from trl import ( - CPOTrainer, - KTOTrainer, - ORPOTrainer, - PRMTrainer, - RewardTrainer, -) +from trl import RewardTrainer +from trl.experimental.cpo import CPOTrainer +from trl.experimental.kto import KTOTrainer +from trl.experimental.orpo import ORPOTrainer +from trl.experimental.prm import PRMTrainer from axolotl.core.trainers.mixins import DistributedParallelMixin, RngLoaderMixin from axolotl.core.trainers.mixins.optimizer import OptimizerInitMixin, OptimizerMixin diff --git a/src/axolotl/core/training_args.py b/src/axolotl/core/training_args.py index d5be9fc62..2a155e5ef 100644 --- a/src/axolotl/core/training_args.py +++ b/src/axolotl/core/training_args.py @@ -8,7 +8,11 @@ from dataclasses import dataclass, field from typing import Optional, Type from transformers import TrainingArguments -from trl import CPOConfig, KTOConfig, ORPOConfig, PRMConfig, RewardConfig +from trl import RewardConfig +from trl.experimental.cpo import CPOConfig +from trl.experimental.kto import KTOConfig +from trl.experimental.orpo import ORPOConfig +from trl.experimental.prm import PRMConfig from axolotl.integrations.config import merge_training_args diff --git a/tests/prompt_strategies/conftest.py b/tests/prompt_strategies/conftest.py index 0af7b3e93..7c4475ab5 100644 --- a/tests/prompt_strategies/conftest.py +++ b/tests/prompt_strategies/conftest.py @@ -141,6 +141,7 @@ def fixture_phi35_tokenizer(): @pytest.fixture(name="phi4_tokenizer", scope="session", autouse=True) +@enable_hf_offline def fixture_phi4_tokenizer(): tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-4-reasoning") return tokenizer @@ -178,6 +179,7 @@ def fixture_devstral_1_1_tokenizer(): @pytest.fixture(name="qwen3_tokenizer") +@enable_hf_offline def qwen3_tokenizer_fixture( download_qwen3_half_billion_model, ): # pylint: disable=unused-argument,redefined-outer-name