diff --git a/.github/workflows/base.yml b/.github/workflows/base.yml index 4c1b0463a..d215ea44c 100644 --- a/.github/workflows/base.yml +++ b/.github/workflows/base.yml @@ -5,7 +5,7 @@ on: jobs: build-base: - if: github.repository_owner == 'OpenAccess-AI-Collective' + if: github.repository_owner == 'axolotl-ai-cloud' # this job needs to be run on self-hosted GPU runners... runs-on: axolotl-gpu-runner strategy: diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d0d028982..8bced628d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -8,7 +8,7 @@ on: jobs: build-axolotl: - if: ${{ ! contains(github.event.commits[0].message, '[skip docker]]') && github.repository_owner == 'OpenAccess-AI-Collective' }} + if: ${{ ! contains(github.event.commits[0].message, '[skip docker]') && github.repository_owner == 'axolotl-ai-cloud' }} strategy: fail-fast: false matrix: @@ -70,7 +70,7 @@ jobs: build-axolotl-cloud: needs: build-axolotl - if: ${{ ! contains(github.event.commits[0].message, '[skip docker]]') && github.repository_owner == 'OpenAccess-AI-Collective' }} + if: ${{ ! contains(github.event.commits[0].message, '[skip docker]') && github.repository_owner == 'axolotl-ai-cloud' }} # this job needs to be run on self-hosted GPU runners... strategy: matrix: @@ -128,7 +128,7 @@ jobs: build-axolotl-cloud-no-tmux: needs: build-axolotl - if: ${{ ! contains(github.event.commits[0].message, '[skip docker]]') && github.repository_owner == 'OpenAccess-AI-Collective' }} + if: ${{ ! contains(github.event.commits[0].message, '[skip docker]') && github.repository_owner == 'axolotl-ai-cloud' }} # this job needs to be run on self-hosted GPU runners... strategy: matrix: diff --git a/.github/workflows/nightlies.yml b/.github/workflows/nightlies.yml index f668e5f65..6dc22b6bf 100644 --- a/.github/workflows/nightlies.yml +++ b/.github/workflows/nightlies.yml @@ -7,7 +7,7 @@ jobs: build-axolotl: - if: ${{ ! 
contains(github.event.commits[0].message, '[skip docker]]') && github.repository_owner == 'OpenAccess-AI-Collective' }} + if: ${{ ! contains(github.event.commits[0].message, '[skip docker]') && github.repository_owner == 'axolotl-ai-cloud' }} strategy: fail-fast: false matrix: @@ -70,7 +70,7 @@ jobs: build-axolotl-cloud: needs: build-axolotl - if: ${{ ! contains(github.event.commits[0].message, '[skip docker]]') && github.repository_owner == 'OpenAccess-AI-Collective' }} + if: ${{ ! contains(github.event.commits[0].message, '[skip docker]') && github.repository_owner == 'axolotl-ai-cloud' }} # this job needs to be run on self-hosted GPU runners... strategy: matrix: diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index db9173cac..2e2d0968d 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -58,7 +58,7 @@ jobs: pytest --ignore=tests/e2e/ tests/ docker-e2e-tests: - if: github.repository_owner == 'OpenAccess-AI-Collective' + if: github.repository_owner == 'axolotl-ai-cloud' # this job needs to be run on self-hosted GPU runners... 
runs-on: [self-hosted, modal] timeout-minutes: 60 diff --git a/docs/config.qmd b/docs/config.qmd index 1c87386a6..e85999978 100644 --- a/docs/config.qmd +++ b/docs/config.qmd @@ -138,7 +138,7 @@ test_datasets: data_files: - /workspace/data/eval.jsonl -# use RL training: 'dpo', 'ipo', 'kto_pair' +# use RL training: 'dpo', 'ipo', 'kto' rl: # Saves the desired chat template to the tokenizer_config.json for easier inferencing diff --git a/requirements.txt b/requirements.txt index ee808de76..c8d168734 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ peft==0.11.1 transformers==4.42.3 tokenizers==0.19.1 bitsandbytes==0.43.1 -accelerate==0.30.1 +accelerate==0.32.0 deepspeed @ git+https://github.com/microsoft/DeepSpeed.git@bc48371c5e1fb8fd70fc79285e66201dbb65679b pydantic==2.6.3 addict @@ -40,6 +40,6 @@ s3fs gcsfs # adlfs -trl @ git+https://github.com/huggingface/trl.git@f18253bf2d747f68acc9cd89da95c85ebf59dbb9 +trl==0.9.6 zstandard==0.22.0 fastcore diff --git a/src/axolotl/core/trainer_builder.py b/src/axolotl/core/trainer_builder.py index 0c69f0be6..ec175454e 100755 --- a/src/axolotl/core/trainer_builder.py +++ b/src/axolotl/core/trainer_builder.py @@ -1670,8 +1670,6 @@ class HFRLTrainerBuilder(TrainerBuilderBase): dpo_trainer_kwargs["loss_type"] = "ipo" if self.cfg.dpo_label_smoothing: dpo_trainer_kwargs["label_smoothing"] = self.cfg.dpo_label_smoothing - elif self.cfg.rl == "kto_pair": - dpo_trainer_kwargs["loss_type"] = "kto_pair" if self.eval_dataset: dpo_trainer_kwargs["eval_dataset"] = self.eval_dataset if self.cfg.adapter and self.peft_config: @@ -1680,7 +1678,7 @@ class HFRLTrainerBuilder(TrainerBuilderBase): dpo_trainer_kwargs[ "precompute_ref_log_probs" ] = self.cfg.precompute_ref_log_probs - if self.cfg.rl in ["dpo", "ipo", "kto_pair"]: + if self.cfg.rl in ["dpo", "ipo"]: trainer_cls = AxolotlDPOTrainer dpo_trainer_kwargs["beta"] = self.cfg.rl_beta or 0.1 trainer_cls_args = [self.model, self.model_ref] @@ -1695,7 +1693,7 @@ class 
HFRLTrainerBuilder(TrainerBuilderBase): elif self.cfg.rl == "orpo": trainer_cls = AxolotlORPOTrainer trainer_cls_args = [self.model] - elif self.cfg.rl == "kto": + elif self.cfg.rl in ["kto"]: trainer_cls = AxolotlKTOTrainer trainer_cls_args = [self.model] else: diff --git a/src/axolotl/utils/config/models/input/v0_4_1/__init__.py b/src/axolotl/utils/config/models/input/v0_4_1/__init__.py index 1747c46b1..3cac4f839 100644 --- a/src/axolotl/utils/config/models/input/v0_4_1/__init__.py +++ b/src/axolotl/utils/config/models/input/v0_4_1/__init__.py @@ -165,7 +165,6 @@ class RLType(str, Enum): dpo = "dpo" # pylint: disable=invalid-name ipo = "ipo" # pylint: disable=invalid-name - kto_pair = "kto_pair" # pylint: disable=invalid-name orpo = "orpo" # pylint: disable=invalid-name kto = "kto" # pylint: disable=invalid-name diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py index 4e0d23c4f..d479d425d 100644 --- a/src/axolotl/utils/models.py +++ b/src/axolotl/utils/models.py @@ -805,11 +805,7 @@ def load_model( if not reference_model or cfg.lora_model_dir: # if we're not loading the reference model, then we're loading the model for training # then the dpo trainer doesn't want the peft model loaded over it, it just wants the lora/peft config - if ( - cfg.adapter - and cfg.rl in ["dpo", "ipo", "kto_pair", "kto"] - and not cfg.merge_lora - ): + if cfg.adapter and cfg.rl in ["dpo", "ipo", "kto"] and not cfg.merge_lora: _, lora_config = load_lora(model, cfg, inference=False, config_only=True) else: model, lora_config = load_adapter(model, cfg, cfg.adapter) diff --git a/src/axolotl/utils/trainer.py b/src/axolotl/utils/trainer.py index 6760dc488..a16baaae0 100644 --- a/src/axolotl/utils/trainer.py +++ b/src/axolotl/utils/trainer.py @@ -427,7 +427,7 @@ def prepare_optim_env(cfg): def setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer, total_num_steps): - if cfg.rl in ["dpo", "ipo", "kto_pair", "orpo", "kto"]: + if cfg.rl in ["dpo", "ipo", "orpo", 
"kto"]: trainer_builder = HFRLTrainerBuilder(cfg, model[0], tokenizer) trainer_builder.model_ref = model[1] trainer_builder.peft_config = model[2] diff --git a/tests/e2e/test_dpo.py b/tests/e2e/test_dpo.py index 5f03e6bc1..1c354e9a0 100644 --- a/tests/e2e/test_dpo.py +++ b/tests/e2e/test_dpo.py @@ -115,6 +115,7 @@ class TestDPOLlamaLora(unittest.TestCase): train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) assert (Path(temp_dir) / "checkpoint-20/adapter_model.safetensors").exists() + @pytest.mark.skip("kto_pair no longer supported in trl") @with_temp_dir def test_kto_pair_lora(self, temp_dir): # pylint: disable=duplicate-code