"""
DPO strategies for mistral instruct
"""


def prompt_pairs(
    cfg,
    **kwargs,
):  # pylint: disable=possibly-unused-variable,unused-argument
    """
    for samples that carry `prompt`, `chosen` and `rejected` keys

    Returns a transform that wraps the prompt in `[INST]...[/INST]` and
    stringifies both responses. Neither `cfg` nor `kwargs` is read; `kwargs`
    is accepted so this factory can be called the same way as `argilla_chat`.
    """

    def transform_fn(sample):
        # The sample is updated in place and the same object is returned.
        sample["prompt"] = f"[INST]{sample['prompt']}[/INST]"
        # The bare f-strings coerce non-string values to `str`.
        sample["chosen"] = f"{sample['chosen']}"
        sample["rejected"] = f"{sample['rejected']}"
        return sample

    return transform_fn


def argilla_chat(
    cfg,
    **kwargs,
):  # pylint: disable=possibly-unused-variable,unused-argument
    """
    for argilla/dpo-mix-7k conversations

    Returns a transform that expects `chosen` and `rejected` to be sequences
    of mappings with a `content` key. The prompt is taken from the first
    `chosen` entry and each response from the second entry of its own list.
    Neither `cfg` nor `kwargs` is read.
    """

    def transform_fn(sample):
        # NOTE(review): presumably entry 0 is the user turn and entry 1 the
        # assistant reply; any further turns are ignored -- confirm against
        # the dataset.
        # The prompt must be built first: it reads `chosen` before that key
        # is overwritten with a plain string on the next line.
        sample["prompt"] = f"[INST] {sample['chosen'][0]['content']} [/INST]"
        sample["chosen"] = f"{sample['chosen'][1]['content']}"
        sample["rejected"] = f"{sample['rejected'][1]['content']}"
        return sample

    return transform_fn


# NOTE: the same change set also adds `dpo_beta: Optional[float] = None` to
# `AxolotlInputConfig` in
# src/axolotl/utils/config/models/input/v0_4_1/__init__.py (next to
# `orpo_alpha`); that class is only partially visible here and is not
# reproduced.