diff --git a/.nojekyll b/.nojekyll index 33021b61e..b46f4f982 100644 --- a/.nojekyll +++ b/.nojekyll @@ -1 +1 @@ -4d136695 \ No newline at end of file +525f6d2a \ No newline at end of file diff --git a/docs/api/prompt_strategies.dpo.chat_template.html b/docs/api/prompt_strategies.dpo.chat_template.html index 895cc84a2..d4b93c442 100644 --- a/docs/api/prompt_strategies.dpo.chat_template.html +++ b/docs/api/prompt_strategies.dpo.chat_template.html @@ -20,6 +20,41 @@ ul.task-list li input[type="checkbox"] { margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */ vertical-align: middle; } +/* CSS for syntax highlighting */ +html { -webkit-text-size-adjust: 100%; } +pre > code.sourceCode { white-space: pre; position: relative; } +pre > code.sourceCode > span { display: inline-block; line-height: 1.25; } +pre > code.sourceCode > span:empty { height: 1.2em; } +.sourceCode { overflow: visible; } +code.sourceCode > span { color: inherit; text-decoration: inherit; } +div.sourceCode { margin: 1em 0; } +pre.sourceCode { margin: 0; } +@media screen { +div.sourceCode { overflow: auto; } +} +@media print { +pre > code.sourceCode { white-space: pre-wrap; } +pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; } +} +pre.numberSource code + { counter-reset: source-line 0; } +pre.numberSource code > span + { position: relative; left: -4em; counter-increment: source-line; } +pre.numberSource code > span > a:first-child::before + { content: counter(source-line); + position: relative; left: -1em; text-align: right; vertical-align: baseline; + border: none; display: inline-block; + -webkit-touch-callout: none; -webkit-user-select: none; + -khtml-user-select: none; -moz-user-select: none; + -ms-user-select: none; user-select: none; + padding: 0 4px; width: 4em; + } +pre.numberSource { margin-left: 3em; padding-left: 4px; } +div.sourceCode + { } +@media screen { +pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; } +} @@ -474,7 +509,13 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
prompt_strategies.dpo.chat_template
DPO prompt strategies for using tokenizer chat templates.
+| Name | +Description | +
|---|---|
| argilla_chat | +DPO chat template strategy for argilla-style datasets. | +
prompt_strategies.dpo.chat_template.argilla_chat(cfg, dataset_idx=0, **kwargs)DPO chat template strategy for argilla-style datasets.
+For argilla-style datasets where chosen/rejected contain full conversations +instead of single response messages. Extracts the conversation history from +the chosen field and formats both chosen/rejected responses using the +configured chat template.
+| Name | +Type | +Description | +Default | +
|---|---|---|---|
| cfg | ++ | Configuration object containing chat_template and dataset settings | +required | +
| dataset_idx | ++ | Index of the dataset in the config (default: 0) | +0 |
+
| **kwargs | ++ | Additional keyword arguments (unused) | +{} |
+
| Name | +Type | +Description | +
|---|---|---|
| tuple | ++ | (transform_fn, dataset_kwargs) where: - transform_fn: Function to transform dataset samples - dataset_kwargs: Dict with ‘remove_columns’ specifying columns to drop | +
{ +“chosen”: [ +{“role”: “user”, “content”: “…”}, +{“role”: “assistant”, “content”: “…”} +], +“rejected”: [ +{“role”: “user”, “content”: “…”}, +{“role”: “assistant”, “content”: “…”} +] +}
+{
+ "chosen": [
+ {"role": "user", "content": "..."},
+ {"role": "assistant", "content": "..."}
+ ],
+ "rejected": [
+ {"role": "user", "content": "..."},
+ {"role": "assistant", "content": "..."}
+ ]
+}rl: dpo
-datasets:
- - path: ...
- split: train
- type: chat_template.default
- field_messages: "messages"
- field_chosen: "chosen"
- field_rejected: "rejected"
- message_property_mappings:
- role: role
- content: content
- roles:
- user: ["user"]
- assistant: ["assistant"]
- system: ["system"]rl: dpo
+datasets:
+ - path: ...
+ split: train
+ type: chat_template.default
+ field_messages: "messages"
+ field_chosen: "chosen"
+ field_rejected: "rejected"
+ message_property_mappings:
+ role: role
+ content: content
+ roles:
+ user: ["user"]
+ assistant: ["assistant"]
+ system: ["system"]Sample input format:
-{
- "messages": [
- {
- "role": "system",
- "content": "..."
- },
- {
- "role": "user",
- "content": "..."
- },
- // ... more messages
- ],
- "chosen": {
- "role": "assistant",
- "content": "..."
- },
- "rejected": {
- "role": "assistant",
- "content": "..."
- }
-}{
+ "messages": [
+ {
+ "role": "system",
+ "content": "..."
+ },
+ {
+ "role": "user",
+ "content": "..."
+ },
+ // ... more messages
+ ],
+ "chosen": {
+ "role": "assistant",
+ "content": "..."
+ },
+ "rejected": {
+ "role": "assistant",
+ "content": "..."
+ }
+}For custom behaviors,
-rl: dpo
-datasets:
- - path: ...
- split: train
- type:
- field_prompt: "prompt"
- field_system: "system"
- field_chosen: "chosen"
- field_rejected: "rejected"
- prompt_format: "{prompt}"
- chosen_format: "{chosen}"
- rejected_format: "{rejected}"rl: dpo
+datasets:
+ - path: ...
+ split: train
+ type:
+ field_prompt: "prompt"
+ field_system: "system"
+ field_chosen: "chosen"
+ field_rejected: "rejected"
+ prompt_format: "{prompt}"
+ chosen_format: "{chosen}"
+ rejected_format: "{rejected}"The input format is a simple JSON input with customizable fields based on the above config.
-{
- "system": "...", // optional
- "prompt": "...",
- "chosen": "...",
- "rejected": "..."
-}{
+ "system": "...", // optional
+ "prompt": "...",
+ "chosen": "...",
+ "rejected": "..."
+}As IPO is just DPO with a different loss function, all supported dataset formats for DPO are also supported for IPO.
-rl: iporl: ipoPaper: https://arxiv.org/abs/2403.07691
-rl: orpo
-orpo_alpha: 0.1
-remove_unused_columns: false
-
-chat_template: chatml
-datasets:
- - path: argilla/ultrafeedback-binarized-preferences-cleaned
- type: chat_template.argillarl: orpo
+orpo_alpha: 0.1
+remove_unused_columns: false
+
+chat_template: chatml
+datasets:
+ - path: argilla/ultrafeedback-binarized-preferences-cleaned
+ type: chat_template.argillaORPO supports the following types with the following dataset format:
{
- "system": "...", // optional
- "prompt": "...", // if available, will be taken as user message for single-turn instead of from list below
-
- // chosen/rejected should be same till last content and only even-number of alternating user/assistant turns
- "chosen": [
- {"role": "user", "content": "..."},
- {"role": "assistant", "content": "..."}
- ],
- "rejected": [
- {"role": "user", "content": "..."},
- {"role": "assistant", "content": "..."}
- ]
-}{
+ "system": "...", // optional
+ "prompt": "...", // if available, will be taken as user message for single-turn instead of from list below
+
+ // chosen/rejected should be same till last content and only even-number of alternating user/assistant turns
+ "chosen": [
+ {"role": "user", "content": "..."},
+ {"role": "assistant", "content": "..."}
+ ],
+ "rejected": [
+ {"role": "user", "content": "..."},
+ {"role": "assistant", "content": "..."}
+ ]
+}rl: kto
-rl_beta: 0.1 # default
-kto_desirable_weight: 1.0 # default
-kto_undesirable_weight: 1.0 # default
-
-remove_unused_columns: false
-
-datasets:
- - path: argilla/ultrafeedback-binarized-preferences-cleaned-kto
- type: llama3.ultra
- split: train
-
-gradient_checkpointing: true
-gradient_checkpointing_kwargs:
- use_reentrant: truerl: kto
+rl_beta: 0.1 # default
+kto_desirable_weight: 1.0 # default
+kto_undesirable_weight: 1.0 # default
+
+remove_unused_columns: false
+
+datasets:
+ - path: argilla/ultrafeedback-binarized-preferences-cleaned-kto
+ type: llama3.ultra
+ split: train
+
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+ use_reentrant: trueKTO supports the following types with the following dataset format:
{
- "system": "...", // optional
- "instruction": "...",
- "completion": "..."
-}{
+ "system": "...", // optional
+ "instruction": "...",
+ "completion": "..."
+}{
- "chosen": [
- {"role": "user", "content": "..."}
- ],
- "completion": [
- {"role": "assistant", "content": "..."}
- ]
-}{
+ "chosen": [
+ {"role": "user", "content": "..."}
+ ],
+ "completion": [
+ {"role": "assistant", "content": "..."}
+ ]
+}{
- "system": "...", // optional
- "question": "...",
- "completion": "..."
-}{
"system": "...", // optional
- "prompt": "...",
+ "question": "...",
"completion": "..."
}{
"system": "...", // optional
"prompt": "...",
"completion": "..."
}{
"system": "...", // optional
- "instruction": "...",
+ "prompt": "...",
"completion": "..."
}{
+ "system": "...", // optional
+ "instruction": "...",
+ "completion": "..."
+}{
- "completion": [
- {"role": "user", "content": "..."},
- {"role": "assistant", "content": "..."}
- ]
-}{
+ "completion": [
+ {"role": "user", "content": "..."},
+ {"role": "assistant", "content": "..."}
+ ]
+}{
- "system": "...", // optional
- "question": "...",
- "completion": "..."
-}{
"system": "...", // optional
- "prompt": "...",
+ "question": "...",
"completion": "..."
}{
"system": "...", // optional
"prompt": "...",
"completion": "..."
}{
+ "system": "...", // optional
+ "prompt": "...",
+ "completion": "..."
+}For custom behaviors,
-rl: kto
-datasets:
- - path: ...
- split: train
- type:
- field_prompt: "prompt"
- field_system: "system"
- field_completion: "completion"
- field_label: "label"
- prompt_format: "{prompt}"
- completion_format: "{completion}"rl: kto
+datasets:
+ - path: ...
+ split: train
+ type:
+ field_prompt: "prompt"
+ field_system: "system"
+ field_completion: "completion"
+ field_label: "label"
+ prompt_format: "{prompt}"
+ completion_format: "{completion}"The input format is a simple JSON input with customizable fields based on the above config.
-{
- "system": "...", // optional
- "prompt": "...",
- "completion": "...",
- "label": "..."
-}{
+ "system": "...", // optional
+ "prompt": "...",
+ "completion": "...",
+ "label": "..."
+}Make sure you’ve installed the correct version of vLLM by including it as an extra when installing axolotl, e.g. pip install axolotl[vllm].
base_model: Qwen/Qwen2.5-1.5B-Instruct
-
-vllm:
- host: 0.0.0.0
- port: 8000
- tensor_parallel_size: 2
- gpu_memory_utilization: 0.85
- dtype: auto
- # max_model_len: # you may find it useful to set the vLLM model context length if you know this beforehand
-
-rl: grpo
-trl:
- use_vllm: true
- vllm_server_host: 0.0.0.0
- vllm_server_port: 8000
- vllm_server_timeout: 300CUDA_VISIBLE_DEVICES=2,3 axolotl vllm-serve grpo.yamlbase_model: Qwen/Qwen2.5-1.5B-Instruct
+
+vllm:
+ host: 0.0.0.0
+ port: 8000
+ tensor_parallel_size: 2
+ gpu_memory_utilization: 0.85
+ dtype: auto
+ # max_model_len: # you may find it useful to set the vLLM model context length if you know this beforehand
+
+rl: grpo
+trl:
+ use_vllm: true
+ vllm_server_host: 0.0.0.0
+ vllm_server_port: 8000
+ vllm_server_timeout: 300CUDA_VISIBLE_DEVICES=2,3 axolotl vllm-serve grpo.yamlYour vLLM instance will now attempt to spin up, and it’s time to kick off training utilizing our remaining two GPUs. In another terminal, execute:
CUDA_VISIBLE_DEVICES=0,1 axolotl train grpo.yaml --num-processes 2CUDA_VISIBLE_DEVICES=0,1 axolotl train grpo.yaml --num-processes 2