fix: unify default for conversations_field [skip-e2e] (#3070)

* fix: unify default for conversations_field

* fix: suggestion to remove defaults
This commit is contained in:
NanoCode012
2025-09-23 21:22:15 +07:00
committed by GitHub
parent 08d831c3d5
commit 55d1be2ae6
12 changed files with 8 additions and 41 deletions

View File

@@ -42,7 +42,6 @@ datasets:
- path: HuggingFaceH4/llava-instruct-mix-vsft - path: HuggingFaceH4/llava-instruct-mix-vsft
type: chat_template type: chat_template
split: train[:1%] split: train[:1%]
field_messages: messages
# (optional) if doing lora, only finetune the Language model, # (optional) if doing lora, only finetune the Language model,
# leave the vision model and vision tower frozen # leave the vision model and vision tower frozen

View File

@@ -9,10 +9,6 @@ strict: false
datasets: datasets:
- path: fozziethebeat/alpaca_messages_2k_test - path: fozziethebeat/alpaca_messages_2k_test
type: chat_template type: chat_template
field_messages: messages
message_property_mappings:
role: role
content: content
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.05 val_set_size: 0.05

View File

@@ -9,10 +9,6 @@ strict: false
datasets: datasets:
- path: fozziethebeat/alpaca_messages_2k_test - path: fozziethebeat/alpaca_messages_2k_test
type: chat_template type: chat_template
field_messages: messages
message_property_mappings:
role: role
content: content
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.05 val_set_size: 0.05

View File

@@ -9,10 +9,6 @@ strict: false
datasets: datasets:
- path: fozziethebeat/alpaca_messages_2k_test - path: fozziethebeat/alpaca_messages_2k_test
type: chat_template type: chat_template
field_messages: messages
message_property_mappings:
role: role
content: content
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.05 val_set_size: 0.05

View File

@@ -18,7 +18,7 @@ datasets:
- path: HuggingFaceH4/llava-instruct-mix-vsft - path: HuggingFaceH4/llava-instruct-mix-vsft
type: chat_template type: chat_template
split: train[:1%] split: train[:1%]
field_messages: messages
dataset_prepared_path: last_run_prepared dataset_prepared_path: last_run_prepared
val_set_size: 0.01 val_set_size: 0.01
output_dir: ./outputs/out output_dir: ./outputs/out

View File

@@ -12,15 +12,6 @@ chat_template: llama3
datasets: datasets:
- path: fozziethebeat/alpaca_messages_2k_test - path: fozziethebeat/alpaca_messages_2k_test
type: chat_template type: chat_template
field_messages: messages
message_property_mappings:
role: role
content: content
roles:
user:
- user
assistant:
- assistant
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.05 val_set_size: 0.05

View File

@@ -46,7 +46,6 @@ datasets:
- path: HuggingFaceH4/llava-instruct-mix-vsft - path: HuggingFaceH4/llava-instruct-mix-vsft
type: chat_template type: chat_template
split: train[:1%] split: train[:1%]
field_messages: messages
dataset_prepared_path: last_run_prepared dataset_prepared_path: last_run_prepared
val_set_size: 0.0 val_set_size: 0.0

View File

@@ -45,7 +45,6 @@ datasets:
- path: HuggingFaceH4/llava-instruct-mix-vsft - path: HuggingFaceH4/llava-instruct-mix-vsft
type: chat_template type: chat_template
split: train[:1%] split: train[:1%]
field_messages: messages
dataset_prepared_path: last_run_prepared dataset_prepared_path: last_run_prepared
val_set_size: 0.0 val_set_size: 0.0

View File

@@ -12,15 +12,6 @@ chat_template: phi_3
datasets: datasets:
- path: fozziethebeat/alpaca_messages_2k_test - path: fozziethebeat/alpaca_messages_2k_test
type: chat_template type: chat_template
field_messages: messages
message_property_mappings:
role: role
content: content
roles:
user:
- user
assistant:
- assistant
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.05 val_set_size: 0.05

View File

@@ -11,7 +11,7 @@ datasets:
- path: HuggingFaceH4/llava-instruct-mix-vsft - path: HuggingFaceH4/llava-instruct-mix-vsft
type: chat_template type: chat_template
split: train[:1%] split: train[:1%]
field_messages: messages
dataset_prepared_path: last_run_prepared dataset_prepared_path: last_run_prepared
val_set_size: 0.0 val_set_size: 0.0
output_dir: ./outputs/out output_dir: ./outputs/out

View File

@@ -11,7 +11,7 @@ datasets:
- path: HuggingFaceH4/llava-instruct-mix-vsft - path: HuggingFaceH4/llava-instruct-mix-vsft
type: chat_template type: chat_template
split: train[:1%] split: train[:1%]
field_messages: messages
dataset_prepared_path: last_run_prepared dataset_prepared_path: last_run_prepared
val_set_size: 0.0 val_set_size: 0.0
output_dir: ./outputs/out output_dir: ./outputs/out

View File

@@ -8,7 +8,7 @@ from typing import Any, Mapping
def chat_message_transform_builder( def chat_message_transform_builder(
train_on_inputs=False, train_on_inputs=False,
conversations_field: str = "conversations", conversations_field: str = "messages",
message_field_role: str | list[str] | None = None, # commonly "role" message_field_role: str | list[str] | None = None, # commonly "role"
message_field_content: str | list[str] | None = None, # commonly "content" message_field_content: str | list[str] | None = None, # commonly "content"
message_field_training: str | list[str] | None = None, # commonly "weight" message_field_training: str | list[str] | None = None, # commonly "weight"
@@ -20,13 +20,13 @@ def chat_message_transform_builder(
If True, the transform will train on the inputs. If False, the transform will train on the targets. If True, the transform will train on the inputs. If False, the transform will train on the targets.
Defaults to False. Defaults to False.
conversations_field (str, optional): conversations_field (str, optional):
The field name of the conversations. Defaults to "conversations". The field name of the conversations. Defaults to "messages".
message_field_role (str | list[str], optional): message_field_role (str | list[str], optional):
The field name of the role. Defaults to "role". The field name of the role.
message_field_content (str | list[str], optional): message_field_content (str | list[str], optional):
The field name of the message content. Defaults to "content". The field name of the message content.
message_field_training (str | list[str], optional): message_field_training (str | list[str], optional):
The field name of the train/weight. Defaults to "weight". The field name of the train/weight.
Returns: Returns:
Callable: Callable: