Consistency with the sppo -> sppo_hard rename
This commit is contained in:
@@ -133,7 +133,7 @@ class RLType(str, Enum):
|
||||
ipo = "ipo" # pylint: disable=invalid-name
|
||||
kto_pair = "kto_pair" # pylint: disable=invalid-name
|
||||
orpo = "orpo" # pylint: disable=invalid-name
|
||||
sppo = "sppo_hard" # pylint: disable=invalid-name
|
||||
sppo_hard = "sppo_hard" # pylint: disable=invalid-name
|
||||
|
||||
|
||||
class ChatTemplate(str, Enum):
|
||||
|
||||
Reference in New Issue
Block a user