Compare commits

...

1 Commit

Author SHA1 Message Date
Wing Lian
21ba1cd3f1 wire up squash_position_ids 2025-08-23 16:21:28 -04:00
2 changed files with 8 additions and 0 deletions

View File

@@ -476,6 +476,8 @@ class HFCausalTrainerBuilder(TrainerBuilderBase):
) )
): ):
collator = V2BatchSamplerDataCollatorForSeq2Seq collator = V2BatchSamplerDataCollatorForSeq2Seq
if self.cfg.squash_position_ids:
kwargs["squash_position_ids"] = True
else: else:
collator = BatchSamplerDataCollatorForSeq2Seq collator = BatchSamplerDataCollatorForSeq2Seq
else: else:

View File

@@ -459,6 +459,12 @@ class AxolotlInputConfig(
"description": "The multiprocessing start method to use for packing. Should be 'fork', 'spawn' or 'forkserver'" "description": "The multiprocessing start method to use for packing. Should be 'fork', 'spawn' or 'forkserver'"
}, },
) )
squash_position_ids: bool | None = Field(
default=None,
json_schema_extra={
"description": "Whether to squash position_ids for packing, effectively extending context length."
},
)
eval_sample_packing: bool | None = Field( eval_sample_packing: bool | None = Field(
default=None, default=None,
json_schema_extra={ json_schema_extra={