|
|
|
|
@@ -758,6 +758,7 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
|
|
|
|
|
<ul class="collapse">
|
|
|
|
|
<li><a href="#classes" id="toc-classes" class="nav-link" data-scroll-target="#classes">Classes</a>
|
|
|
|
|
<ul class="collapse">
|
|
|
|
|
<li><a href="#axolotl.core.trainers.grpo.trainer.AxolotlAsyncGRPOTrainer" id="toc-axolotl.core.trainers.grpo.trainer.AxolotlAsyncGRPOTrainer" class="nav-link" data-scroll-target="#axolotl.core.trainers.grpo.trainer.AxolotlAsyncGRPOTrainer">AxolotlAsyncGRPOTrainer</a></li>
|
|
|
|
|
<li><a href="#axolotl.core.trainers.grpo.trainer.AxolotlGRPOSequenceParallelTrainer" id="toc-axolotl.core.trainers.grpo.trainer.AxolotlGRPOSequenceParallelTrainer" class="nav-link" data-scroll-target="#axolotl.core.trainers.grpo.trainer.AxolotlGRPOSequenceParallelTrainer">AxolotlGRPOSequenceParallelTrainer</a></li>
|
|
|
|
|
<li><a href="#axolotl.core.trainers.grpo.trainer.AxolotlGRPOTrainer" id="toc-axolotl.core.trainers.grpo.trainer.AxolotlGRPOTrainer" class="nav-link" data-scroll-target="#axolotl.core.trainers.grpo.trainer.AxolotlGRPOTrainer">AxolotlGRPOTrainer</a></li>
|
|
|
|
|
</ul></li>
|
|
|
|
|
@@ -786,30 +787,39 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
|
|
|
|
|
</thead>
|
|
|
|
|
<tbody>
|
|
|
|
|
<tr class="odd">
|
|
|
|
|
<td><a href="#axolotl.core.trainers.grpo.trainer.AxolotlAsyncGRPOTrainer">AxolotlAsyncGRPOTrainer</a></td>
|
|
|
|
|
<td>Extend AsyncGRPOTrainer with axolotl helpers</td>
|
|
|
|
|
</tr>
|
|
|
|
|
<tr class="even">
|
|
|
|
|
<td><a href="#axolotl.core.trainers.grpo.trainer.AxolotlGRPOSequenceParallelTrainer">AxolotlGRPOSequenceParallelTrainer</a></td>
|
|
|
|
|
<td>Extend the base GRPOTrainer for sequence parallelism handling</td>
|
|
|
|
|
</tr>
|
|
|
|
|
<tr class="odd">
|
|
|
|
|
<td><a href="#axolotl.core.trainers.grpo.trainer.AxolotlGRPOTrainer">AxolotlGRPOTrainer</a></td>
|
|
|
|
|
<td>Extend the base GRPOTrainer for axolotl helpers</td>
|
|
|
|
|
</tr>
|
|
|
|
|
</tbody>
|
|
|
|
|
</table>
|
|
|
|
|
<section id="axolotl.core.trainers.grpo.trainer.AxolotlAsyncGRPOTrainer" class="level3">
|
|
|
|
|
<h3 class="anchored" data-anchor-id="axolotl.core.trainers.grpo.trainer.AxolotlAsyncGRPOTrainer">AxolotlAsyncGRPOTrainer</h3>
|
|
|
|
|
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb1"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a>core.trainers.grpo.trainer.AxolotlAsyncGRPOTrainer(<span class="op">*</span>args, <span class="op">**</span>kwargs)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
|
|
|
|
<p>Extend AsyncGRPOTrainer with axolotl helpers</p>
|
|
|
|
|
</section>
|
|
|
|
|
<section id="axolotl.core.trainers.grpo.trainer.AxolotlGRPOSequenceParallelTrainer" class="level3">
|
|
|
|
|
<h3 class="anchored" data-anchor-id="axolotl.core.trainers.grpo.trainer.AxolotlGRPOSequenceParallelTrainer">AxolotlGRPOSequenceParallelTrainer</h3>
|
|
|
|
|
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a>core.trainers.grpo.trainer.AxolotlGRPOSequenceParallelTrainer(</span>
|
|
|
|
|
<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a> model,</span>
|
|
|
|
|
<span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a> reward_funcs,</span>
|
|
|
|
|
<span id="cb2-4"><a href="#cb2-4" aria-hidden="true" tabindex="-1"></a> args<span class="op">=</span><span class="va">None</span>,</span>
|
|
|
|
|
<span id="cb2-5"><a href="#cb2-5" aria-hidden="true" tabindex="-1"></a> train_dataset<span class="op">=</span><span class="va">None</span>,</span>
|
|
|
|
|
<span id="cb2-6"><a href="#cb2-6" aria-hidden="true" tabindex="-1"></a> eval_dataset<span class="op">=</span><span class="va">None</span>,</span>
|
|
|
|
|
<span id="cb2-7"><a href="#cb2-7" aria-hidden="true" tabindex="-1"></a> processing_class<span class="op">=</span><span class="va">None</span>,</span>
|
|
|
|
|
<span id="cb2-8"><a href="#cb2-8" aria-hidden="true" tabindex="-1"></a> reward_processing_classes<span class="op">=</span><span class="va">None</span>,</span>
|
|
|
|
|
<span id="cb2-9"><a href="#cb2-9" aria-hidden="true" tabindex="-1"></a> callbacks<span class="op">=</span><span class="va">None</span>,</span>
|
|
|
|
|
<span id="cb2-10"><a href="#cb2-10" aria-hidden="true" tabindex="-1"></a> optimizers<span class="op">=</span>(<span class="va">None</span>, <span class="va">None</span>),</span>
|
|
|
|
|
<span id="cb2-11"><a href="#cb2-11" aria-hidden="true" tabindex="-1"></a> peft_config<span class="op">=</span><span class="va">None</span>,</span>
|
|
|
|
|
<span id="cb2-12"><a href="#cb2-12" aria-hidden="true" tabindex="-1"></a> optimizer_cls_and_kwargs<span class="op">=</span><span class="va">None</span>,</span>
|
|
|
|
|
<span id="cb2-13"><a href="#cb2-13" aria-hidden="true" tabindex="-1"></a>)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
|
|
|
|
<p>Extend the base GRPOTrainer for sequence parallelism handling</p>
|
|
|
|
|
<section id="methods" class="level4">
|
|
|
|
|
<h4 class="anchored" data-anchor-id="methods">Methods</h4>
|
|
|
|
|
@@ -829,15 +839,15 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
|
|
|
|
|
</table>
|
|
|
|
|
<section id="axolotl.core.trainers.grpo.trainer.AxolotlGRPOSequenceParallelTrainer.get_train_dataloader" class="level5">
|
|
|
|
|
<h5 class="anchored" data-anchor-id="axolotl.core.trainers.grpo.trainer.AxolotlGRPOSequenceParallelTrainer.get_train_dataloader">get_train_dataloader</h5>
|
|
|
|
|
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a>core.trainers.grpo.trainer.AxolotlGRPOSequenceParallelTrainer.get_train_dataloader(</span>
|
|
|
|
|
<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a>)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
|
|
|
|
<p>Get dataloader for training</p>
|
|
|
|
|
</section>
|
|
|
|
|
</section>
|
|
|
|
|
</section>
|
|
|
|
|
<section id="axolotl.core.trainers.grpo.trainer.AxolotlGRPOTrainer" class="level3">
|
|
|
|
|
<h3 class="anchored" data-anchor-id="axolotl.core.trainers.grpo.trainer.AxolotlGRPOTrainer">AxolotlGRPOTrainer</h3>
|
|
|
|
|
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a>core.trainers.grpo.trainer.AxolotlGRPOTrainer(<span class="op">*</span>args, <span class="op">**</span>kwargs)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
|
|
|
|
<p>Extend the base GRPOTrainer for axolotl helpers</p>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|