Built site for gh-pages

This commit is contained in:
Quarto GHA Workflow Runner
2025-05-07 00:10:22 +00:00
parent 29394ec9f8
commit 299b5a3e5e
174 changed files with 4531 additions and 3377 deletions

View File

@@ -74,7 +74,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<link href="../../site_libs/quarto-html/quarto-syntax-highlighting-dark-2b3e328b71be8d25427581baeb23079b.css" rel="stylesheet" id="quarto-text-highlighting-styles">
<script src="../../site_libs/bootstrap/bootstrap.min.js"></script>
<link href="../../site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
<link href="../../site_libs/bootstrap/bootstrap-653e373a27bf50c3d267316c2b2b59fb.min.css" rel="stylesheet" append-hash="true" id="quarto-bootstrap" data-mode="dark">
<link href="../../site_libs/bootstrap/bootstrap-ce762b396f898894284bb8eeee180359.min.css" rel="stylesheet" append-hash="true" id="quarto-bootstrap" data-mode="dark">
<script id="quarto-search-options" type="application/json">{
"location": "navbar",
"copy-button": false,
@@ -455,6 +455,9 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<li><a href="#functions" id="toc-functions" class="nav-link" data-scroll-target="#functions">Functions</a>
<ul class="collapse">
<li><a href="#axolotl.utils.samplers.multipack.allocate_sequentially" id="toc-axolotl.utils.samplers.multipack.allocate_sequentially" class="nav-link" data-scroll-target="#axolotl.utils.samplers.multipack.allocate_sequentially">allocate_sequentially</a></li>
<li><a href="#axolotl.utils.samplers.multipack.ffd_check" id="toc-axolotl.utils.samplers.multipack.ffd_check" class="nav-link" data-scroll-target="#axolotl.utils.samplers.multipack.ffd_check">ffd_check</a></li>
<li><a href="#axolotl.utils.samplers.multipack.pack_group" id="toc-axolotl.utils.samplers.multipack.pack_group" class="nav-link" data-scroll-target="#axolotl.utils.samplers.multipack.pack_group">pack_group</a></li>
<li><a href="#axolotl.utils.samplers.multipack.pack_parallel" id="toc-axolotl.utils.samplers.multipack.pack_parallel" class="nav-link" data-scroll-target="#axolotl.utils.samplers.multipack.pack_parallel">pack_parallel</a></li>
</ul></li>
</ul></li>
</ul>
@@ -469,7 +472,8 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<section id="axolotl.utils.samplers.multipack" class="level1">
<h1>utils.samplers.multipack</h1>
<p><code>utils.samplers.multipack</code></p>
<p>Multipack Batch Sampler</p>
<p>Multipack Batch Sampler - An efficient batch sampler for packing variable-length sequences
into fixed-capacity batches to optimize memory usage and training throughput.</p>
<section id="classes" class="level2">
<h2 class="anchored" data-anchor-id="classes">Classes</h2>
<table class="caption-top table">
@@ -482,7 +486,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<tbody>
<tr class="odd">
<td><a href="#axolotl.utils.samplers.multipack.MultipackBatchSampler">MultipackBatchSampler</a></td>
<td>Batch sampler class for multipack</td>
<td>Batch sampler class for efficient packing of variable-length sequences</td>
</tr>
</tbody>
</table>
@@ -498,9 +502,134 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<span id="cb1-8"><a href="#cb1-8" aria-hidden="true" tabindex="-1"></a> drop_last<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb1-9"><a href="#cb1-9" aria-hidden="true" tabindex="-1"></a> num_count_samples<span class="op">=</span><span class="dv">16</span>,</span>
<span id="cb1-10"><a href="#cb1-10" aria-hidden="true" tabindex="-1"></a> sequential<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb1-11"><a href="#cb1-11" aria-hidden="true" tabindex="-1"></a> <span class="op">**</span>kwargs,</span>
<span id="cb1-12"><a href="#cb1-12" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>Batch sampler class for multipack</p>
<span id="cb1-11"><a href="#cb1-11" aria-hidden="true" tabindex="-1"></a> group_size<span class="op">=</span><span class="dv">100000</span>,</span>
<span id="cb1-12"><a href="#cb1-12" aria-hidden="true" tabindex="-1"></a> bin_size<span class="op">=</span><span class="dv">200</span>,</span>
<span id="cb1-13"><a href="#cb1-13" aria-hidden="true" tabindex="-1"></a> num_processes<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-14"><a href="#cb1-14" aria-hidden="true" tabindex="-1"></a> safe_mode<span class="op">=</span><span class="va">True</span>,</span>
<span id="cb1-15"><a href="#cb1-15" aria-hidden="true" tabindex="-1"></a> <span class="op">**</span>kwargs,</span>
<span id="cb1-16"><a href="#cb1-16" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>Batch sampler class for efficient packing of variable-length sequences</p>
<p>This sampler packs sequences into fixed-capacity bins (batches) to maximize
GPU memory utilization and training throughput by reducing padding.</p>
<p>It supports both parallel packing (using the First-Fit Decreasing (FFD) algorithm) and
sequential packing (preserving original sequence order).</p>
<section id="methods" class="level4">
<h4 class="anchored" data-anchor-id="methods">Methods</h4>
<table class="caption-top table">
<thead>
<tr class="header">
<th>Name</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td><a href="#axolotl.utils.samplers.multipack.MultipackBatchSampler.efficiency">efficiency</a></td>
<td>Calculate the packing efficiency (ratio of tokens used to total token slots)</td>
</tr>
<tr class="even">
<td><a href="#axolotl.utils.samplers.multipack.MultipackBatchSampler.gather_efficiency">gather_efficiency</a></td>
<td>Gather and synchronize packing efficiency estimates across all distributed ranks</td>
</tr>
<tr class="odd">
<td><a href="#axolotl.utils.samplers.multipack.MultipackBatchSampler.gather_len_batches">gather_len_batches</a></td>
<td>Gather and synchronize batch counts across all distributed ranks</td>
</tr>
<tr class="even">
<td><a href="#axolotl.utils.samplers.multipack.MultipackBatchSampler.generate_batches">generate_batches</a></td>
<td>Generate packed batches for training</td>
</tr>
<tr class="odd">
<td><a href="#axolotl.utils.samplers.multipack.MultipackBatchSampler.set_epoch">set_epoch</a></td>
<td>Set the epoch number, used for reproducible shuffling across epochs</td>
</tr>
</tbody>
</table>
<section id="axolotl.utils.samplers.multipack.MultipackBatchSampler.efficiency" class="level5">
<h5 class="anchored" data-anchor-id="axolotl.utils.samplers.multipack.MultipackBatchSampler.efficiency">efficiency</h5>
<div class="sourceCode" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a>utils.samplers.multipack.MultipackBatchSampler.efficiency()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>Calculate the packing efficiency (ratio of tokens used to total token slots).
Higher is better: 1.0 would mean perfect packing with no wasted space.</p>
</section>
<section id="axolotl.utils.samplers.multipack.MultipackBatchSampler.gather_efficiency" class="level5">
<h5 class="anchored" data-anchor-id="axolotl.utils.samplers.multipack.MultipackBatchSampler.gather_efficiency">gather_efficiency</h5>
<div class="sourceCode" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a>utils.samplers.multipack.MultipackBatchSampler.gather_efficiency()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>Gather and synchronize packing efficiency estimates across all distributed ranks.
Returns a conservative efficiency estimate based on the measurements.</p>
</section>
<section id="axolotl.utils.samplers.multipack.MultipackBatchSampler.gather_len_batches" class="level5">
<h5 class="anchored" data-anchor-id="axolotl.utils.samplers.multipack.MultipackBatchSampler.gather_len_batches">gather_len_batches</h5>
<div class="sourceCode" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a>utils.samplers.multipack.MultipackBatchSampler.gather_len_batches(num)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>Gather and synchronize batch counts across all distributed ranks.
Returns the minimum number of batches available on any rank.</p>
</section>
<section id="axolotl.utils.samplers.multipack.MultipackBatchSampler.generate_batches" class="level5">
<h5 class="anchored" data-anchor-id="axolotl.utils.samplers.multipack.MultipackBatchSampler.generate_batches">generate_batches</h5>
<div class="sourceCode" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a>utils.samplers.multipack.MultipackBatchSampler.generate_batches(set_stats<span class="op">=</span><span class="va">False</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>Generate packed batches for training</p>
<section id="parameters" class="level6 doc-section doc-section-parameters">
<h6 class="doc-section doc-section-parameters anchored" data-anchor-id="parameters">Parameters</h6>
<table class="caption-top table">
<colgroup>
<col style="width: 15%">
<col style="width: 11%">
<col style="width: 57%">
<col style="width: 15%">
</colgroup>
<thead>
<tr class="header">
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td>set_stats</td>
<td></td>
<td>Whether to update efficiency statistics</td>
<td><code>False</code></td>
</tr>
</tbody>
</table>
</section>
<section id="returns" class="level6 doc-section doc-section-returns">
<h6 class="doc-section doc-section-returns anchored" data-anchor-id="returns">Returns</h6>
<table class="caption-top table">
<colgroup>
<col style="width: 10%">
<col style="width: 10%">
<col style="width: 78%">
</colgroup>
<thead>
<tr class="header">
<th>Name</th>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td></td>
<td></td>
<td>List of batches, where each batch contains multiple bins, and each bin contains multiple sequence indices</td>
</tr>
</tbody>
</table>
</section>
</section>
<section id="axolotl.utils.samplers.multipack.MultipackBatchSampler.set_epoch" class="level5">
<h5 class="anchored" data-anchor-id="axolotl.utils.samplers.multipack.MultipackBatchSampler.set_epoch">set_epoch</h5>
<div class="sourceCode" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a>utils.samplers.multipack.MultipackBatchSampler.set_epoch(epoch)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>Set the epoch number, used for reproducible shuffling across epochs</p>
</section>
</section>
</section>
</section>
<section id="functions" class="level2">
@@ -517,26 +646,368 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<td><a href="#axolotl.utils.samplers.multipack.allocate_sequentially">allocate_sequentially</a></td>
<td>Sequential allocator that preserves example order</td>
</tr>
<tr class="even">
<td><a href="#axolotl.utils.samplers.multipack.ffd_check">ffd_check</a></td>
<td>First-fit-decreasing bin packing algorithm check</td>
</tr>
<tr class="odd">
<td><a href="#axolotl.utils.samplers.multipack.pack_group">pack_group</a></td>
<td>Pack a group of sequences into bins using First-Fit Decreasing algorithm</td>
</tr>
<tr class="even">
<td><a href="#axolotl.utils.samplers.multipack.pack_parallel">pack_parallel</a></td>
<td>Pack sequences into bins using parallel processing</td>
</tr>
</tbody>
</table>
<section id="axolotl.utils.samplers.multipack.allocate_sequentially" class="level3">
<h3 class="anchored" data-anchor-id="axolotl.utils.samplers.multipack.allocate_sequentially">allocate_sequentially</h3>
<div class="sourceCode" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a>utils.samplers.multipack.allocate_sequentially(lengths, rank, c, n)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="sourceCode" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a>utils.samplers.multipack.allocate_sequentially(</span>
<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a> sequence_lengths,</span>
<span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a> rank,</span>
<span id="cb7-4"><a href="#cb7-4" aria-hidden="true" tabindex="-1"></a> bin_capacity,</span>
<span id="cb7-5"><a href="#cb7-5" aria-hidden="true" tabindex="-1"></a> num_ranks,</span>
<span id="cb7-6"><a href="#cb7-6" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>Sequential allocator that preserves example order</p>
<p>Parameters:
- lengths: The lengths of all examples
- rank: The current rank (for distributed training)
- c: The capacity of each bin (maximum sequence length)
- n: Number of ranks</p>
<p>Returns:
- result: List of batches for the current rank
- total_used: Number of actual example tokens
- total_slots: Maximum theoretical number of example tokens (number of bins * bin capacity)</p>
<section id="parameters-1" class="level4 doc-section doc-section-parameters">
<h4 class="doc-section doc-section-parameters anchored" data-anchor-id="parameters-1">Parameters</h4>
<table class="caption-top table">
<colgroup>
<col style="width: 19%">
<col style="width: 12%">
<col style="width: 55%">
<col style="width: 12%">
</colgroup>
<thead>
<tr class="header">
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td>sequence_lengths</td>
<td>np.ndarray</td>
<td>The lengths of all examples</td>
<td><em>required</em></td>
</tr>
<tr class="even">
<td>rank</td>
<td>int</td>
<td>The current rank (for distributed training)</td>
<td><em>required</em></td>
</tr>
<tr class="odd">
<td>bin_capacity</td>
<td>int</td>
<td>The capacity of each bin (maximum sequence length)</td>
<td><em>required</em></td>
</tr>
<tr class="even">
<td>num_ranks</td>
<td>int</td>
<td>Number of ranks (processes/GPUs)</td>
<td><em>required</em></td>
</tr>
</tbody>
</table>
</section>
<section id="returns-1" class="level4 doc-section doc-section-returns">
<h4 class="doc-section doc-section-returns anchored" data-anchor-id="returns-1">Returns</h4>
<table class="caption-top table">
<colgroup>
<col style="width: 18%">
<col style="width: 7%">
<col style="width: 74%">
</colgroup>
<thead>
<tr class="header">
<th>Name</th>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td>rank_batches</td>
<td></td>
<td>List of batches for the current rank</td>
</tr>
<tr class="even">
<td>total_tokens_used</td>
<td></td>
<td>Number of actual example tokens</td>
</tr>
<tr class="odd">
<td>total_token_slots</td>
<td></td>
<td>Maximum theoretical number of example tokens (number of bins * bin capacity)</td>
</tr>
</tbody>
</table>
</section>
</section>
<section id="axolotl.utils.samplers.multipack.ffd_check" class="level3">
<h3 class="anchored" data-anchor-id="axolotl.utils.samplers.multipack.ffd_check">ffd_check</h3>
<div class="sourceCode" id="cb8"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a>utils.samplers.multipack.ffd_check(sequence_lengths, bin_capacity, num_bins)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>First-fit-decreasing bin packing algorithm check</p>
<p>Checks if sequences with the given lengths could fit in the specified number of bins</p>
<section id="parameters-2" class="level4 doc-section doc-section-parameters">
<h4 class="doc-section doc-section-parameters anchored" data-anchor-id="parameters-2">Parameters</h4>
<table class="caption-top table">
<colgroup>
<col style="width: 25%">
<col style="width: 16%">
<col style="width: 41%">
<col style="width: 16%">
</colgroup>
<thead>
<tr class="header">
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td>sequence_lengths</td>
<td>np.ndarray</td>
<td>Array of sequence lengths</td>
<td><em>required</em></td>
</tr>
<tr class="even">
<td>bin_capacity</td>
<td>int</td>
<td>Maximum capacity of each bin</td>
<td><em>required</em></td>
</tr>
<tr class="odd">
<td>num_bins</td>
<td>int</td>
<td>Number of bins available</td>
<td><em>required</em></td>
</tr>
</tbody>
</table>
</section>
<section id="returns-2" class="level4 doc-section doc-section-returns">
<h4 class="doc-section doc-section-returns anchored" data-anchor-id="returns-2">Returns</h4>
<table class="caption-top table">
<colgroup>
<col style="width: 11%">
<col style="width: 11%">
<col style="width: 77%">
</colgroup>
<thead>
<tr class="header">
<th>Name</th>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td></td>
<td></td>
<td>True if all sequences can be packed, False otherwise</td>
</tr>
</tbody>
</table>
</section>
</section>
<section id="axolotl.utils.samplers.multipack.pack_group" class="level3">
<h3 class="anchored" data-anchor-id="axolotl.utils.samplers.multipack.pack_group">pack_group</h3>
<div class="sourceCode" id="cb9"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a>utils.samplers.multipack.pack_group(</span>
<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a> sequence_lengths,</span>
<span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a> group_offset,</span>
<span id="cb9-4"><a href="#cb9-4" aria-hidden="true" tabindex="-1"></a> bin_capacity,</span>
<span id="cb9-5"><a href="#cb9-5" aria-hidden="true" tabindex="-1"></a> max_bins,</span>
<span id="cb9-6"><a href="#cb9-6" aria-hidden="true" tabindex="-1"></a> bin_size,</span>
<span id="cb9-7"><a href="#cb9-7" aria-hidden="true" tabindex="-1"></a> safe_mode<span class="op">=</span><span class="va">True</span>,</span>
<span id="cb9-8"><a href="#cb9-8" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>Pack a group of sequences into bins using First-Fit Decreasing algorithm</p>
<section id="parameters-3" class="level4 doc-section doc-section-parameters">
<h4 class="doc-section doc-section-parameters anchored" data-anchor-id="parameters-3">Parameters</h4>
<table class="caption-top table">
<colgroup>
<col style="width: 19%">
<col style="width: 12%">
<col style="width: 54%">
<col style="width: 12%">
</colgroup>
<thead>
<tr class="header">
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td>sequence_lengths</td>
<td>np.ndarray</td>
<td>Array of sequence lengths</td>
<td><em>required</em></td>
</tr>
<tr class="even">
<td>group_offset</td>
<td>int</td>
<td>Offset to apply to indices when returning results</td>
<td><em>required</em></td>
</tr>
<tr class="odd">
<td>bin_capacity</td>
<td>int</td>
<td>Maximum capacity of each bin</td>
<td><em>required</em></td>
</tr>
<tr class="even">
<td>max_bins</td>
<td>int</td>
<td>Maximum number of bins to use</td>
<td><em>required</em></td>
</tr>
<tr class="odd">
<td>bin_size</td>
<td>int</td>
<td>Maximum number of sequences per bin</td>
<td><em>required</em></td>
</tr>
<tr class="even">
<td>safe_mode</td>
<td>bool</td>
<td>If True, use a more conservative packing approach</td>
<td><code>True</code></td>
</tr>
</tbody>
</table>
</section>
<section id="returns-3" class="level4 doc-section doc-section-returns">
<h4 class="doc-section doc-section-returns anchored" data-anchor-id="returns-3">Returns</h4>
<table class="caption-top table">
<colgroup>
<col style="width: 8%">
<col style="width: 8%">
<col style="width: 82%">
</colgroup>
<thead>
<tr class="header">
<th>Name</th>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td></td>
<td></td>
<td>List of bins, where each bin contains indices of sequences assigned to it</td>
</tr>
</tbody>
</table>
</section>
</section>
<section id="axolotl.utils.samplers.multipack.pack_parallel" class="level3">
<h3 class="anchored" data-anchor-id="axolotl.utils.samplers.multipack.pack_parallel">pack_parallel</h3>
<div class="sourceCode" id="cb10"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a>utils.samplers.multipack.pack_parallel(</span>
<span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a> sequence_lengths,</span>
<span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a> bin_capacity,</span>
<span id="cb10-4"><a href="#cb10-4" aria-hidden="true" tabindex="-1"></a> group_size,</span>
<span id="cb10-5"><a href="#cb10-5" aria-hidden="true" tabindex="-1"></a> bin_size,</span>
<span id="cb10-6"><a href="#cb10-6" aria-hidden="true" tabindex="-1"></a> num_processes<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb10-7"><a href="#cb10-7" aria-hidden="true" tabindex="-1"></a> safe_mode<span class="op">=</span><span class="va">True</span>,</span>
<span id="cb10-8"><a href="#cb10-8" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>Pack sequences into bins using parallel processing</p>
<section id="parameters-4" class="level4 doc-section doc-section-parameters">
<h4 class="doc-section doc-section-parameters anchored" data-anchor-id="parameters-4">Parameters</h4>
<table class="caption-top table">
<colgroup>
<col style="width: 18%">
<col style="width: 13%">
<col style="width: 56%">
<col style="width: 12%">
</colgroup>
<thead>
<tr class="header">
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td>sequence_lengths</td>
<td>np.ndarray</td>
<td>Array of sequence lengths</td>
<td><em>required</em></td>
</tr>
<tr class="even">
<td>bin_capacity</td>
<td>int</td>
<td>Maximum capacity of each bin as total number of tokens</td>
<td><em>required</em></td>
</tr>
<tr class="odd">
<td>group_size</td>
<td>int</td>
<td>Number of sequences to process in each group</td>
<td><em>required</em></td>
</tr>
<tr class="even">
<td>bin_size</td>
<td>int</td>
<td>Maximum number of sequences per bin</td>
<td><em>required</em></td>
</tr>
<tr class="odd">
<td>num_processes</td>
<td>int | None</td>
<td>Number of parallel processes to use</td>
<td><code>None</code></td>
</tr>
<tr class="even">
<td>safe_mode</td>
<td>bool</td>
<td>If True, use a more conservative packing approach</td>
<td><code>True</code></td>
</tr>
</tbody>
</table>
</section>
<section id="returns-4" class="level4 doc-section doc-section-returns">
<h4 class="doc-section doc-section-returns anchored" data-anchor-id="returns-4">Returns</h4>
<table class="caption-top table">
<colgroup>
<col style="width: 8%">
<col style="width: 8%">
<col style="width: 82%">
</colgroup>
<thead>
<tr class="header">
<th>Name</th>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td></td>
<td></td>
<td>List of bins, where each bin contains indices of sequences assigned to it</td>
</tr>
</tbody>
</table>
</section>
</section>
</section>
</section>
</main> <!-- /main -->
<script id="quarto-html-after-body" type="application/javascript">