@@ -0,0 +1,844 @@
<!DOCTYPE html>
< html xmlns = "http://www.w3.org/1999/xhtml" lang = "en" xml:lang = "en" > < head >
< meta charset = "utf-8" >
< meta name = "generator" content = "quarto-1.5.57" >
< meta name = "viewport" content = "width=device-width, initial-scale=1.0, user-scalable=yes" >
< meta name = "description" content = "A comprehensive guide for using Axolotl on distributed systems with AMD GPUs" >
< title > Training with AMD GPUs on HPC Systems – Axolotl< / title >
< style >
code { white-space : pre-wrap ; }
span . smallcaps { font-variant : small-caps ; }
div . columns { display : flex ; gap : min ( 4 vw , 1.5 em ) ; }
div . column { flex : auto ; overflow-x : auto ; }
div . hanging-indent { margin-left : 1.5 em ; text-indent : -1.5 em ; }
ul . task-list { list-style : none ; }
ul . task-list li input [ type = "checkbox" ] {
width : 0.8 em ;
margin : 0 0.8 em 0.2 em -1 em ; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */
vertical-align : middle ;
}
/* CSS for syntax highlighting */
pre > code . sourceCode { white-space : pre ; position : relative ; }
pre > code . sourceCode > span { line-height : 1.25 ; }
pre > code . sourceCode > span : empty { height : 1.2 em ; }
. sourceCode { overflow : visible ; }
code . sourceCode > span { color : inherit ; text-decoration : inherit ; }
div . sourceCode { margin : 1 em 0 ; }
pre . sourceCode { margin : 0 ; }
@ media screen {
div . sourceCode { overflow : auto ; }
}
@ media print {
pre > code . sourceCode { white-space : pre-wrap ; }
pre > code . sourceCode > span { display : inline-block ; text-indent : -5 em ; padding-left : 5 em ; }
}
pre . numberSource code
{ counter-reset : source-line 0 ; }
pre . numberSource code > span
{ position : relative ; left : -4 em ; counter-increment : source-line ; }
pre . numberSource code > span > a : first-child :: before
{ content : counter ( source - line ) ;
position : relative ; left : -1 em ; text-align : right ; vertical-align : baseline ;
border : none ; display : inline-block ;
-webkit- touch-callout : none ; -webkit- user-select : none ;
-khtml- user-select : none ; -moz- user-select : none ;
-ms- user-select : none ; user-select : none ;
padding : 0 4 px ; width : 4 em ;
}
pre . numberSource { margin-left : 3 em ; padding-left : 4 px ; }
div . sourceCode
{ }
@ media screen {
pre > code . sourceCode > span > a : first-child :: before { text-decoration : underline ; }
}
< / style >
< script src = "../site_libs/quarto-nav/quarto-nav.js" > < / script >
< script src = "../site_libs/clipboard/clipboard.min.js" > < / script >
< script src = "../site_libs/quarto-search/autocomplete.umd.js" > < / script >
< script src = "../site_libs/quarto-search/fuse.min.js" > < / script >
< script src = "../site_libs/quarto-search/quarto-search.js" > < / script >
< meta name = "quarto:offset" content = "../" >
< link href = "../favicon.jpg" rel = "icon" type = "image/jpeg" >
< script src = "../site_libs/quarto-html/quarto.js" > < / script >
< script src = "../site_libs/quarto-html/popper.min.js" > < / script >
< script src = "../site_libs/quarto-html/tippy.umd.min.js" > < / script >
< script src = "../site_libs/quarto-html/anchor.min.js" > < / script >
< link href = "../site_libs/quarto-html/tippy.css" rel = "stylesheet" >
< link href = "../site_libs/quarto-html/quarto-syntax-highlighting.css" rel = "stylesheet" id = "quarto-text-highlighting-styles" >
< script src = "../site_libs/bootstrap/bootstrap.min.js" > < / script >
< link href = "../site_libs/bootstrap/bootstrap-icons.css" rel = "stylesheet" >
< link href = "../site_libs/bootstrap/bootstrap.min.css" rel = "stylesheet" id = "quarto-bootstrap" data-mode = "light" >
< script id = "quarto-search-options" type = "application/json" > {
"location" : "navbar" ,
"copy-button" : false ,
"collapse-after" : 3 ,
"panel-placement" : "end" ,
"type" : "overlay" ,
"limit" : 50 ,
"keyboard-shortcut" : [
"f" ,
"/" ,
"s"
] ,
"show-item-context" : false ,
"language" : {
"search-no-results-text" : "No results" ,
"search-matching-documents-text" : "matching documents" ,
"search-copy-link-title" : "Copy link to search" ,
"search-hide-matches-text" : "Hide additional matches" ,
"search-more-match-text" : "more match in this document" ,
"search-more-matches-text" : "more matches in this document" ,
"search-clear-button-title" : "Clear" ,
"search-text-placeholder" : "" ,
"search-detached-cancel-button-title" : "Cancel" ,
"search-submit-button-title" : "Submit" ,
"search-label" : "Search"
}
} < / script >
< link rel = "stylesheet" href = "../styles.css" >
< / head >
< body class = "nav-sidebar docked nav-fixed" >
< div id = "quarto-search-results" > < / div >
< header id = "quarto-header" class = "headroom fixed-top" >
< nav class = "navbar navbar-expand " data-bs-theme = "dark" >
< div class = "navbar-container container-fluid" >
< div class = "navbar-brand-container mx-auto" >
< a class = "navbar-brand" href = "../index.html" >
< span class = "navbar-title" > Axolotl< / span >
< / a >
< / div >
<!-- Icon-only social links: each link carries an aria-label so it has an accessible name, and the decorative glyph is hidden from assistive technology. -->
< div class = "quarto-navbar-tools tools-wide tools-end" >
< a href = "https://twitter.com/axolotl_ai" title = "" class = "quarto-navigation-tool px-1" aria-label = "Axolotl on Twitter" > < i class = "bi bi-twitter" aria-hidden = "true" > < / i > < / a >
< a href = "https://github.com/axolotl-ai-cloud/axolotl/" title = "" class = "quarto-navigation-tool px-1" aria-label = "Axolotl on GitHub" > < i class = "bi bi-github" aria-hidden = "true" > < / i > < / a >
< a href = "https://discord.gg/7m9sfhzaf3" title = "" class = "quarto-navigation-tool px-1" aria-label = "Axolotl on Discord" > < i class = "bi bi-discord" aria-hidden = "true" > < / i > < / a >
< / div >
< div id = "quarto-search" class = "" title = "Search" > < / div >
< / div > <!-- /container - fluid -->
< / nav >
< nav class = "quarto-secondary-nav" >
< div class = "container-fluid d-flex" >
< button type = "button" class = "quarto-btn-toggle btn" data-bs-toggle = "collapse" role = "button" data-bs-target = ".quarto-sidebar-collapse-item" aria-controls = "quarto-sidebar" aria-expanded = "false" aria-label = "Toggle sidebar navigation" onclick = "if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }" >
< i class = "bi bi-layout-text-sidebar-reverse" > < / i >
< / button >
< nav class = "quarto-page-breadcrumbs" aria-label = "breadcrumb" > < ol class = "breadcrumb" > < li class = "breadcrumb-item" > < a href = "../docs/debugging.html" > How-To Guides< / a > < / li > < li class = "breadcrumb-item" > < a href = "../docs/amd_hpc.html" > Training with AMD GPUs on HPC Systems< / a > < / li > < / ol > < / nav >
< a class = "flex-grow-1" role = "navigation" data-bs-toggle = "collapse" data-bs-target = ".quarto-sidebar-collapse-item" aria-controls = "quarto-sidebar" aria-expanded = "false" aria-label = "Toggle sidebar navigation" onclick = "if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }" >
< / a >
< / div >
< / nav >
< / header >
<!-- content -->
< div id = "quarto-content" class = "quarto-container page-columns page-rows-contents page-layout-article page-navbar" >
<!-- sidebar -->
< nav id = "quarto-sidebar" class = "sidebar collapse collapse-horizontal quarto-sidebar-collapse-item sidebar-navigation docked overflow-auto" >
< div class = "sidebar-menu-container" >
< ul class = "list-unstyled mt-1" >
< li class = "sidebar-item" >
< div class = "sidebar-item-container" >
< a href = "../index.html" class = "sidebar-item-text sidebar-link" >
< span class = "menu-text" > Home< / span > < / a >
< / div >
< / li >
< li class = "sidebar-item sidebar-item-section" >
< div class = "sidebar-item-container" >
< a class = "sidebar-item-text sidebar-link text-start" data-bs-toggle = "collapse" data-bs-target = "#quarto-sidebar-section-1" role = "navigation" aria-expanded = "true" >
< span class = "menu-text" > How-To Guides< / span > < / a >
< a class = "sidebar-item-toggle text-start" data-bs-toggle = "collapse" data-bs-target = "#quarto-sidebar-section-1" role = "navigation" aria-expanded = "true" aria-label = "Toggle section" >
< i class = "bi bi-chevron-right ms-2" > < / i >
< / a >
< / div >
< ul id = "quarto-sidebar-section-1" class = "collapse list-unstyled sidebar-section depth1 show" >
< li class = "sidebar-item" >
< div class = "sidebar-item-container" >
< a href = "../docs/debugging.html" class = "sidebar-item-text sidebar-link" >
< span class = "menu-text" > Debugging< / span > < / a >
< / div >
< / li >
< li class = "sidebar-item" >
< div class = "sidebar-item-container" >
< a href = "../docs/multipack.html" class = "sidebar-item-text sidebar-link" >
< span class = "menu-text" > Multipack (Sample Packing)< / span > < / a >
< / div >
< / li >
< li class = "sidebar-item" >
< div class = "sidebar-item-container" >
< a href = "../docs/fsdp_qlora.html" class = "sidebar-item-text sidebar-link" >
< span class = "menu-text" > FSDP + QLoRA< / span > < / a >
< / div >
< / li >
< li class = "sidebar-item" >
< div class = "sidebar-item-container" >
< a href = "../docs/input_output.html" class = "sidebar-item-text sidebar-link" >
< span class = "menu-text" > Template-free prompt construction< / span > < / a >
< / div >
< / li >
< li class = "sidebar-item" >
< div class = "sidebar-item-container" >
< a href = "../docs/rlhf.html" class = "sidebar-item-text sidebar-link" >
< span class = "menu-text" > RLHF (Beta)< / span > < / a >
< / div >
< / li >
< li class = "sidebar-item" >
< div class = "sidebar-item-container" >
< a href = "../docs/nccl.html" class = "sidebar-item-text sidebar-link" >
< span class = "menu-text" > NCCL< / span > < / a >
< / div >
< / li >
< li class = "sidebar-item" >
< div class = "sidebar-item-container" >
< a href = "../docs/mac.html" class = "sidebar-item-text sidebar-link" >
< span class = "menu-text" > Mac M-series< / span > < / a >
< / div >
< / li >
< li class = "sidebar-item" >
< div class = "sidebar-item-container" >
< a href = "../docs/multi-node.html" class = "sidebar-item-text sidebar-link" >
< span class = "menu-text" > Multi Node< / span > < / a >
< / div >
< / li >
< li class = "sidebar-item" >
< div class = "sidebar-item-container" >
< a href = "../docs/unsloth.html" class = "sidebar-item-text sidebar-link" >
< span class = "menu-text" > Unsloth< / span > < / a >
< / div >
< / li >
< li class = "sidebar-item" >
< div class = "sidebar-item-container" >
< a href = "../docs/amd_hpc.html" class = "sidebar-item-text sidebar-link active" >
< span class = "menu-text" > Training with AMD GPUs on HPC Systems< / span > < / a >
< / div >
< / li >
< / ul >
< / li >
< li class = "sidebar-item sidebar-item-section" >
< div class = "sidebar-item-container" >
< a href = "../docs/dataset-formats/index.html" class = "sidebar-item-text sidebar-link" >
< span class = "menu-text" > Dataset Formats< / span > < / a >
< a class = "sidebar-item-toggle text-start" data-bs-toggle = "collapse" data-bs-target = "#quarto-sidebar-section-2" role = "navigation" aria-expanded = "true" aria-label = "Toggle section" >
< i class = "bi bi-chevron-right ms-2" > < / i >
< / a >
< / div >
< ul id = "quarto-sidebar-section-2" class = "collapse list-unstyled sidebar-section depth1 show" >
< li class = "sidebar-item" >
< div class = "sidebar-item-container" >
< a href = "../docs/dataset-formats/pretraining.html" class = "sidebar-item-text sidebar-link" >
< span class = "menu-text" > Pre-training< / span > < / a >
< / div >
< / li >
< li class = "sidebar-item" >
< div class = "sidebar-item-container" >
< a href = "../docs/dataset-formats/inst_tune.html" class = "sidebar-item-text sidebar-link" >
< span class = "menu-text" > Instruction Tuning< / span > < / a >
< / div >
< / li >
< li class = "sidebar-item" >
< div class = "sidebar-item-container" >
< a href = "../docs/dataset-formats/conversation.html" class = "sidebar-item-text sidebar-link" >
< span class = "menu-text" > Conversation< / span > < / a >
< / div >
< / li >
< li class = "sidebar-item" >
< div class = "sidebar-item-container" >
< a href = "../docs/dataset-formats/template_free.html" class = "sidebar-item-text sidebar-link" >
< span class = "menu-text" > Template-Free< / span > < / a >
< / div >
< / li >
< li class = "sidebar-item" >
< div class = "sidebar-item-container" >
< a href = "../docs/dataset-formats/tokenized.html" class = "sidebar-item-text sidebar-link" >
< span class = "menu-text" > Custom Pre-Tokenized Dataset< / span > < / a >
< / div >
< / li >
< / ul >
< / li >
< li class = "sidebar-item sidebar-item-section" >
< div class = "sidebar-item-container" >
< a class = "sidebar-item-text sidebar-link text-start" data-bs-toggle = "collapse" data-bs-target = "#quarto-sidebar-section-3" role = "navigation" aria-expanded = "true" >
< span class = "menu-text" > Reference< / span > < / a >
< a class = "sidebar-item-toggle text-start" data-bs-toggle = "collapse" data-bs-target = "#quarto-sidebar-section-3" role = "navigation" aria-expanded = "true" aria-label = "Toggle section" >
< i class = "bi bi-chevron-right ms-2" > < / i >
< / a >
< / div >
< ul id = "quarto-sidebar-section-3" class = "collapse list-unstyled sidebar-section depth1 show" >
< li class = "sidebar-item" >
< div class = "sidebar-item-container" >
< a href = "../docs/config.html" class = "sidebar-item-text sidebar-link" >
< span class = "menu-text" > Config options< / span > < / a >
< / div >
< / li >
< / ul >
< / li >
< li class = "sidebar-item" >
< div class = "sidebar-item-container" >
< a href = "../docs/faq.html" class = "sidebar-item-text sidebar-link" >
< span class = "menu-text" > FAQ< / span > < / a >
< / div >
< / li >
< / ul >
< / div >
< / nav >
< div id = "quarto-sidebar-glass" class = "quarto-sidebar-collapse-item" data-bs-toggle = "collapse" data-bs-target = ".quarto-sidebar-collapse-item" > < / div >
<!-- margin - sidebar -->
< div id = "quarto-margin-sidebar" class = "sidebar margin-sidebar" >
< nav id = "TOC" role = "doc-toc" class = "toc-active" >
< h2 id = "toc-title" > On this page< / h2 >
< ul >
< li > < a href = "#setup" id = "toc-setup" class = "nav-link active" data-scroll-target = "#setup" > Setup< / a >
< ul class = "collapse" >
< li > < a href = "#install-python" id = "toc-install-python" class = "nav-link" data-scroll-target = "#install-python" > 1. Install Python< / a > < / li >
< li > < a href = "#configure-python-environment" id = "toc-configure-python-environment" class = "nav-link" data-scroll-target = "#configure-python-environment" > 2. Configure Python Environment< / a > < / li >
< li > < a href = "#load-amd-gpu-software" id = "toc-load-amd-gpu-software" class = "nav-link" data-scroll-target = "#load-amd-gpu-software" > 3. Load AMD GPU Software< / a > < / li >
< li > < a href = "#install-pytorch" id = "toc-install-pytorch" class = "nav-link" data-scroll-target = "#install-pytorch" > 4. Install PyTorch< / a > < / li >
< li > < a href = "#install-flash-attention" id = "toc-install-flash-attention" class = "nav-link" data-scroll-target = "#install-flash-attention" > 5. Install Flash Attention< / a > < / li >
< li > < a href = "#install-axolotl" id = "toc-install-axolotl" class = "nav-link" data-scroll-target = "#install-axolotl" > 6. Install Axolotl< / a > < / li >
< li > < a href = "#apply-xformers-workaround" id = "toc-apply-xformers-workaround" class = "nav-link" data-scroll-target = "#apply-xformers-workaround" > 7. Apply xformers Workaround< / a > < / li >
< li > < a href = "#prepare-job-submission-script" id = "toc-prepare-job-submission-script" class = "nav-link" data-scroll-target = "#prepare-job-submission-script" > 8. Prepare Job Submission Script< / a > < / li >
< li > < a href = "#download-base-model" id = "toc-download-base-model" class = "nav-link" data-scroll-target = "#download-base-model" > 9. Download Base Model< / a > < / li >
< li > < a href = "#create-axolotl-configuration" id = "toc-create-axolotl-configuration" class = "nav-link" data-scroll-target = "#create-axolotl-configuration" > 10. Create Axolotl Configuration< / a > < / li >
< li > < a href = "#preprocess-data" id = "toc-preprocess-data" class = "nav-link" data-scroll-target = "#preprocess-data" > 11. Preprocess Data< / a > < / li >
< li > < a href = "#train" id = "toc-train" class = "nav-link" data-scroll-target = "#train" > 12. Train< / a > < / li >
< / ul > < / li >
< / ul >
< / nav >
< / div >
<!-- main -->
< main class = "content" id = "quarto-document-content" >
< header id = "title-block-header" class = "quarto-title-block default" > < nav class = "quarto-page-breadcrumbs quarto-title-breadcrumbs d-none d-lg-block" aria-label = "breadcrumb" > < ol class = "breadcrumb" > < li class = "breadcrumb-item" > < a href = "../docs/debugging.html" > How-To Guides< / a > < / li > < li class = "breadcrumb-item" > < a href = "../docs/amd_hpc.html" > Training with AMD GPUs on HPC Systems< / a > < / li > < / ol > < / nav >
< div class = "quarto-title" >
< h1 class = "title" > Training with AMD GPUs on HPC Systems< / h1 >
< / div >
< div >
< div class = "description" >
A comprehensive guide for using Axolotl on distributed systems with AMD GPUs
< / div >
< / div >
< div class = "quarto-title-meta" >
< / div >
< / header >
< p > This guide provides step-by-step instructions for installing and configuring Axolotl on a High-Performance Computing (HPC) environment equipped with AMD GPUs.< / p >
< section id = "setup" class = "level2" >
< h2 class = "anchored" data-anchor-id = "setup" > Setup< / h2 >
< section id = "install-python" class = "level3" >
< h3 class = "anchored" data-anchor-id = "install-python" > 1. Install Python< / h3 >
< p > We recommend using Miniforge, a minimal conda-based Python distribution:< / p >
< div class = "sourceCode" id = "cb1" > < pre class = "sourceCode bash code-with-copy" > < code class = "sourceCode bash" > < span id = "cb1-1" > < a href = "#cb1-1" aria-hidden = "true" tabindex = "-1" > < / a > < span class = "ex" > curl< / span > < span class = "at" > -L< / span > < span class = "at" > -O< / span > < span class = "st" > "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-< / span > < span class = "va" > $(< / span > < span class = "fu" > uname< / span > < span class = "va" > )< / span > < span class = "st" > -< / span > < span class = "va" > $(< / span > < span class = "fu" > uname< / span > < span class = "at" > -m< / span > < span class = "va" > )< / span > < span class = "st" > .sh"< / span > < / span >
< span id = "cb1-2" > < a href = "#cb1-2" aria-hidden = "true" tabindex = "-1" > < / a > < span class = "fu" > bash< / span > Miniforge3-< span class = "va" > $(< / span > < span class = "fu" > uname< / span > < span class = "va" > )< / span > -< span class = "va" > $(< / span > < span class = "fu" > uname< / span > < span class = "at" > -m< / span > < span class = "va" > )< / span > .sh< / span > < / code > < button title = "Copy to Clipboard" class = "code-copy-button" > < i class = "bi" > < / i > < / button > < / pre > < / div >
< / section >
< section id = "configure-python-environment" class = "level3" >
< h3 class = "anchored" data-anchor-id = "configure-python-environment" > 2. Configure Python Environment< / h3 >
< p > Add Python to your PATH and ensure it’s available at login:< / p >
< div class = "sourceCode" id = "cb2" > < pre class = "sourceCode bash code-with-copy" > < code class = "sourceCode bash" > < span id = "cb2-1" > < a href = "#cb2-1" aria-hidden = "true" tabindex = "-1" > < / a > < span class = "bu" > echo< / span > < span class = "st" > 'export PATH=~/miniforge3/bin:$PATH'< / span > < span class = "op" > > > < / span > ~/.bashrc< / span >
< span id = "cb2-2" > < a href = "#cb2-2" aria-hidden = "true" tabindex = "-1" > < / a > < span class = "bu" > echo< / span > < span class = "st" > 'if [ -f ~/.bashrc ]; then . ~/.bashrc; fi'< / span > < span class = "op" > > > < / span > ~/.bash_profile< / span > < / code > < button title = "Copy to Clipboard" class = "code-copy-button" > < i class = "bi" > < / i > < / button > < / pre > < / div >
< / section >
< section id = "load-amd-gpu-software" class = "level3" >
< h3 class = "anchored" data-anchor-id = "load-amd-gpu-software" > 3. Load AMD GPU Software< / h3 >
< p > Load the ROCm module:< / p >
< div class = "sourceCode" id = "cb3" > < pre class = "sourceCode bash code-with-copy" > < code class = "sourceCode bash" > < span id = "cb3-1" > < a href = "#cb3-1" aria-hidden = "true" tabindex = "-1" > < / a > < span class = "ex" > module< / span > load rocm/5.7.1< / span > < / code > < button title = "Copy to Clipboard" class = "code-copy-button" > < i class = "bi" > < / i > < / button > < / pre > < / div >
< p > Note: The specific module name and version may vary depending on your HPC system. Consult your system documentation for the correct module name.< / p >
< / section >
< section id = "install-pytorch" class = "level3" >
< h3 class = "anchored" data-anchor-id = "install-pytorch" > 4. Install PyTorch< / h3 >
< p > Install PyTorch with ROCm support:< / p >
< div class = "sourceCode" id = "cb4" > < pre class = "sourceCode bash code-with-copy" > < code class = "sourceCode bash" > < span id = "cb4-1" > < a href = "#cb4-1" aria-hidden = "true" tabindex = "-1" > < / a > < span class = "ex" > pip< / span > install < span class = "at" > -U< / span > torch torchvision torchaudio < span class = "at" > --index-url< / span > https://download.pytorch.org/whl/rocm5.7 < span class = "at" > --force-reinstall< / span > < / span > < / code > < button title = "Copy to Clipboard" class = "code-copy-button" > < i class = "bi" > < / i > < / button > < / pre > < / div >
< / section >
< section id = "install-flash-attention" class = "level3" >
< h3 class = "anchored" data-anchor-id = "install-flash-attention" > 5. Install Flash Attention< / h3 >
< p > Clone and install the Flash Attention repository:< / p >
< div class = "sourceCode" id = "cb5" > < pre class = "sourceCode bash code-with-copy" > < code class = "sourceCode bash" > < span id = "cb5-1" > < a href = "#cb5-1" aria-hidden = "true" tabindex = "-1" > < / a > < span class = "fu" > git< / span > clone < span class = "at" > --recursive< / span > https://github.com/ROCmSoftwarePlatform/flash-attention.git< / span >
< span id = "cb5-2" > < a href = "#cb5-2" aria-hidden = "true" tabindex = "-1" > < / a > < span class = "bu" > export< / span > < span class = "va" > GPU_ARCHS< / span > < span class = "op" > =< / span > < span class = "st" > "gfx90a"< / span > < / span >
< span id = "cb5-3" > < a href = "#cb5-3" aria-hidden = "true" tabindex = "-1" > < / a > < span class = "bu" > cd< / span > flash-attention< / span >
< span id = "cb5-4" > < a href = "#cb5-4" aria-hidden = "true" tabindex = "-1" > < / a > < span class = "bu" > export< / span > < span class = "va" > PYTHON_SITE_PACKAGES< / span > < span class = "op" > =< / span > < span class = "va" > $(< / span > < span class = "ex" > python< / span > < span class = "at" > -c< / span > < span class = "st" > 'import site; print(site.getsitepackages()[0])'< / span > < span class = "va" > )< / span > < / span >
< span id = "cb5-5" > < a href = "#cb5-5" aria-hidden = "true" tabindex = "-1" > < / a > < span class = "fu" > patch< / span > < span class = "st" > "< / span > < span class = "va" > ${PYTHON_SITE_PACKAGES}< / span > < span class = "st" > /torch/utils/hipify/hipify_python.py"< / span > hipify_patch.patch< / span >
< span id = "cb5-6" > < a href = "#cb5-6" aria-hidden = "true" tabindex = "-1" > < / a > < span class = "ex" > pip< / span > install .< / span > < / code > < button title = "Copy to Clipboard" class = "code-copy-button" > < i class = "bi" > < / i > < / button > < / pre > < / div >
< / section >
< section id = "install-axolotl" class = "level3" >
< h3 class = "anchored" data-anchor-id = "install-axolotl" > 6. Install Axolotl< / h3 >
< p > Clone and install Axolotl:< / p >
< div class = "sourceCode" id = "cb6" > < pre class = "sourceCode bash code-with-copy" > < code class = "sourceCode bash" > < span id = "cb6-1" > < a href = "#cb6-1" aria-hidden = "true" tabindex = "-1" > < / a > < span class = "fu" > git< / span > clone https://github.com/axolotl-ai-cloud/axolotl< / span >
< span id = "cb6-2" > < a href = "#cb6-2" aria-hidden = "true" tabindex = "-1" > < / a > < span class = "bu" > cd< / span > axolotl< / span >
< span id = "cb6-3" > < a href = "#cb6-3" aria-hidden = "true" tabindex = "-1" > < / a > < span class = "ex" > pip< / span > install packaging ninja< / span >
< span id = "cb6-4" > < a href = "#cb6-4" aria-hidden = "true" tabindex = "-1" > < / a > < span class = "ex" > pip< / span > install < span class = "at" > -e< / span > .< / span > < / code > < button title = "Copy to Clipboard" class = "code-copy-button" > < i class = "bi" > < / i > < / button > < / pre > < / div >
< / section >
< section id = "apply-xformers-workaround" class = "level3" >
< h3 class = "anchored" data-anchor-id = "apply-xformers-workaround" > 7. Apply xformers Workaround< / h3 >
< p > xformers appears to be incompatible with ROCm. Apply the following workarounds:< / p >
< ul >
< li > Edit $HOME/packages/axolotl/src/axolotl/monkeypatch/llama_attn_hijack_flash.py, modifying the code to always return < code > False< / code > for SwiGLU availability from xformers.< / li >
< li > Edit $HOME/miniforge3/lib/python3.10/site-packages/xformers/ops/swiglu_op.py, replacing the “SwiGLU” function with a pass statement.< / li >
< / ul >
< / section >
< section id = "prepare-job-submission-script" class = "level3" >
< h3 class = "anchored" data-anchor-id = "prepare-job-submission-script" > 8. Prepare Job Submission Script< / h3 >
< p > Create a script for job submission using your HPC’s particular software (e.g. Slurm, PBS). Include necessary environment setup and the command to run Axolotl training. If the compute nodes do not have internet access, it is recommended to include the following:< / p >
< div class = "sourceCode" id = "cb7" > < pre class = "sourceCode bash code-with-copy" > < code class = "sourceCode bash" > < span id = "cb7-1" > < a href = "#cb7-1" aria-hidden = "true" tabindex = "-1" > < / a > < span class = "bu" > export< / span > < span class = "va" > TRANSFORMERS_OFFLINE< / span > < span class = "op" > =< / span > 1< / span >
< span id = "cb7-2" > < a href = "#cb7-2" aria-hidden = "true" tabindex = "-1" > < / a > < span class = "bu" > export< / span > < span class = "va" > HF_DATASETS_OFFLINE< / span > < span class = "op" > =< / span > 1< / span > < / code > < button title = "Copy to Clipboard" class = "code-copy-button" > < i class = "bi" > < / i > < / button > < / pre > < / div >
< / section >
< section id = "download-base-model" class = "level3" >
< h3 class = "anchored" data-anchor-id = "download-base-model" > 9. Download Base Model< / h3 >
< p > Download a base model using the Hugging Face CLI:< / p >
< div class = "sourceCode" id = "cb8" > < pre class = "sourceCode bash code-with-copy" > < code class = "sourceCode bash" > < span id = "cb8-1" > < a href = "#cb8-1" aria-hidden = "true" tabindex = "-1" > < / a > < span class = "ex" > huggingface-cli< / span > download meta-llama/Meta-Llama-3.1-8B < span class = "at" > --local-dir< / span > ~/hfdata/llama3.1-8B< / span > < / code > < button title = "Copy to Clipboard" class = "code-copy-button" > < i class = "bi" > < / i > < / button > < / pre > < / div >
< / section >
< section id = "create-axolotl-configuration" class = "level3" >
< h3 class = "anchored" data-anchor-id = "create-axolotl-configuration" > 10. Create Axolotl Configuration< / h3 >
< p > Create an Axolotl configuration file (YAML format) tailored to your specific training requirements and dataset. Use FSDP for multi-node training.< / p >
< p > Note: DeepSpeed did not work at the time of testing. However, if you manage to get it working, please let us know.< / p >
< / section >
< section id = "preprocess-data" class = "level3" >
< h3 class = "anchored" data-anchor-id = "preprocess-data" > 11. Preprocess Data< / h3 >
< p > Run preprocessing on the login node:< / p >
< div class = "sourceCode" id = "cb9" > < pre class = "sourceCode bash code-with-copy" > < code class = "sourceCode bash" > < span id = "cb9-1" > < a href = "#cb9-1" aria-hidden = "true" tabindex = "-1" > < / a > < span class = "va" > CUDA_VISIBLE_DEVICES< / span > < span class = "op" > =< / span > < span class = "st" > ""< / span > < span class = "ex" > python< / span > < span class = "at" > -m< / span > axolotl.cli.preprocess /path/to/your/config.yaml< / span > < / code > < button title = "Copy to Clipboard" class = "code-copy-button" > < i class = "bi" > < / i > < / button > < / pre > < / div >
< / section >
< section id = "train" class = "level3" >
< h3 class = "anchored" data-anchor-id = "train" > 12. Train< / h3 >
< p > You are now ready to submit your previously prepared job script. 🚂< / p >
< / section >
< / section >
< / main > <!-- /main -->
< script id = "quarto-html-after-body" type = "application/javascript" >
window . document . addEventListener ( "DOMContentLoaded" , function ( event ) {
// Mirror the stylesheet element's `data-mode` attribute onto <body>:
// "dark" selects the `quarto-dark` class, any other value selects
// `quarto-light`; the opposite class is removed.
const toggleBodyColorMode = (bsSheetEl) => {
  const isDark = bsSheetEl.getAttribute("data-mode") === "dark";
  const { classList } = window.document.querySelector("body");
  classList.add(isDark ? "quarto-dark" : "quarto-light");
  classList.remove(isDark ? "quarto-light" : "quarto-dark");
};
// Apply the body colour-mode class based on the `link#quarto-bootstrap`
// stylesheet element; does nothing when that element is not in the document.
const toggleBodyColorPrimary = () => {
  const sheet = window.document.querySelector("link#quarto-bootstrap");
  if (!sheet) {
    return;
  }
  toggleBodyColorMode(sheet);
};
toggleBodyColorPrimary ( ) ;
const icon = "" ;
// Create an AnchorJS instance and add anchors to every element carrying the
// `anchored` class. The options place the anchor on the right and use the
// `icon` value defined just above as its glyph.
const anchorJS = new window . AnchorJS ( ) ;
anchorJS . options = {
placement : 'right' ,
icon : icon
} ;
anchorJS . add ( '.anchored' ) ;
// True when any of the element's class names begins with `code-annotation-`.
const isCodeAnnotation = (el) =>
  Array.from(el.classList).some((name) => name.startsWith('code-annotation-'));
// Success handler for a copy event `e` (reads `e.trigger`, calls
// `e.clearSelection()`). Gives one second of "Copied!" feedback on the
// triggering button: a checked class, a temporary title and, when
// `window.bootstrap` is present, a manually shown tooltip. After the
// timeout the button is returned to its previous title and attributes.
const onCopySuccess = (e) => {
  const copyButton = e.trigger;

  // Drop focus, then switch the button into its "copied" look.
  copyButton.blur();
  copyButton.classList.add('code-copy-button-checked');
  const previousTitle = copyButton.getAttribute("title");
  copyButton.setAttribute("title", "Copied!");

  let tooltip;
  if (window.bootstrap) {
    copyButton.setAttribute("data-bs-toggle", "tooltip");
    copyButton.setAttribute("data-bs-placement", "left");
    copyButton.setAttribute("data-bs-title", "Copied!");
    tooltip = new bootstrap.Tooltip(copyButton, {
      trigger: "manual",
      customClass: "code-copy-button-tooltip",
      offset: [0, -8],
    });
    tooltip.show();
  }

  // Undo the temporary feedback once the delay has elapsed.
  const restoreButton = () => {
    if (tooltip) {
      tooltip.hide();
      copyButton.removeAttribute("data-bs-title");
      copyButton.removeAttribute("data-bs-toggle");
      copyButton.removeAttribute("data-bs-placement");
    }
    copyButton.setAttribute("title", previousTitle);
    copyButton.classList.remove('code-copy-button-checked');
  };
  setTimeout(restoreButton, 1000);

  // Clear the code selection made by the copy.
  e.clearSelection();
};
// Build the text placed on the clipboard for a copy button.
// `trigger` is the clicked button; the element immediately before it is
// deep-cloned, direct children recognised by `isCodeAnnotation` are removed
// from the clone, and the clone's `innerText` is returned. The page's own
// element is never modified.
const getTextToCopy = function (trigger) {
  const codeEl = trigger.previousElementSibling.cloneNode(true);
  // Iterate over a snapshot: `children` is a live collection, so removing a
  // child while iterating it directly would skip the element that follows
  // each removed one (e.g. two adjacent annotations).
  for (const childEl of Array.from(codeEl.children)) {
    if (isCodeAnnotation(childEl)) {
      childEl.remove();
    }
  }
  return codeEl.innerText;
};
// Wire up copy-to-clipboard for code copy buttons that are not marked with
// `data-in-quarto-modal`: the copied text comes from getTextToCopy and a
// successful copy is reported through onCopySuccess.
const clipboard = new window . ClipboardJS ( '.code-copy-button:not([data-in-quarto-modal])' , {
text : getTextToCopy
} ) ;
clipboard . on ( 'success' , onCopySuccess ) ;
// When the embedded source-code modal element exists, buttons marked with
// `data-in-quarto-modal` get their own ClipboardJS instance that uses the
// modal element as its container.
if ( window . document . getElementById ( 'quarto-embedded-source-code-modal' ) ) {
// For code content inside modals, clipBoardJS needs to be initialized with a container option
// TODO: Check when it could be a function (https://github.com/zenorocha/clipboard.js/issues/860)
const clipboardModal = new window . ClipboardJS ( '.code-copy-button[data-in-quarto-modal]' , {
text : getTextToCopy ,
container : window . document . getElementById ( 'quarto-embedded-source-code-modal' )
} ) ;
clipboardModal . on ( 'success' , onCopySuccess ) ;
}
// Patterns used to decide whether a link target is "internal".
// They are regex literals so that every backslash escape is kept: building
// the site pattern from a string literal turned `\.` into `.`, which matched
// any character in place of the dots in the host name.
var localhostRegex = /^(?:http|https):\/\/localhost\:?[0-9]*\//;
var mailtoRegex = /^mailto:/;
var filterRegex = /https:\/\/axolotl-ai-cloud\.github\.io\/axolotl\//;

// Returns true when `href` contains the published site URL, starts with an
// http(s) localhost URL, or is a `mailto:` link; false otherwise.
var isInternal = (href) => {
  return filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href);
};
// Look at every link that is not part of the navigation chrome. For the ones
// that are external, restore the href saved in data-original-href (when one
// was saved), undoing any rewrite quarto-nav.js might have applied to a link
// that should stay external.
var links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool):not(.about-link)');
links.forEach((anchor) => {
  if (isInternal(anchor.href)) {
    return;
  }
  const savedHref = anchor.dataset.originalHref;
  if (savedHref !== undefined) {
    anchor.href = savedHref;
  }
});
// Attach a tippy tooltip to `el` using the shared Quarto settings.
//   contentFn:     optional, becomes the `content` option
//   onTriggerFn:   optional, becomes the `onTrigger` option
//   onUntriggerFn: optional, becomes the `onUntrigger` option
// An option is only set when its argument is truthy.
function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) {
  const config = {
    allowHTML: true,
    maxWidth: 500,
    delay: 100,
    arrow: false,
    // mount the tooltip inside the parent of the element it is given
    appendTo: (reference) => reference.parentElement,
    interactive: true,
    interactiveBorder: 10,
    theme: 'quarto',
    placement: 'bottom-start',
  };
  const optionalSettings = [
    ['content', contentFn],
    ['onTrigger', onTriggerFn],
    ['onUntrigger', onUntriggerFn],
  ];
  for (const [option, value] of optionalSettings) {
    if (value) {
      config[option] = value;
    }
  }
  window.tippy(el, config);
}
// Footnote references: hovering one shows the HTML of the footnote it points to.
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
noterefs.forEach((noteref) => {
  tippyHover(noteref, () => {
    // use id or data attribute instead here
    let target = noteref.getAttribute('data-footnote-href') || noteref.getAttribute('href');
    try {
      // when the value parses as a full URL, keep only its fragment
      target = new URL(target).hash;
    } catch {
      // otherwise keep the raw attribute value
    }
    const footnote = window.document.getElementById(target.replace(/^#\/?/, ""));
    return footnote ? footnote.innerHTML : "";
  });
});
const xrefs = window.document.querySelectorAll('a.quarto-xref');
// Turn a cross-reference target into the HTML shown in its tooltip.
//   id:   id of the target, or null when the target is a whole page
//   note: the target element; it is modified in place
//   returns: an HTML string
const processXRef = (id, note) => {
  // Strip column container classes from the note and all of its descendants
  for (const el of [note, ...note.querySelectorAll('*')]) {
    el.classList.remove("page-full", "page-columns");
  }

  // Ask Quarto to typeset math in `el`, when that hook is available
  const typeset = (el) => {
    if (window.Quarto?.typesetMath) {
      window.Quarto.typesetMath(el);
    }
  };

  const isSection = id === null || id.startsWith('sec-');
  if (!isSection) {
    // Remove any anchor links if they are present
    const anchorLink = note.querySelector('a.anchorjs-link');
    if (anchorLink) {
      anchorLink.remove();
    }
    typeset(note);
    // TODO in 1.5, we should make sure this works without a callout special case
    return note.classList.contains("callout") ? note.outerHTML : note.innerHTML;
  }

  // Sections with two children or fewer are shown whole
  if (!(note.children && note.children.length > 2)) {
    typeset(note);
    return note.innerHTML;
  }

  // Special case sections, only their first couple elements: the first child
  // plus the first following child that is not an empty paragraph
  const container = document.createElement("div");
  const [leadEl, ...followingEls] = note.children;
  container.appendChild(leadEl.cloneNode(true));
  const firstContentEl = followingEls.find(
    (child) => !(child.tagName === "P" && child.innerText === "")
  );
  if (firstContentEl) {
    container.appendChild(firstContentEl.cloneNode(true));
  }
  typeset(container);
  return container.innerHTML;
};
// Cross references: the tooltip content is resolved when the tooltip is
// triggered, either from an element of this page or by fetching the linked page.
xrefs.forEach((xref) => {
  const resolveContent = (instance) => {
    // keep the tooltip disabled until its content has been resolved
    instance.disable();
    const reveal = () => {
      instance.enable();
      instance.show();
    };
    const loadHtml = (target) => fetch(target).then(res => res.text());
    const parseHtml = (markup) => new DOMParser().parseFromString(markup, "text/html");

    const url = xref.getAttribute('href');
    let hash = undefined;
    if (url.startsWith('#')) {
      hash = url;
    } else {
      try {
        hash = new URL(url).hash;
      } catch {
        // not parseable as a URL: handled below as "no hash"
      }
    }

    if (!hash) {
      // See if we can fetch a full url (with no hash to target)
      // This is a special case and we should probably do some content thinning / targeting
      loadHtml(url)
        .then((markup) => {
          const mainEl = parseHtml(markup).querySelector('main.content');
          if (mainEl === null) {
            return;
          }
          // This should only happen for chapter cross references
          // (since there is no id in the URL)
          // remove the first header
          if (mainEl.children.length > 0 && mainEl.children[0].tagName === "HEADER") {
            mainEl.children[0].remove();
          }
          instance.setContent(processXRef(null, mainEl));
        })
        .finally(reveal);
      return;
    }

    const id = hash.replace(/^#\/?/, "");
    const localEl = window.document.getElementById(id);
    if (localEl !== null) {
      // target is on this page: process a clone so the page itself is untouched
      try {
        instance.setContent(processXRef(id, localEl.cloneNode(true)));
      } finally {
        reveal();
      }
      return;
    }

    // See if we can fetch this
    loadHtml(url.split('#')[0])
      .then((markup) => {
        const remoteEl = parseHtml(markup).getElementById(id);
        if (remoteEl !== null) {
          instance.setContent(processXRef(id, remoteEl));
        }
      })
      .finally(reveal);
  };
  // nothing to do when the tooltip is untriggered
  const onUntrigger = function (instance) {
  };
  tippyHover(xref, undefined, resolveContent, onUntrigger);
});
let selectedAnnoteEl;
// CSS selector for the <span> carrying the given data-code-cell and
// data-code-annotation attribute values.
const selectorForAnnotation = (cell, annotation) =>
  `span[data-code-cell="${cell}"][data-code-annotation="${annotation}"]`;
// Highlight the code lines that an annotation element refers to.
//   annoteEl: element carrying data-target-cell and data-target-annotation.
// The matching annotation span lists its lines in data-code-lines (comma
// separated); the line elements have the ids "<cell>-<line>". Two absolutely
// positioned divs are created on first use and then repositioned: one over
// the code lines and one inside the cell's .code-annotation-gutter.
// When any element needed for the highlight is missing the function returns
// without changing anything instead of throwing inside an event handler.
// On success annoteEl is recorded in selectedAnnoteEl.
const selectCodeLines = (annoteEl) => {
  const targetCell = annoteEl.getAttribute("data-target-cell");
  const targetAnnotation = annoteEl.getAttribute("data-target-annotation");
  const annoteSpan = window.document.querySelector(selectorForAnnotation(targetCell, targetAnnotation));
  const codeLines = annoteSpan ? annoteSpan.getAttribute("data-code-lines") : null;
  if (!codeLines) {
    // no annotation span for this element, or it lists no lines
    return;
  }
  const lineIds = codeLines.split(",").map((line) => targetCell + "-" + line);

  // The highlight spans from the top of the first listed line to the bottom of
  // the last one (for a single line both are the same element).
  const firstEl = window.document.getElementById(lineIds[0]);
  const lastEl = window.document.getElementById(lineIds[lineIds.length - 1]);
  if (firstEl === null || lastEl === null) {
    return;
  }
  // the highlight is attached two levels above the line element
  const parent = firstEl.parentElement ? firstEl.parentElement.parentElement : null;
  if (parent === null) {
    return;
  }
  const top = firstEl.offsetTop;
  const height = lastEl.offsetTop + lastEl.offsetHeight - top;

  // cook up a div (if necessary) and position it
  let div = window.document.getElementById("code-annotation-line-highlight");
  if (div === null) {
    div = window.document.createElement("div");
    div.setAttribute("id", "code-annotation-line-highlight");
    div.style.position = 'absolute';
    parent.appendChild(div);
  }
  div.style.top = top - 2 + "px";
  div.style.height = height + 4 + "px";
  div.style.left = 0;

  // same for the marker in the annotation gutter of the code cell
  let gutterDiv = window.document.getElementById("code-annotation-line-highlight-gutter");
  if (gutterDiv === null) {
    const codeCell = window.document.getElementById(targetCell);
    const gutter = codeCell ? codeCell.querySelector('.code-annotation-gutter') : null;
    if (gutter) {
      gutterDiv = window.document.createElement("div");
      gutterDiv.setAttribute("id", "code-annotation-line-highlight-gutter");
      gutterDiv.style.position = 'absolute';
      gutter.appendChild(gutterDiv);
    }
  }
  if (gutterDiv) {
    gutterDiv.style.top = top - 2 + "px";
    gutterDiv.style.height = height + 4 + "px";
  }

  selectedAnnoteEl = annoteEl;
};
// Remove both highlight divs (when they exist) and forget the selected annotation.
const unselectCodeLines = () => {
  for (const elId of ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"]) {
    window.document.getElementById(elId)?.remove();
  }
  selectedAnnoteEl = undefined;
};
// Handle positioning of the toggle: on (throttled) window resize, recompute
// the highlight of the currently selected annotation, if there is one.
window.addEventListener("resize", throttle(() => {
  // NOTE(review): no declaration of elRect is visible in this part of the
  // file; confirm it is declared elsewhere, otherwise this creates a global.
  elRect = undefined;
  if (!selectedAnnoteEl) {
    return;
  }
  selectCodeLines(selectedAnnoteEl);
}, 10));
// Wrap fn so that bursts of calls are limited.
//   fn: function to call
//   ms: delay, in milliseconds, applied to calls made while throttling
// The first call runs fn immediately and turns throttling on. While it is on,
// each call cancels the pending timer and schedules fn (with that call's
// arguments) after `ms`; when the timer fires, throttling is turned off again.
function throttle(fn, ms) {
  let engaged = false;
  let pending;
  return (...args) => {
    if (engaged) {
      // all the others get throttled
      if (pending) {
        clearTimeout(pending); // cancel #2
      }
      pending = setTimeout(() => {
        fn.apply(this, args);
        pending = engaged = false;
      }, ms);
      return;
    }
    // first call gets through
    fn.apply(this, args);
    engaged = true;
  };
}
// Attach click handler to the DT: clicking an annotation term highlights its
// code lines; clicking the term that is already selected removes the highlight.
const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
for (const annoteDlNode of annoteDls) {
  annoteDlNode.addEventListener('click', (event) => {
    // Use the <dt> the listener is attached to. event.target can be an element
    // nested inside the <dt>, which has no data-target-cell /
    // data-target-annotation attributes and never equals selectedAnnoteEl.
    const clickedEl = event.currentTarget;
    if (clickedEl !== selectedAnnoteEl) {
      unselectCodeLines();
      const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
      if (activeEl) {
        activeEl.classList.remove('code-annotation-active');
      }
      selectCodeLines(clickedEl);
      clickedEl.classList.add('code-annotation-active');
    } else {
      // Unselect the line
      unselectCodeLines();
      clickedEl.classList.remove('code-annotation-active');
    }
  });
}
// Walk up from `el` until an element whose parent has a non-empty data-cites
// attribute is found.
//   returns: { el, cites } where `el` is that element (the child of the
//            data-cites holder) and `cites` is the attribute value split on
//            single spaces; undefined when no ancestor has data-cites.
const findCites = (el) => {
  let current = el;
  while (current.parentElement) {
    const cites = current.parentElement.dataset.cites;
    if (cites) {
      return {
        el: current,
        cites: cites.split(' ')
      };
    }
    current = current.parentElement;
  }
  return undefined;
};
// Bibliography references: hovering a citation shows the bibliography entries
// (elements with id "ref-<cite>") for every key listed in data-cites.
var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
bibliorefs.forEach((biblioref) => {
  const citeInfo = findCites(biblioref);
  if (!citeInfo) {
    return;
  }
  tippyHover(citeInfo.el, () => {
    const popup = window.document.createElement('div');
    for (const cite of citeInfo.cites) {
      const entry = window.document.createElement('div');
      entry.classList.add('hanging-indent', 'csl-entry');
      const biblioEntry = window.document.getElementById('ref-' + cite);
      if (biblioEntry) {
        entry.innerHTML = biblioEntry.innerHTML;
      }
      // an entry is appended even when no bibliography element was found
      popup.appendChild(entry);
    }
    return popup.innerHTML;
  });
});
} ) ;
</script>
</div> <!-- /content -->
</body></html>