Compare commits
1 Commits
autodoc
...
iterable-o
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1cfb8feb2d |
64
_quarto.yml
64
_quarto.yml
@@ -19,47 +19,35 @@ website:
|
|||||||
href: https://discord.gg/7m9sfhzaf3
|
href: https://discord.gg/7m9sfhzaf3
|
||||||
|
|
||||||
sidebar:
|
sidebar:
|
||||||
pinned: true
|
pinned: true
|
||||||
collapse-level: 2
|
collapse-level: 2
|
||||||
style: docked
|
style: docked
|
||||||
contents:
|
contents:
|
||||||
- text: Home
|
- text: Home
|
||||||
href: index.qmd
|
href: index.qmd
|
||||||
- section: "How-To Guides"
|
- section: "How-To Guides"
|
||||||
contents:
|
contents:
|
||||||
- docs/debugging.qmd
|
# TODO Edit folder structure after we have more docs.
|
||||||
- docs/multipack.qmd
|
- docs/debugging.qmd
|
||||||
- docs/fsdp_qlora.qmd
|
- docs/multipack.qmd
|
||||||
- docs/input_output.qmd
|
- docs/fsdp_qlora.qmd
|
||||||
- docs/rlhf.qmd
|
- docs/input_output.qmd
|
||||||
- docs/nccl.qmd
|
- docs/rlhf.qmd
|
||||||
- docs/mac.qmd
|
- docs/nccl.qmd
|
||||||
- docs/multi-node.qmd
|
- docs/mac.qmd
|
||||||
- docs/unsloth.qmd
|
- docs/multi-node.qmd
|
||||||
- docs/amd_hpc.qmd
|
- docs/unsloth.qmd
|
||||||
- section: "Dataset Formats"
|
- docs/amd_hpc.qmd
|
||||||
contents: docs/dataset-formats/*
|
- section: "Dataset Formats"
|
||||||
- section: "Reference"
|
contents: docs/dataset-formats/*
|
||||||
contents:
|
- section: "Reference"
|
||||||
- docs/config.qmd
|
contents:
|
||||||
- section: "API Reference"
|
- docs/config.qmd
|
||||||
contents: "{{ api_contents }}"
|
- docs/faq.qmd
|
||||||
- text: "FAQ"
|
|
||||||
href: docs/faq.qmd
|
|
||||||
|
|
||||||
format:
|
format:
|
||||||
html:
|
html:
|
||||||
theme: materia
|
theme: materia
|
||||||
css: styles.css
|
css: styles.css
|
||||||
toc: true
|
toc: true
|
||||||
|
|
||||||
quartodoc:
|
|
||||||
package: axolotl
|
|
||||||
parser: google
|
|
||||||
dir: api
|
|
||||||
sections:
|
|
||||||
- title: Core API
|
|
||||||
desc: Core functionality of Axolotl
|
|
||||||
|
|
||||||
metadata-files:
|
|
||||||
- api/_sidebar.yml
|
|
||||||
|
|||||||
17
_sidebar.yml
17
_sidebar.yml
@@ -1,17 +0,0 @@
|
|||||||
website:
|
|
||||||
sidebar:
|
|
||||||
- collapse-level: 2
|
|
||||||
contents:
|
|
||||||
- href: introduction.qmd
|
|
||||||
text: Introduction
|
|
||||||
- contents:
|
|
||||||
- reference/index.qmd
|
|
||||||
- contents: []
|
|
||||||
section: axolotl
|
|
||||||
section: Reference
|
|
||||||
- href: basics-summary.qmd
|
|
||||||
text: Basics
|
|
||||||
id: reference
|
|
||||||
search: true
|
|
||||||
style: docked
|
|
||||||
- id: dummy-sidebar
|
|
||||||
@@ -1,11 +0,0 @@
|
|||||||
# ConstantLengthDataset { #axolotl.ConstantLengthDataset }
|
|
||||||
|
|
||||||
```python
|
|
||||||
ConstantLengthDataset(self, tokenizer, datasets, seq_length=2048)
|
|
||||||
```
|
|
||||||
|
|
||||||
Iterable dataset that returns constant length chunks of tokens from stream of text files.
|
|
||||||
Args:
|
|
||||||
tokenizer (Tokenizer): The processor used for processing the data.
|
|
||||||
dataset (dataset.Dataset): Dataset with text files.
|
|
||||||
seq_length (int): Length of token sequences to return.
|
|
||||||
@@ -1,19 +0,0 @@
|
|||||||
# TokenizedPromptDataset { #axolotl.TokenizedPromptDataset }
|
|
||||||
|
|
||||||
```python
|
|
||||||
TokenizedPromptDataset(
|
|
||||||
self,
|
|
||||||
prompt_tokenizer,
|
|
||||||
dataset,
|
|
||||||
process_count=None,
|
|
||||||
keep_in_memory=False,
|
|
||||||
**kwargs,
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
Dataset that returns tokenized prompts from a stream of text files.
|
|
||||||
Args:
|
|
||||||
prompt_tokenizer (PromptTokenizingStrategy): The prompt tokenizing method for processing the data.
|
|
||||||
dataset (dataset.Dataset): Dataset with text files.
|
|
||||||
process_count (int): Number of processes to use for tokenizing.
|
|
||||||
keep_in_memory (bool): Whether to keep the tokenized dataset in memory.
|
|
||||||
@@ -1,28 +0,0 @@
|
|||||||
# choose_config { #axolotl.choose_config }
|
|
||||||
|
|
||||||
```python
|
|
||||||
choose_config(path)
|
|
||||||
```
|
|
||||||
|
|
||||||
Helper method for choosing a `axolotl` config YAML file (considering only files
|
|
||||||
ending with `.yml` or `.yaml`). If more than one config file exists in the passed
|
|
||||||
`path`, the user is prompted to choose one.
|
|
||||||
|
|
||||||
## Parameters {.doc-section .doc-section-parameters}
|
|
||||||
|
|
||||||
| Name | Type | Description | Default |
|
|
||||||
|--------|--------|-----------------------------------------------|------------|
|
|
||||||
| path | Path | Directory in which config file(s) are stored. | _required_ |
|
|
||||||
|
|
||||||
## Returns {.doc-section .doc-section-returns}
|
|
||||||
|
|
||||||
| Name | Type | Description |
|
|
||||||
|--------|--------|----------------------------------------------------------------------------------|
|
|
||||||
| | str | Path to either (1) the sole YAML file, or (2) if more than one YAML files exist, |
|
|
||||||
| | str | the user-selected YAML file. |
|
|
||||||
|
|
||||||
## Raises {.doc-section .doc-section-raises}
|
|
||||||
|
|
||||||
| Name | Type | Description |
|
|
||||||
|--------|------------|-------------------------------------------------|
|
|
||||||
| | ValueError | If no YAML files are found in the given `path`. |
|
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
# Function reference {.doc .doc-index}
|
|
||||||
|
|
||||||
## Core API
|
|
||||||
|
|
||||||
Core functionality of Axolotl
|
|
||||||
@@ -1,21 +0,0 @@
|
|||||||
# load_cfg { #axolotl.load_cfg }
|
|
||||||
|
|
||||||
```python
|
|
||||||
load_cfg(config=Path('examples/'), **kwargs)
|
|
||||||
```
|
|
||||||
|
|
||||||
Loads the `axolotl` configuration stored at `config`, validates it, and performs
|
|
||||||
various setup.
|
|
||||||
|
|
||||||
## Parameters {.doc-section .doc-section-parameters}
|
|
||||||
|
|
||||||
| Name | Type | Description | Default |
|
|
||||||
|--------|--------------------|--------------------------------------------------------------|---------------------|
|
|
||||||
| config | Union\[str, Path\] | Path (local or remote) to `axolotl` config YAML file. | `Path('examples/')` |
|
|
||||||
| kwargs | | Additional keyword arguments to override config file values. | `{}` |
|
|
||||||
|
|
||||||
## Returns {.doc-section .doc-section-returns}
|
|
||||||
|
|
||||||
| Name | Type | Description |
|
|
||||||
|--------|-------------|-----------------------------------------------------|
|
|
||||||
| | DictDefault | `DictDefault` mapping configuration keys to values. |
|
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
# validate_config { #axolotl.validate_config }
|
|
||||||
|
|
||||||
```python
|
|
||||||
validate_config(cfg, capabilities=None, env_capabilities=None)
|
|
||||||
```
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"project": "axolotl", "version": "0.0.9999", "count": 0, "items": []}
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
# API Reference {.doc .doc-index}
|
|
||||||
|
|
||||||
## Core API
|
|
||||||
@@ -2,5 +2,3 @@ pre-commit
|
|||||||
black
|
black
|
||||||
mypy
|
mypy
|
||||||
types-requests
|
types-requests
|
||||||
quartodoc
|
|
||||||
quarto-cli
|
|
||||||
|
|||||||
@@ -2,20 +2,6 @@
|
|||||||
|
|
||||||
import pkgutil
|
import pkgutil
|
||||||
|
|
||||||
from .cli.config import choose_config, load_cfg, validate_config
|
|
||||||
from .datasets import ConstantLengthDataset, TokenizedPromptDataset
|
|
||||||
from .evaluate import evaluate
|
|
||||||
from .train import train
|
|
||||||
|
|
||||||
__path__ = pkgutil.extend_path(__path__, __name__) # Make this a namespace package
|
__path__ = pkgutil.extend_path(__path__, __name__) # Make this a namespace package
|
||||||
__version__ = "0.6.0"
|
|
||||||
|
|
||||||
__all__ = [
|
__version__ = "0.6.0"
|
||||||
"train",
|
|
||||||
"evaluate",
|
|
||||||
"TokenizedPromptDataset",
|
|
||||||
"ConstantLengthDataset",
|
|
||||||
"load_cfg",
|
|
||||||
"choose_config",
|
|
||||||
"validate_config",
|
|
||||||
]
|
|
||||||
|
|||||||
@@ -13,6 +13,12 @@ class PreprocessCliArgs:
|
|||||||
debug_num_examples: int = field(default=1)
|
debug_num_examples: int = field(default=1)
|
||||||
prompter: Optional[str] = field(default=None)
|
prompter: Optional[str] = field(default=None)
|
||||||
download: Optional[bool] = field(default=True)
|
download: Optional[bool] = field(default=True)
|
||||||
|
iterable: Optional[bool] = field(
|
||||||
|
default=None,
|
||||||
|
metadata={
|
||||||
|
"help": "Use IterableDataset for streaming processing of large datasets"
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|||||||
Reference in New Issue
Block a user