Compare commits
1 Commits
autodoc
...
iterable-o
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1cfb8feb2d |
64
_quarto.yml
64
_quarto.yml
@@ -19,47 +19,35 @@ website:
|
||||
href: https://discord.gg/7m9sfhzaf3
|
||||
|
||||
sidebar:
|
||||
pinned: true
|
||||
collapse-level: 2
|
||||
style: docked
|
||||
contents:
|
||||
- text: Home
|
||||
href: index.qmd
|
||||
- section: "How-To Guides"
|
||||
contents:
|
||||
- docs/debugging.qmd
|
||||
- docs/multipack.qmd
|
||||
- docs/fsdp_qlora.qmd
|
||||
- docs/input_output.qmd
|
||||
- docs/rlhf.qmd
|
||||
- docs/nccl.qmd
|
||||
- docs/mac.qmd
|
||||
- docs/multi-node.qmd
|
||||
- docs/unsloth.qmd
|
||||
- docs/amd_hpc.qmd
|
||||
- section: "Dataset Formats"
|
||||
contents: docs/dataset-formats/*
|
||||
- section: "Reference"
|
||||
contents:
|
||||
- docs/config.qmd
|
||||
- section: "API Reference"
|
||||
contents: "{{ api_contents }}"
|
||||
- text: "FAQ"
|
||||
href: docs/faq.qmd
|
||||
pinned: true
|
||||
collapse-level: 2
|
||||
style: docked
|
||||
contents:
|
||||
- text: Home
|
||||
href: index.qmd
|
||||
- section: "How-To Guides"
|
||||
contents:
|
||||
# TODO Edit folder structure after we have more docs.
|
||||
- docs/debugging.qmd
|
||||
- docs/multipack.qmd
|
||||
- docs/fsdp_qlora.qmd
|
||||
- docs/input_output.qmd
|
||||
- docs/rlhf.qmd
|
||||
- docs/nccl.qmd
|
||||
- docs/mac.qmd
|
||||
- docs/multi-node.qmd
|
||||
- docs/unsloth.qmd
|
||||
- docs/amd_hpc.qmd
|
||||
- section: "Dataset Formats"
|
||||
contents: docs/dataset-formats/*
|
||||
- section: "Reference"
|
||||
contents:
|
||||
- docs/config.qmd
|
||||
- docs/faq.qmd
|
||||
|
||||
|
||||
format:
|
||||
html:
|
||||
theme: materia
|
||||
css: styles.css
|
||||
toc: true
|
||||
|
||||
quartodoc:
|
||||
package: axolotl
|
||||
parser: google
|
||||
dir: api
|
||||
sections:
|
||||
- title: Core API
|
||||
desc: Core functionality of Axolotl
|
||||
|
||||
metadata-files:
|
||||
- api/_sidebar.yml
|
||||
|
||||
17
_sidebar.yml
17
_sidebar.yml
@@ -1,17 +0,0 @@
|
||||
website:
|
||||
sidebar:
|
||||
- collapse-level: 2
|
||||
contents:
|
||||
- href: introduction.qmd
|
||||
text: Introduction
|
||||
- contents:
|
||||
- reference/index.qmd
|
||||
- contents: []
|
||||
section: axolotl
|
||||
section: Reference
|
||||
- href: basics-summary.qmd
|
||||
text: Basics
|
||||
id: reference
|
||||
search: true
|
||||
style: docked
|
||||
- id: dummy-sidebar
|
||||
@@ -1,11 +0,0 @@
|
||||
# ConstantLengthDataset { #axolotl.ConstantLengthDataset }
|
||||
|
||||
```python
|
||||
ConstantLengthDataset(self, tokenizer, datasets, seq_length=2048)
|
||||
```
|
||||
|
||||
Iterable dataset that returns constant length chunks of tokens from stream of text files.
|
||||
Args:
|
||||
tokenizer (Tokenizer): The processor used for processing the data.
|
||||
datasets (dataset.Dataset): Dataset with text files.
|
||||
seq_length (int): Length of token sequences to return.
|
||||
@@ -1,19 +0,0 @@
|
||||
# TokenizedPromptDataset { #axolotl.TokenizedPromptDataset }
|
||||
|
||||
```python
|
||||
TokenizedPromptDataset(
|
||||
self,
|
||||
prompt_tokenizer,
|
||||
dataset,
|
||||
process_count=None,
|
||||
keep_in_memory=False,
|
||||
**kwargs,
|
||||
)
|
||||
```
|
||||
|
||||
Dataset that returns tokenized prompts from a stream of text files.
|
||||
Args:
|
||||
prompt_tokenizer (PromptTokenizingStrategy): The prompt tokenizing method for processing the data.
|
||||
dataset (dataset.Dataset): Dataset with text files.
|
||||
process_count (int): Number of processes to use for tokenizing.
|
||||
keep_in_memory (bool): Whether to keep the tokenized dataset in memory.
|
||||
@@ -1,28 +0,0 @@
|
||||
# choose_config { #axolotl.choose_config }
|
||||
|
||||
```python
|
||||
choose_config(path)
|
||||
```
|
||||
|
||||
Helper method for choosing an `axolotl` config YAML file (considering only files
|
||||
ending with `.yml` or `.yaml`). If more than one config file exists in the passed
|
||||
`path`, the user is prompted to choose one.
|
||||
|
||||
## Parameters {.doc-section .doc-section-parameters}
|
||||
|
||||
| Name | Type | Description | Default |
|
||||
|--------|--------|-----------------------------------------------|------------|
|
||||
| path | Path | Directory in which config file(s) are stored. | _required_ |
|
||||
|
||||
## Returns {.doc-section .doc-section-returns}
|
||||
|
||||
| Name | Type | Description |
|
||||
|--------|--------|----------------------------------------------------------------------------------|
|
||||
| | str | Path to either (1) the sole YAML file, or (2) if more than one YAML file exists, |
|
||||
| | str | the user-selected YAML file. |
|
||||
|
||||
## Raises {.doc-section .doc-section-raises}
|
||||
|
||||
| Name | Type | Description |
|
||||
|--------|------------|-------------------------------------------------|
|
||||
| | ValueError | If no YAML files are found in the given `path`. |
|
||||
@@ -1,5 +0,0 @@
|
||||
# Function reference {.doc .doc-index}
|
||||
|
||||
## Core API
|
||||
|
||||
Core functionality of Axolotl
|
||||
@@ -1,21 +0,0 @@
|
||||
# load_cfg { #axolotl.load_cfg }
|
||||
|
||||
```python
|
||||
load_cfg(config=Path('examples/'), **kwargs)
|
||||
```
|
||||
|
||||
Loads the `axolotl` configuration stored at `config`, validates it, and performs
|
||||
various setup.
|
||||
|
||||
## Parameters {.doc-section .doc-section-parameters}
|
||||
|
||||
| Name | Type | Description | Default |
|
||||
|--------|--------------------|--------------------------------------------------------------|---------------------|
|
||||
| config | Union\[str, Path\] | Path (local or remote) to `axolotl` config YAML file. | `Path('examples/')` |
|
||||
| kwargs | | Additional keyword arguments to override config file values. | `{}` |
|
||||
|
||||
## Returns {.doc-section .doc-section-returns}
|
||||
|
||||
| Name | Type | Description |
|
||||
|--------|-------------|-----------------------------------------------------|
|
||||
| | DictDefault | `DictDefault` mapping configuration keys to values. |
|
||||
@@ -1,5 +0,0 @@
|
||||
# validate_config { #axolotl.validate_config }
|
||||
|
||||
```python
|
||||
validate_config(cfg, capabilities=None, env_capabilities=None)
|
||||
```
|
||||
@@ -1 +0,0 @@
|
||||
{"project": "axolotl", "version": "0.0.9999", "count": 0, "items": []}
|
||||
@@ -1,3 +0,0 @@
|
||||
# API Reference {.doc .doc-index}
|
||||
|
||||
## Core API
|
||||
@@ -2,5 +2,3 @@ pre-commit
|
||||
black
|
||||
mypy
|
||||
types-requests
|
||||
quartodoc
|
||||
quarto-cli
|
||||
|
||||
@@ -2,20 +2,6 @@
|
||||
|
||||
import pkgutil
|
||||
|
||||
from .cli.config import choose_config, load_cfg, validate_config
|
||||
from .datasets import ConstantLengthDataset, TokenizedPromptDataset
|
||||
from .evaluate import evaluate
|
||||
from .train import train
|
||||
|
||||
__path__ = pkgutil.extend_path(__path__, __name__) # Make this a namespace package
|
||||
__version__ = "0.6.0"
|
||||
|
||||
__all__ = [
|
||||
"train",
|
||||
"evaluate",
|
||||
"TokenizedPromptDataset",
|
||||
"ConstantLengthDataset",
|
||||
"load_cfg",
|
||||
"choose_config",
|
||||
"validate_config",
|
||||
]
|
||||
__version__ = "0.6.0"
|
||||
|
||||
@@ -13,6 +13,12 @@ class PreprocessCliArgs:
|
||||
debug_num_examples: int = field(default=1)
|
||||
prompter: Optional[str] = field(default=None)
|
||||
download: Optional[bool] = field(default=True)
|
||||
iterable: Optional[bool] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "Use IterableDataset for streaming processing of large datasets"
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
Reference in New Issue
Block a user