Compare commits
6 Commits
fa-check
...
ec94d632f3
| Author | SHA1 | Date | |
|---|---|---|---|
| | ec94d632f3 | | |
| | e8bd3b0b3b | | |
| | 5a08b94668 | | |
| | ecb8c1f4b3 | | |
| | ab57be6526 | | |
| | c9640bca2c | | |
2
.github/workflows/preview-docs.yml
vendored
2
.github/workflows/preview-docs.yml
vendored
@@ -10,7 +10,7 @@ on:
|
||||
- '**/*.qmd' # any Quarto file
|
||||
- '_quarto.yml'
|
||||
- docs/scripts/generate_config_docs.py
|
||||
- src/axolotl/utils/schemas/**.py
|
||||
- src/axolotl/**/*.py
|
||||
|
||||
permissions:
|
||||
checks: write
|
||||
|
||||
10
TODO.md
10
TODO.md
@@ -1,10 +0,0 @@
|
||||
# todo list
|
||||
|
||||
- [] Validation of parameters for combinations that won't work
|
||||
|
||||
|
||||
|
||||
## things that are known not to work
|
||||
|
||||
- FSDP offload and gradient_checkpointing - https://github.com/pytorch/pytorch/issues/82203
|
||||
- adamw_bnb_8bit doesn't play well with FSDP offload
|
||||
@@ -14,7 +14,7 @@ packaging==23.2
|
||||
|
||||
huggingface_hub>=0.33.0
|
||||
peft==0.17.0
|
||||
transformers @ git+https://github.com/vasqu/transformers@fix-fa-integration
|
||||
transformers==4.55.0
|
||||
tokenizers>=0.21.1
|
||||
accelerate==1.10.0
|
||||
datasets==4.0.0
|
||||
|
||||
@@ -74,7 +74,9 @@ def generate_config_files(config: str, sweep: str | None) -> Iterator[tuple[str,
|
||||
sweep: Sweep configuration file
|
||||
|
||||
Yields:
|
||||
Tuple of configuration file name and whether this is a group of configurations
|
||||
tuple[str, bool]: (config_file, is_group)
|
||||
- config_file: configuration file path
|
||||
- is_group: whether this is a group of configurations
|
||||
"""
|
||||
|
||||
if not sweep:
|
||||
|
||||
@@ -18,7 +18,9 @@ from torch.distributed import DeviceMesh
|
||||
try:
|
||||
from transformers.modeling_flash_attention_utils import _flash_supports_window
|
||||
except ImportError:
|
||||
_flash_supports_window = True
|
||||
from transformers.modeling_flash_attention_utils import (
|
||||
_flash_supports_window_size as _flash_supports_window,
|
||||
)
|
||||
|
||||
from axolotl.monkeypatch.utils import get_cu_seqlens_from_pos_ids
|
||||
from axolotl.utils.logging import get_logger
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
import unittest
|
||||
|
||||
from axolotl.monkeypatch.transformers.trainer_loss_calc import (
|
||||
check_evaluation_loop_is_fsdp2_patchable,
|
||||
check_evaluation_loop_is_patchable,
|
||||
check_maybe_log_save_evaluate_is_patchable,
|
||||
)
|
||||
@@ -19,6 +20,7 @@ class TestTrainerLossCalc(unittest.TestCase):
|
||||
the patched code changes upstream.
|
||||
"""
|
||||
assert check_evaluation_loop_is_patchable()
|
||||
assert check_evaluation_loop_is_fsdp2_patchable()
|
||||
assert check_maybe_log_save_evaluate_is_patchable()
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user