Merge pull request #135 from NanoCode012/fix/grad-accu-readme

Fix: Update doc for grad_accu and add validation tests for batch size
2023-06-01 06:33:05 +09:00
parent a6f5e5eaec 3c71c8debe
commit 288fd62431
3 changed files with 26 additions and 0 deletions
--- a/README.md
+++ b/README.md
@@ -397,6 +397,7 @@ Add below flag to train command above
 Please reduce any below
  - `micro_batch_size`
  - `eval_batch_size`
+  - `gradient_accumulation_steps`
  - `sequence_len`

 > RuntimeError: expected scalar type Float but found Half
--- a/src/axolotl/utils/validation.py
+++ b/src/axolotl/utils/validation.py
@@ -8,6 +8,12 @@ def validate_config(cfg):
        raise ValueError(
            "please set only one of gradient_accumulation_steps or batch_size"
        )
+    if cfg.batch_size:
+        logging.warning(
+            "%s\n%s",
+            "batch_size is not recommended. Please use gradient_accumulation_steps instead.",
+            "To calculate the equivalent gradient_accumulation_steps, divide batch_size / micro_batch_size / number of gpus.",
+        )
    if cfg.load_4bit:
        raise ValueError(
            "cfg.load_4bit parameter has been deprecated and replaced by cfg.gptq"
--- a/tests/test_validation.py
+++ b/tests/test_validation.py
@@ -1,6 +1,8 @@
 """Module for testing the validation module"""

+import logging
 import unittest
+from typing import Optional

 import pytest

@@ -13,6 +15,12 @@ class ValidationTest(unittest.TestCase):
    Test the validation module
    """

+    _caplog: Optional[pytest.LogCaptureFixture] = None
+
+    @pytest.fixture(autouse=True)
+    def inject_fixtures(self, caplog):
+        self._caplog = caplog
+
    def test_load_4bit_deprecate(self):
        cfg = DictDefault(
            {
@@ -23,6 +31,17 @@ class ValidationTest(unittest.TestCase):
        with pytest.raises(ValueError):
            validate_config(cfg)

+    def test_batch_size_unused_warning(self):
+        cfg = DictDefault(
+            {
+                "batch_size": 32,
+            }
+        )
+
+        with self._caplog.at_level(logging.WARNING):
+            validate_config(cfg)
+            assert "batch_size is not recommended" in self._caplog.records[0].message
+
    def test_qlora(self):
        base_cfg = DictDefault(
            {