fix: fix CONTRIBUTING.md placeholders, bare except clauses, and add convert.py tests (#3485) [skip ci]

* docs: fix codestyle placeholders in CONTRIBUTING.md

Replace unresolved {codestyle} and {URLofCodestyle} template
variables with Ruff, the project's actual linter/formatter
as configured in .pre-commit-config.yaml.

* fix: replace bare except clauses with specific exception types

- quantization.py: use except ImportError for optional torchao imports
  (consistent with line 48 which already uses ImportError correctly)
- cli/config.py: use except (RuntimeError, AssertionError) for CUDA
  device property query

Prevents masking unrelated errors like KeyboardInterrupt or SystemExit.

* test: add unit tests for convert.py JSON/JSONL utilities

Cover FileReader, FileWriter, StdoutWriter, JsonParser,
JsonlSerializer, and JsonToJsonlConverter with 8 test cases
including roundtrip and edge case (empty list) scenarios.

Previously this module had zero test coverage.

* fix: address CodeRabbit review feedback

- quantization.py: catch (ImportError, RuntimeError) for optional
  torchao imports; CUDA wheel/GPU mismatches raise RuntimeError,
  not ImportError
- convert.py: remove unused output_file_path parameter from
  JsonToJsonlConverter.convert() — FileWriter already holds the
  output path from construction
- tests/test_convert.py: update call site to match new signature
This commit is contained in:
Aarush
2026-03-16 09:42:40 +05:30
committed by GitHub
parent f56efdb4ab
commit defee62d99
5 changed files with 97 additions and 6 deletions

View File

@@ -68,7 +68,7 @@ You can skip certain CI checks by including specific keywords in your commit mes
### Code Style ### Code Style
axolotl uses [{codestyle}]({URLofCodestyle}) as its code style guide. Please ensure that your code follows these guidelines. axolotl uses [Ruff](https://docs.astral.sh/ruff/) as its code style guide. Please ensure that your code follows these guidelines.
Use the pre-commit linter to ensure that your code is formatted consistently. Use the pre-commit linter to ensure that your code is formatted consistently.
```bash ```bash
@@ -83,6 +83,6 @@ Write clear and concise commit messages that briefly describe the changes made i
- [GitHub Help](https://help.github.com/) - [GitHub Help](https://help.github.com/)
- [GitHub Pull Request Documentation](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests) - [GitHub Pull Request Documentation](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests)
- [{codestyle}]({URLofCodestyle}) - [Ruff](https://docs.astral.sh/ruff/)
Thank you once again for your interest in contributing to axolotl. We look forward to collaborating with you and creating an even better project together! Thank you once again for your interest in contributing to axolotl. We look forward to collaborating with you and creating an even better project together!

View File

@@ -300,7 +300,7 @@ def load_cfg(
try: try:
device_props = torch.cuda.get_device_properties("cuda") device_props = torch.cuda.get_device_properties("cuda")
gpu_version = "sm_" + str(device_props.major) + str(device_props.minor) gpu_version = "sm_" + str(device_props.major) + str(device_props.minor)
except: except (RuntimeError, AssertionError):
gpu_version = None gpu_version = None
prepare_plugins(cfg) prepare_plugins(cfg)

View File

@@ -67,7 +67,7 @@ class JsonToJsonlConverter:
self.json_parser = json_parser self.json_parser = json_parser
self.jsonl_serializer = jsonl_serializer self.jsonl_serializer = jsonl_serializer
def convert(self, input_file_path, output_file_path): def convert(self, input_file_path):
content = self.file_reader.read(input_file_path) content = self.file_reader.read(input_file_path)
data = self.json_parser.parse(content) data = self.json_parser.parse(content)
# data = [r for r in data if r["conversations"]] # vicuna cleaned has rows with empty conversations # data = [r for r in data if r["conversations"]] # vicuna cleaned has rows with empty conversations

View File

@@ -29,7 +29,7 @@ if version.parse(torch.__version__) >= version.parse("2.8.0"):
from torchao.prototype.mx_formats import NVFP4InferenceConfig from torchao.prototype.mx_formats import NVFP4InferenceConfig
quantization_config_to_str[NVFP4InferenceConfig] = "nvfp4" quantization_config_to_str[NVFP4InferenceConfig] = "nvfp4"
except: except (ImportError, RuntimeError):
pass pass
# int4 weight config imports will fail on machines with fbgemm-gpu installed # int4 weight config imports will fail on machines with fbgemm-gpu installed
@@ -38,7 +38,7 @@ if version.parse(torch.__version__) >= version.parse("2.8.0"):
from torchao.quantization.quant_api import Int4WeightOnlyConfig from torchao.quantization.quant_api import Int4WeightOnlyConfig
quantization_config_to_str[Int4WeightOnlyConfig] = "int4" quantization_config_to_str[Int4WeightOnlyConfig] = "int4"
except: except (ImportError, RuntimeError):
pass pass
try: try:

91
tests/test_convert.py Normal file
View File

@@ -0,0 +1,91 @@
"""Unit tests for src/axolotl/convert.py"""
import json
import pytest
from axolotl.convert import (
FileReader,
FileWriter,
JsonlSerializer,
JsonParser,
JsonToJsonlConverter,
StdoutWriter,
)
class TestJsonParser:
def test_parse_valid_json_array(self):
parser = JsonParser()
result = parser.parse('[{"key": "value"}]')
assert result == [{"key": "value"}]
def test_parse_valid_json_object(self):
parser = JsonParser()
result = parser.parse('{"key": "value"}')
assert result == {"key": "value"}
def test_parse_invalid_json_raises(self):
parser = JsonParser()
with pytest.raises(json.JSONDecodeError):
parser.parse("not valid json")
class TestJsonlSerializer:
def test_serialize_single_item(self):
serializer = JsonlSerializer()
result = serializer.serialize([{"a": 1}])
assert result == '{"a": 1}'
def test_serialize_multiple_items(self):
serializer = JsonlSerializer()
result = serializer.serialize([{"a": 1}, {"b": 2}])
lines = result.split("\n")
assert len(lines) == 2
assert json.loads(lines[0]) == {"a": 1}
assert json.loads(lines[1]) == {"b": 2}
def test_serialize_empty_list(self):
serializer = JsonlSerializer()
result = serializer.serialize([])
assert result == ""
class TestFileReaderWriter:
def test_read_write_roundtrip(self, tmp_path):
test_file = tmp_path / "test.txt"
content = '{"hello": "world"}'
writer = FileWriter(str(test_file))
writer.write(content)
reader = FileReader()
result = reader.read(str(test_file))
assert result == content
class TestStdoutWriter:
def test_write_to_stdout(self, capsys):
writer = StdoutWriter()
writer.write("hello")
captured = capsys.readouterr()
assert captured.out == "hello\n"
class TestJsonToJsonlConverter:
def test_convert_json_to_jsonl(self, tmp_path):
input_data = [{"name": "Alice"}, {"name": "Bob"}]
input_file = tmp_path / "input.json"
output_file = tmp_path / "output.jsonl"
input_file.write_text(json.dumps(input_data), encoding="utf-8")
converter = JsonToJsonlConverter(
FileReader(), FileWriter(str(output_file)), JsonParser(), JsonlSerializer()
)
converter.convert(str(input_file))
result = output_file.read_text(encoding="utf-8")
lines = result.split("\n")
assert len(lines) == 2
assert json.loads(lines[0]) == {"name": "Alice"}
assert json.loads(lines[1]) == {"name": "Bob"}