# axolotl/tests/e2e/kernels/test_quantize.py

"""Tests for quantization utility functions."""

import torch
from bitsandbytes.functional import QuantState

from axolotl.kernels.quantize import dequantize, dequantize_weight


def test_dequantize_null_state():
    """Check that dequantize hands back an equal tensor when no quant state is given."""
    weight = torch.randn(32, 32)
    passthrough = dequantize(weight, None)
    assert torch.equal(passthrough, weight)


def test_dequantize_shape_preservation():
    """Check shape, dtype and device of a dequantized CUDA weight.

    The quant state is assembled from placeholder tensors (all-ones absmax,
    random integer codes, zero offsets) with a nested ``state2``, so only the
    metadata of the result is asserted, not its values.
    """
    shape = (32, 32)
    n_rows = shape[0]
    weight = torch.randn(shape, device="cuda")

    # Settings shared by the outer state and the nested one.
    common = {
        "shape": shape,
        "dtype": torch.float16,
        "blocksize": 32,
        "quant_type": "nf4",
    }

    # Build the outer tensors first, then the nested state, so the random
    # draws happen in the same order as in one nested constructor call.
    outer_absmax = torch.ones(n_rows, device="cuda")
    outer_code = torch.randint(0, 15, shape, device="cuda")
    outer_offset = torch.zeros(n_rows, dtype=torch.int32, device="cuda")

    nested_state = QuantState(
        absmax=torch.ones(n_rows, device="cuda"),
        code=torch.randint(0, 15, shape, device="cuda"),
        offset=None,
        state2=None,
        **common,
    )
    quant_state = QuantState(
        absmax=outer_absmax,
        code=outer_code,
        offset=outer_offset,
        state2=nested_state,
        **common,
    )

    dequantized = dequantize(weight, quant_state)
    assert dequantized.shape == shape
    assert dequantized.dtype == torch.float16
    assert dequantized.device == weight.device


def test_dequantize_transposed():
    """Check dequantize on an input the test treats as transposed.

    The weight has shape ``(1, 32)`` on CUDA while the quant-state tensors are
    created without an explicit device. Only the first dimension of the
    result is compared against the quant-state shape.
    """
    shape = (32, 32)
    weight = torch.randn(1, shape[1], device="cuda")  # Transposed input

    # Settings shared by the outer state and the nested one.
    common = {
        "shape": shape,
        "dtype": torch.float16,
        "blocksize": 32,
        "quant_type": "nf4",
    }

    # Build the outer tensors first, then the nested state, so the random
    # draws happen in the same order as in one nested constructor call.
    outer_absmax = torch.ones(1)
    outer_code = torch.randint(0, 15, shape)
    outer_offset = torch.zeros(1, dtype=torch.int32)

    nested_state = QuantState(
        absmax=torch.ones(1),
        code=torch.randint(0, 15, shape),
        offset=None,
        state2=None,
        **common,
    )
    quant_state = QuantState(
        absmax=outer_absmax,
        code=outer_code,
        offset=outer_offset,
        state2=nested_state,
        **common,
    )

    dequantized = dequantize(weight, quant_state)
    expected_rows = shape[0]
    assert dequantized.shape[0] == expected_rows

def test_dequantize_output_tensor():
    """Check that dequantize returns the caller-supplied ``out`` tensor itself.

    A preallocated float16 CUDA buffer is passed as ``out``; the test asserts
    identity (``is``), not just equality, between that buffer and the result.
    """
    shape = (32, 32)
    n_rows = shape[0]
    weight = torch.randn(shape, device="cuda")
    out = torch.empty(shape, dtype=torch.float16, device="cuda")

    # Settings shared by the outer state and the nested one.
    common = {
        "shape": shape,
        "dtype": torch.float16,
        "blocksize": 32,
        "quant_type": "nf4",
    }

    # Build the outer tensors first, then the nested state, so the random
    # draws happen in the same order as in one nested constructor call.
    outer_absmax = torch.ones(n_rows)
    outer_code = torch.randint(0, 15, shape)
    outer_offset = torch.zeros(n_rows, dtype=torch.int32)

    nested_state = QuantState(
        absmax=torch.ones(n_rows),
        code=torch.randint(0, 15, shape),
        offset=None,
        state2=None,
        **common,
    )
    quant_state = QuantState(
        absmax=outer_absmax,
        code=outer_code,
        offset=outer_offset,
        state2=nested_state,
        **common,
    )

    returned = dequantize(weight, quant_state, out=out)
    assert returned is out


def test_dequantize_weight_plain_tensor():
    """Check that dequantize_weight returns an equal tensor for unquantized input."""
    weight = torch.randn(32, 64)
    assert torch.equal(
        dequantize_weight(weight, quant_state=None, transpose=False),
        weight,
    )


def test_dequantize_weight_plain_tensor_transpose():
    """Check that dequantize_weight with ``transpose=True`` transposes unquantized input."""
    rows, cols = 32, 64
    weight = torch.randn(rows, cols)

    flipped = dequantize_weight(weight, quant_state=None, transpose=True)

    # Shape first, then values against the plain transpose of the input.
    assert flipped.shape == (cols, rows)
    expected = weight.t()
    assert torch.equal(flipped, expected)