test(QLayerNorm): add non-affine configurations
dacorvo committed Oct 29, 2024
1 parent f31a083 commit d563d4d
Showing 1 changed file with 14 additions and 10 deletions.
test/nn/test_qlayernorm.py (24 changes: 14 additions & 10 deletions)
@@ -20,9 +20,9 @@
 from optimum.quanto.nn import QLayerNorm
 
 
-def _test_quantize_layernorm(batch_size, tokens, embeddings, dtype, activations, device):
+def _test_quantize_layernorm(batch_size, tokens, embeddings, affine, dtype, activations, device):
     # Instantiate a normalization layer
-    norm = torch.nn.LayerNorm(embeddings).to(dtype).to(device)
+    norm = torch.nn.LayerNorm(embeddings, elementwise_affine=affine).to(dtype).to(device)
     qnorm = QLayerNorm.from_module(norm, activations=activations)
     qinputs = random_qactivation((batch_size,) + (tokens, embeddings), qtype=activations, dtype=dtype).to(device)
     # Calibrate to avoid clipping and to set the correct dtype
@@ -43,38 +43,42 @@ def _test_quantize_layernorm(batch_size, tokens, embeddings, dtype, activations,
 
 @pytest.mark.parametrize("batch_size", [1, 10])
 @pytest.mark.parametrize("tokens, embeddings", [(32, 32), (10, 32)])
-def test_quantize_layernorm_float16_activations_int8(batch_size, tokens, embeddings, device):
-    _test_quantize_layernorm(batch_size, tokens, embeddings, torch.float16, qint8, device)
+@pytest.mark.parametrize("affine", [True, False], ids=["affine", "non-affine"])
+def test_quantize_layernorm_float16_activations_int8(batch_size, tokens, embeddings, affine, device):
+    _test_quantize_layernorm(batch_size, tokens, embeddings, affine, torch.float16, qint8, device)
 
 
 @pytest.mark.parametrize("batch_size", [1, 10])
 @pytest.mark.parametrize("tokens, embeddings", [(32, 32), (10, 32)])
-def test_quantize_layernorm_float32_activations_int8(batch_size, tokens, embeddings, device):
-    _test_quantize_layernorm(batch_size, tokens, embeddings, torch.float32, qint8, device)
+@pytest.mark.parametrize("affine", [True, False], ids=["affine", "non-affine"])
+def test_quantize_layernorm_float32_activations_int8(batch_size, tokens, embeddings, affine, device):
+    _test_quantize_layernorm(batch_size, tokens, embeddings, affine, torch.float32, qint8, device)
 
 
 @pytest.mark.parametrize("batch_size", [1, 10])
 @pytest.mark.parametrize("tokens, embeddings", [(32, 32), (10, 32)])
+@pytest.mark.parametrize("affine", [True, False], ids=["affine", "non-affine"])
 @pytest.mark.parametrize(
     "activations",
     [qfloat8_e5m2, qfloat8_e4m3fn, qfloat8_e4m3fnuz],
     ids=["a-float8-e5m2", "a-float8-e4m3", "a-float8-e4m3-uz"],
 )
 @pytest.mark.skip_device("mps")
-def test_quantize_layernorm_float16_activations_float8(batch_size, tokens, embeddings, activations, device):
-    _test_quantize_layernorm(batch_size, tokens, embeddings, torch.float16, activations, device)
+def test_quantize_layernorm_float16_activations_float8(batch_size, tokens, embeddings, affine, activations, device):
+    _test_quantize_layernorm(batch_size, tokens, embeddings, affine, torch.float16, activations, device)
 
 
 @pytest.mark.parametrize("batch_size", [1, 10])
 @pytest.mark.parametrize("tokens, embeddings", [(32, 32), (10, 32)])
+@pytest.mark.parametrize("affine", [True, False], ids=["affine", "non-affine"])
 @pytest.mark.parametrize(
     "activations",
     [qfloat8_e5m2, qfloat8_e4m3fn, qfloat8_e4m3fnuz],
     ids=["a-float8-e5m2", "a-float8-e4m3", "a-float8-e4m3-uz"],
 )
 @pytest.mark.skip_device("mps")
-def test_quantize_layernorm_float32_activations_float8(batch_size, tokens, embeddings, activations, device):
-    _test_quantize_layernorm(batch_size, tokens, embeddings, torch.float32, activations, device)
+def test_quantize_layernorm_float32_activations_float8(batch_size, tokens, embeddings, affine, activations, device):
+    _test_quantize_layernorm(batch_size, tokens, embeddings, affine, torch.float32, activations, device)
 
 
 def test_quantize_layernom_no_activation():
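For context, here is a minimal standalone sketch of the non-affine configuration these tests now cover. It is hedged, not part of the commit: it assumes Calibration and qint8 are importable from optimum.quanto's top-level API alongside the QLayerNorm import used above, and the input shape and dtype are illustrative rather than taken from the test suite.

import torch
from optimum.quanto import Calibration, qint8
from optimum.quanto.nn import QLayerNorm

embeddings = 32
# A LayerNorm with no learnable weight/bias: the new non-affine configuration
norm = torch.nn.LayerNorm(embeddings, elementwise_affine=False)
# Wrap it so that its activations are quantized to int8
qnorm = QLayerNorm.from_module(norm, activations=qint8)
inputs = torch.randn(1, 32, embeddings)
# Calibrate once to record activation scales, then run quantized inference
with Calibration():
    qnorm(inputs)
outputs = qnorm(inputs)

The point of the added parametrization is that this path exercises quantized activations even when the layer has no weight or bias tensors at all, which the previous tests (always affine) did not check.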
