Add a16w8 per-op test for bmm (pytorch#19599)

christine-long-meta · web-flow · commit afd32cc6cf24 · 2026-05-19T15:52:55.000-04:00
Summary:
Add int16 activation / int8 weight (a16w8) quantization tests for
`aten.bmm` on Ethos-U55 and Ethos-U85.

## Context
Batch matrix multiply (`bmm`) implements the core `Q @ K^T` and
`attn_weights @ V` operations in the multi-head attention of the
EMG2Pose Conformer. At int16 IO precision the accumulator width and
rescale path differ between U55 and U85, so dedicated per-op coverage is
needed to catch numerics divergence before it surfaces as an end-to-end
SNR regression. The test matrix includes square, rectangular, and
large-batch configurations to exercise different tiling strategies in
the Vela backend.

Also removes unused `aten_op_mm` / `exir_op_mm` variables that were dead
code in `test_bmm.py`.

## Changes
- Add `a16w8_bmm_test_parameters` dict with 5 test configurations
covering same-shape, different-shape, rectangular, batch-10, and
negative-value tensors
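- Add `test_bmm_a16w8_u55_INT` using `OpNotSupportedPipeline` with
`u55_subset=True, quantize=True, tosa_extensions=["int16"]` and the
symmetric a16w8 quantization config set on the quantizer; it verifies
that bmm with int16 inputs is rejected (left non-delegated) on U55
- Add `test_bmm_a16w8_u85_INT` using `EthosU85PipelineINT` with
`a16w8_quantization=True, symmetric_io_quantization=True, qtol=1,
epsilon=2**-16`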
- Remove unused `aten_op_mm` and `exir_op_mm` variables
- Register `ops/test_bmm.py` in `fbcode/` and `xplat/` `targets.bzl`

bypass-pytorch-oss-checks

Reviewed By: Ninja91

Differential Revision: D104532363
diff --git a/backends/arm/test/ops/test_bmm.py b/backends/arm/test/ops/test_bmm.py
@@ -1,4 +1,4 @@
-# Copyright 2024-2025 Arm Limited and/or its affiliates.
+# Copyright 2024-2026 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -10,11 +10,12 @@
 
 import torch
 
+from executorch.backends.arm.quantizer import get_symmetric_a16w8_quantization_config
 from executorch.backends.arm.test import common
-
 from executorch.backends.arm.test.tester.test_pipeline import (
     EthosU55PipelineINT,
     EthosU85PipelineINT,
+    OpNotSupportedPipeline,
     TosaPipelineFP,
     TosaPipelineINT,
     VgfPipeline,
@@ -23,9 +24,6 @@
 aten_op_bmm = "torch.ops.aten.bmm.default"
 exir_op_bmm = "executorch_exir_dialects_edge__ops_aten_bmm_default"
 
-aten_op_mm = "torch.ops.aten.matmul.default"
-exir_op_mm = "executorch_exir_dialects_edge__ops_aten_matmul_default"
-
 input_t1 = Tuple[torch.Tensor, torch.Tensor]  # Input x
 
 
@@ -191,3 +189,52 @@ def test_bmm_vgf_quant_single_input(test_data: input_t1):
         quantize=True,
     )
     pipeline.run()
+
+
+a16w8_bmm_test_parameters = {  # case name -> zero-arg factory of an input pair
+    "rand_same": lambda: (torch.rand(2, 1, 1), torch.rand(2, 1, 1)),
+    "rand_diff": lambda: (torch.rand(5, 3, 5), torch.rand(5, 5, 2)),
+    "rand_rect": lambda: (torch.rand(1, 55, 3), torch.rand(1, 3, 44)),
+    "rand_batch10": lambda: (torch.rand(10, 1, 10), torch.rand(10, 10, 5)),
+    "rand_neg": lambda: (
+        -10 * torch.randn(2, 32, 64),  # normal samples scaled by -10
+        5 + 5 * torch.randn(2, 64, 32),  # normal samples scaled by 5, offset +5
+    ),
+}  # each a16w8 test calls the factory as test_data() to build fresh tensors
+
+
+@common.parametrize("test_data", a16w8_bmm_test_parameters)
+@common.XfailIfNoCorstone300  # NOTE(review): confirm a rejection check needs the FVP
+def test_bmm_a16w8_u55_INT(test_data: input_t1):  # test_data is the case factory
+    """U55 does not support bmm with INT16 inputs.
+
+    Verify bmm is rejected.
+
+    """
+    pipeline = OpNotSupportedPipeline[input_t1](
+        BMM(),
+        test_data(),  # build this case's input pair
+        non_delegated_ops={exir_op_bmm: 1},  # expect the one bmm left undelegated
+        n_expected_delegates=0,  # expect no delegate at all
+        u55_subset=True,  # presumably limits checks to the U55 op subset - confirm
+        quantize=True,
+        tosa_extensions=["int16"],
+    )
+    pipeline.quantizer.set_global(get_symmetric_a16w8_quantization_config())
+    pipeline.run()
+
+
+@common.parametrize("test_data", a16w8_bmm_test_parameters)
+@common.XfailIfNoCorstone320
+def test_bmm_a16w8_u85_INT(test_data: input_t1):  # test_data is the case factory
+    pipeline = EthosU85PipelineINT[input_t1](
+        BMM(),
+        test_data(),  # build this case's input pair
+        aten_op_bmm,
+        exir_op_bmm,
+        a16w8_quantization=True,  # int16 activations, int8 weights
+        symmetric_io_quantization=True,
+        qtol=1,  # presumably the quantized-output tolerance - confirm
+        epsilon=2**-16,
+    )
+    pipeline.run()
diff --git a/backends/arm/test/targets.bzl b/backends/arm/test/targets.bzl
@@ -42,6 +42,7 @@ def define_arm_tests():
         "ops/test_var.py",
         "ops/test_conv1d.py",
         "ops/test_gelu.py",
+        "ops/test_bmm.py",
     ]
 
     # Quantization

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -42,6 +42,7 @@ def define_arm_tests():` |
| `42` | `42` | `"ops/test_var.py",` |
| `43` | `43` | `"ops/test_conv1d.py",` |
| `44` | `44` | `"ops/test_gelu.py",` |
| | `45` | `+ "ops/test_bmm.py",` |
| `45` | `46` | `]` |
| `46` | `47` | |
| `47` | `48` | `# Quantization` |