Arm backend: Annotate maximum/minimum ops with independent observers (pytorch#18009)

AdrianLundell · web-flow · commit 495eec768a92 · 2026-03-09T19:58:43.000+01:00
Previously, maximum/minimum were annotated with shared observers, which
meant that inputs and outputs were all quantized within the same range.
In cases where the output is heavily truncated, this left a lot of the
output range unused, leading to unnecessarily poor accuracy.

Signed-off-by: Adrian Lundell <adrian.lundell@arm.com>
diff --git a/backends/arm/_passes/insert_rescales_pass.py b/backends/arm/_passes/insert_rescales_pass.py
@@ -200,15 +200,23 @@ def _get_output_qparams(
 
         if target in [
             exir_ops.edge.aten.abs.default,
-            exir_ops.edge.aten.maximum.default,
-            exir_ops.edge.aten.minimum.default,
             exir_ops.edge.aten.sum.dim_IntList,
             exir_ops.edge.aten.add.Tensor,
             exir_ops.edge.aten.sub.Tensor,
         ]:
             # The op has not altered the scale; the output scale is equal to
             # the operands' scales.
             return self._int32_qargs(inputs_qparams[0].get_scale_per_tensor())
+        elif target in [
+            exir_ops.edge.aten.maximum.default,
+            exir_ops.edge.aten.minimum.default,
+        ]:
+            # Min/Max use a shared INT32 accumulator scale for inputs, then
+            # rescale to the original output activation scale.
+            min_scale = min(
+                [qp.get_scale_per_tensor() for qp in inputs_qparams.values()]
+            )
+            return self._int32_qargs(min_scale)
         elif target in [
             exir_ops.edge.aten.eq.Tensor,
             exir_ops.edge.aten.ge.Tensor,
diff --git a/backends/arm/quantizer/quantization_annotator.py b/backends/arm/quantizer/quantization_annotator.py
@@ -666,16 +666,11 @@ def any_or_hardtanh_min_zero(n: Node):
         torch.ops.aten.minimum.default,
         torch.ops.aten.maximum.default,
     ):
-        lhs_node = ensure_type(Node, node.args[0])
-        shared_qspec = SharedQuantizationSpec((lhs_node, node))
         quant_properties.quant_inputs = [
             _QuantProperty(0, input_act_qspec),
-            _QuantProperty(
-                1,
-                input_act_qspec if node.args[0] == node.args[1] else shared_qspec,
-            ),
+            _QuantProperty(1, input_act_qspec),
         ]
-        quant_properties.quant_output = _QuantProperty(0, shared_qspec)
+        quant_properties.quant_output = _QuantProperty(0, output_act_qspec)
     elif node.target in (torch.ops.aten.where.self,):
         true_node = ensure_type(Node, node.args[1])
         input_qspec = (
diff --git a/backends/arm/test/misc/test_shared_qspecs.py b/backends/arm/test/misc/test_shared_qspecs.py
@@ -126,10 +126,12 @@ class SharedQspecInputForkNonShared(torch.nn.Module):
     outputs_qspecs = {None: 1}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
-            (0.01959827, -26, -128, 127, torch.int8): 4,
+            (0.015678614, -64, -128, 127, torch.int8): 3,
+            (0.015678614, 0, -128, 127, torch.int8): 1,
         },
         "quantized_decomposed.quantize_per_tensor.default": {
-            (0.01959827, -26, -128, 127, torch.int8): 4,
+            (0.015678614, -64, -128, 127, torch.int8): 3,
+            (0.015678614, 0, -128, 127, torch.int8): 1,
         },
     }
 
@@ -151,10 +153,12 @@ class SharedQspecInputForkShared(torch.nn.Module):
     outputs_qspecs = {None: 1}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
-            (0.01959827, -26, -128, 127, torch.int8): 5,
+            (0.015678614, -64, -128, 127, torch.int8): 2,
+            (0.015678614, 0, -128, 127, torch.int8): 3,
         },
         "quantized_decomposed.quantize_per_tensor.default": {
-            (0.01959827, -26, -128, 127, torch.int8): 5,
+            (0.015678614, -64, -128, 127, torch.int8): 2,
+            (0.015678614, 0, -128, 127, torch.int8): 3,
         },
     }
 
@@ -178,10 +182,12 @@ class SharedQspecInputForkXShared(torch.nn.Module):
     outputs_qspecs = {None: 1}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
-            (0.01959827, -26, -128, 127, torch.int8): 4,
+            (0.015678614, -64, -128, 127, torch.int8): 2,
+            (0.015678614, 0, -128, 127, torch.int8): 2,
         },
         "quantized_decomposed.quantize_per_tensor.default": {
-            (0.01959827, -26, -128, 127, torch.int8): 4,
+            (0.015678614, -64, -128, 127, torch.int8): 2,
+            (0.015678614, 0, -128, 127, torch.int8): 2,
         },
     }
 
@@ -204,10 +210,12 @@ class SharedQspecInputForkYShared(torch.nn.Module):
     outputs_qspecs = {None: 1}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
-            (0.01959827, -26, -128, 127, torch.int8): 5,
+            (0.015678614, -64, -128, 127, torch.int8): 2,
+            (0.015678614, 0, -128, 127, torch.int8): 3,
         },
         "quantized_decomposed.quantize_per_tensor.default": {
-            (0.01959827, -26, -128, 127, torch.int8): 5,
+            (0.015678614, -64, -128, 127, torch.int8): 2,
+            (0.015678614, 0, -128, 127, torch.int8): 3,
         },
     }
 
@@ -230,10 +238,11 @@ class SharedQspecInputForkXConstant(torch.nn.Module):
     outputs_qspecs = {None: 1}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
-            (0.027437577, -55, -128, 127, torch.int8): 3,
+            (0.015678614, 0, -128, 127, torch.int8): 2,
+            (0.019607844, -128, -128, 127, torch.int8): 1,
         },
         "quantized_decomposed.quantize_per_tensor.default": {
-            (0.027437577, -55, -128, 127, torch.int8): 2,
+            (0.015678614, 0, -128, 127, torch.int8): 2,
         },
     }
     constant = torch.tensor(5.0)
@@ -255,10 +264,12 @@ class SharedQspecInputForkYConstant(torch.nn.Module):
     outputs_qspecs = {None: 1}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
-            (0.027437577, -55, -128, 127, torch.int8): 3,
+            (0.015678614, 0, -128, 127, torch.int8): 1,
+            (0.019607844, -128, -128, 127, torch.int8): 2,
         },
         "quantized_decomposed.quantize_per_tensor.default": {
-            (0.027437577, -55, -128, 127, torch.int8): 2,
+            (0.015678614, 0, -128, 127, torch.int8): 1,
+            (0.019607844, -128, -128, 127, torch.int8): 1,
         },
     }
 
@@ -365,10 +376,14 @@ class SharedQspecSurroundedQuantizedOp(torch.nn.Module):
     outputs_qspecs = {None: 1}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
-            (1.019109964, 123, -128, 127, torch.int8): 5,
+            (0.509554982, 123, -128, 127, torch.int8): 3,
+            (0.517394304, 119, -128, 127, torch.int8): 1,
+            (1.019109964, 123, -128, 127, torch.int8): 1,
         },
         "quantized_decomposed.quantize_per_tensor.default": {
-            (1.019109964, 123, -128, 127, torch.int8): 4,
+            (0.509554982, 123, -128, 127, torch.int8): 2,
+            (0.517394304, 119, -128, 127, torch.int8): 1,
+            (1.019109964, 123, -128, 127, torch.int8): 1,
         },
     }
 
@@ -393,11 +408,13 @@ class SharedQspecSurroundedQuantizedOpConstant(torch.nn.Module):
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
             (0.003921569, -128, -128, 127, torch.int8): 1,
-            (0.01959827, -26, -128, 127, torch.int8): 5,
+            (0.015678614, -64, -128, 127, torch.int8): 2,
+            (0.015678614, 0, -128, 127, torch.int8): 3,
         },
         "quantized_decomposed.quantize_per_tensor.default": {
             (0.003921569, -128, -128, 127, torch.int8): 1,
-            (0.01959827, -26, -128, 127, torch.int8): 4,
+            (0.015678614, -64, -128, 127, torch.int8): 2,
+            (0.015678614, 0, -128, 127, torch.int8): 2,
         },
     }
 
@@ -532,11 +549,13 @@ class MixedMaximumInt8Int16(torch.nn.Module):
     output_qspecs = {None: 1}
     quant_params = {
         "quantized_decomposed.quantize_per_tensor.default": {
-            (0.015678614, 0, -128, 127, torch.int8): 4,
+            (0.007839307, -128, -128, 127, torch.int8): 2,
+            (0.015678614, 0, -128, 127, torch.int8): 2,
             (0.000244141, 0, -32767, 32767, torch.int16): 2,
         },
         "quantized_decomposed.dequantize_per_tensor.default": {
-            (0.015678614, 0, -128, 127, torch.int8): 4,
+            (0.007839307, -128, -128, 127, torch.int8): 2,
+            (0.015678614, 0, -128, 127, torch.int8): 2,
             (0.000244141, 0, -32767, 32767, torch.int16): 2,
         },
     }