NXP backend: Test max_pool2d with new Neutron flow. (pytorch#19272)

MartinPavella · web-flow · commit 3a381b308521 · 2026-05-11T09:27:21.000+02:00
### Summary
Reflect the requirements of the new Neutron MLIR flow for the `max_pool2d` operator in NXP backend.

### Test plan
Unit tests provided.

cc @robert-kalmar @JakeStevens @digantdesai
diff --git a/backends/nxp/backend/edge_helper.py b/backends/nxp/backend/edge_helper.py
@@ -415,14 +415,13 @@ def input_quantization_type(
     return dequantize_input_val.dtype
 
 
-def output_quantization_type(
-    node: Node, output_index: int | None = None
-) -> torch.dtype | None:
+def output_quantization_type(node: Node, output_index: int) -> torch.dtype | None:
     """Return the quantization output datatype of the QDQ quantized `node`.
 
     :param node: The compute node.
     :param output_index: If the `node` has multiple outputs and therefore multiple `getitem` nodes follow it, the
-                          index selects the output.
+                          index selects the output. If no `getitem` nodes follow it, the operator
+                          produces only 1 output (most common case), and the value `0` must be used.
     :return: The output quantization datatype of the QDQ quantized `node`, or `None` if the graph does not follow the
               QDQ pattern or some metadata is incomplete or an invalid input index is given.
 
@@ -441,11 +440,13 @@ def output_quantization_type(
                                             │ <returned type>
     """
     users = list(node.users)
-    if len(users) == 1:
-        if not _is_quantize(quantize_node := users[0]):
+    if len(users) == 1 and _is_quantize(quantize_node := users[0]):
+        # Basic QDQ case.
+        if output_index != 0:
+            # There is only 1 output. Cannot access non-zero index.
             return None
 
-    else:  # Multiple users
+    else:  # Only `getitem` nodes should follow.
         if not isinstance(output_index, int):
             return None  # Invalid index.
         if not all(user.target == operator.getitem for user in users):
diff --git a/backends/nxp/backend/ir/converter/node_converter.py b/backends/nxp/backend/ir/converter/node_converter.py
@@ -325,6 +325,7 @@ def uses_quantization_type_for_inputs(
         :param node: The compute node.
         :param supported_types: List of supported quantization types.
         :param input_indices: List of indices into the `node.args`, or tuples of 2 indices into `node.args[idx1][idx2]`.
+                               If empty, no type checking is performed and `True` is returned.
         :return: True, if the `node` is QDQ quantized and has quantization input types in `supported_types`.
         """
         return all(
@@ -336,40 +337,42 @@ def uses_quantization_type_for_inputs(
     def uses_quantization_type_for_outputs(
         node: Node,
         supported_types: list[torch.dtype],
-        output_indices: list[int] | None = None,
+        output_indices: list[int],
     ):
         """Check if `node` uses the QDQ quantization schema and outputs on the provided indices use a quantization type
             that is in `supported_types`.
 
         :param node: The compute node.
         :param supported_types: List of supported quantization types.
         :param output_indices: If the `node` has multiple outputs and therefore multiple `getitem` nodes follow it, the
-                                indices select the outputs to be checked.
+                                indices select the outputs to be checked. If no `getitem` nodes follow it, the operator
+                                produces only 1 output (most common case), and the value `[0]` must be used.
+                                If empty, no type checking is performed and `True` is returned.
         :return: True, if the `node` is QDQ quantized and has quantization output types in `supported_types`.
         """
-        if output_indices is None:
-            return output_quantization_type(node) in supported_types
-        else:
-            return all(
-                output_quantization_type(node, output_index) in supported_types
-                for output_index in output_indices
-            )
+        return all(
+            output_quantization_type(node, output_index) in supported_types
+            for output_index in output_indices
+        )
 
     @staticmethod
     def uses_quantization_type_for_io(
         node: Node,
         supported_types: list[torch.dtype],
         input_indices: list[int | tuple[int, int]],
-        output_indices: list[int] | None = None,
+        output_indices: list[int],
     ):
         """Check if `node` uses the QDQ quantization schema and inputs and outputs on the provided indices use a
             quantization type that is in `supported_types`.
 
         :param node: The compute node.
         :param supported_types: List of supported quantization types.
         :param input_indices: List of indices into the `node.args`, or tuples of 2 indices into `node.args[idx1][idx2]`.
+                               If empty, no input type checking is performed.
         :param output_indices: If the `node` has multiple outputs and therefore multiple `getitem` nodes follow it, the
-                                indices select the outputs to be checked.
+                                indices select the outputs to be checked. If no `getitem` nodes follow it, the operator
+                                produces only 1 output (most common case), and the value `[0]` must be used.
+                                If empty, no output type checking is performed.
         :return: True, if the `node` is QDQ quantized and has quantization input types in `supported_types`.
         """
         return NodeConverter.uses_quantization_type_for_inputs(
diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/avg_pool_2d_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/avg_pool_2d_converter.py
@@ -71,7 +71,7 @@ def _is_supported_on_target(
 
             supported_types = [torch.int8, torch.uint8]
             if not NodeConverter.uses_quantization_type_for_io(
-                node, supported_types, [0]
+                node, supported_types, [0], [0]
             ):
                 return False
 
diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool2d_with_indices_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool2d_with_indices_converter.py
@@ -6,6 +6,7 @@
 import operator
 
 import numpy as np
+import torch
 
 from executorch.backends.nxp.backend.edge_helper import try_get_arg
 from executorch.backends.nxp.backend.ir.converter.conversion import (
@@ -73,32 +74,54 @@ def _is_supported_on_target(
             MaxPool2DWithIndicesConverter._get_node_args(node)
         )
 
-        output_shape = node.meta["val"][0].shape  # Shape of the main output (index 0)
-        if output_shape[0] != 1:
-            # /neutron-converter/src/OperatorC/MaxPoolPlugin.cpp?at=NEUTRON_SOFTWARE_2.2.2#106
-            return False
-
-        # Neutron only has a restriction on `stride_h`. `stride_w` is not restricted.
-        stride_h = stride[0]
-        if stride_h not in (1, 2):
-            # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#901
-            # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#923
-            return False
-
-        channels = output_shape[1]
-        if channels % neutron_target_spec.get_num_macs() != 0:
-            # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#903
-            # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#925
-            return False
-
-        if any(pad > kernel_dim for pad, kernel_dim in zip(padding, kernel_size)):
-            # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#904-907
-            # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#926-929
-
-            # Cannot be tested as PyTorch crashes in this case. It requires the padding to be at most half of the
-            #  effective kernel size, which is an even stricter requirement than what Neutron imposes.
-            # https://github.com/pytorch/pytorch/blob/449b1768410104d3ed79d3bcfe4ba1d65c7f22c0/torch/_meta_registrations.py#L4483-L4489
-            return False
+        if custom_delegation_options.use_new_flow_neutron_c:
+            # Requirements specified by the new Neutron flow documentation.
+
+            supported_types = [torch.int8, torch.uint8]
+            if not NodeConverter.uses_quantization_type_for_io(
+                node, supported_types, [0], [0]
+            ):
+                return False
+
+            maximum_supported_kernel_size = 4096
+            # If there is no padding, Neutron allows maximum stride of 4096. Otherwise, it's 32. But the converter
+            #  always inserts a `Pad` operator to add the padding, so the `MaxPool` never pads its input itself, so
+            #  4096 is always the limit. And similarly, the `MaxPool` input padding limitation does not apply either.
+            maximum_supported_stride = 4096
+
+            if any(k > maximum_supported_kernel_size for k in kernel_size):
+                return False
+            if any(s > maximum_supported_stride for s in stride):
+                return False
+
+        else:
+            # Shape of the main output (index 0)
+            output_shape = node.meta["val"][0].shape
+            if output_shape[0] != 1:
+                # /neutron-converter/src/OperatorC/MaxPoolPlugin.cpp?at=NEUTRON_SOFTWARE_2.2.2#106
+                return False
+
+            # Neutron only has a restriction on `stride_h`. `stride_w` is not restricted.
+            stride_h = stride[0]
+            if stride_h not in (1, 2):
+                # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#901
+                # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#923
+                return False
+
+            channels = output_shape[1]
+            if channels % neutron_target_spec.get_num_macs() != 0:
+                # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#903
+                # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#925
+                return False
+
+            if any(pad > kernel_dim for pad, kernel_dim in zip(padding, kernel_size)):
+                # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#904-907
+                # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#926-929
+
+                # Cannot be tested as PyTorch crashes in this case. It requires the padding to be at most half of the
+                #  effective kernel size, which is an even stricter requirement than what Neutron imposes.
+                # https://github.com/pytorch/pytorch/blob/449b1768410104d3ed79d3bcfe4ba1d65c7f22c0/torch/_meta_registrations.py#L4483-L4489
+                return False
 
         return True
 
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_avg_pool2d_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_avg_pool2d_converter.py
@@ -28,7 +28,10 @@
     ToNCHWPreprocess,
     ToNHWCPreprocess,
 )
-from executorch.backends.nxp.tests.graph_verifier import BaseGraphVerifier
+from executorch.backends.nxp.tests.graph_verifier import (
+    BaseGraphVerifier,
+    NonDelegatedNode,
+)
 from executorch.backends.nxp.tests.models import AvgPool2dConvModule, AvgPool2dModule
 
 from executorch.backends.nxp.tests.nsys_testing import lower_run_compare
@@ -370,3 +373,23 @@ def test__stride_limit_exceeded(self):
             delegated_ep.graph, [ExecutorchDelegateCall]
         )
         assert graph_contains_any_of_ops(delegated_ep.graph, [AvgPool2D])
+
+
+class TestAvgPool1DNewNeutronFlow:
+
+    # Just a basic test to verify that the operator gets extended to the 2D variant correctly.
+    def test__basic_nsys_inference__view_not_delegated(self):
+        input_shape = (2, 4, 6)  # The old flow limited the batch size to 1.
+        model = AvgPool1DModule()
+        graph_verifier = BaseGraphVerifier(
+            exp_num_delegate_call_nodes=1,  # Delegated AvgPool.
+            exp_non_delegated_nodes=[
+                NonDelegatedNode(
+                    "aten_view_copy_default", 2
+                )  # Non delegated due to shape requirements.
+            ],
+        )
+
+        lower_run_compare(
+            model, input_shape, graph_verifier, use_new_flow_neutron_c=True
+        )
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py
@@ -6,7 +6,6 @@
 import operator
 
 import numpy as np
-import pytest
 import torch
 
 from executorch.backends.nxp.backend.edge_program_converter import (
@@ -19,7 +18,13 @@
     ToChannelFirstPreprocess,
     ToChannelLastPreprocess,
 )
+from executorch.backends.nxp.tests.graph_verifier import (
+    BaseGraphVerifier,
+    NonDelegatedNode,
+)
+from executorch.backends.nxp.tests.nsys_testing import lower_run_compare
 from executorch.backends.nxp.tests.use_qat import *  # noqa F403
+import pytest
 
 # noinspection PyProtectedMember
 from executorch.exir.dialects._ops import ops as exir_ops
@@ -47,7 +52,7 @@ def forward(self, x):
 
 
 class MaxPool2dModule(torch.nn.Module):
-    def __init__(self, kernel_size=3, **kwargs):
+    def __init__(self, kernel_size: int | tuple[int, ...] = 3, **kwargs):
         super().__init__()
         self.max_pool2d = torch.nn.MaxPool2d(kernel_size, **kwargs)
 
@@ -250,3 +255,124 @@ def test_max_pool_2d__from_1d(self, mocker):
             tflite_input_preprocess=ToChannelLastPreprocess(),
             tflite_output_preprocess=ToChannelFirstPreprocess(),
         )
+
+
+class TestMaxPool2DNewNeutronFlow:
+    # noinspection PyMethodMayBeStatic
+    def assert_delegated(self, model, input_shape):
+        graph_verifier = BaseGraphVerifier(
+            exp_num_delegate_call_nodes=1,  # Delegated MaxPool.
+            exp_non_delegated_nodes=[],
+        )
+
+        lower_run_compare(
+            model, input_shape, graph_verifier, use_new_flow_neutron_c=True
+        )
+
+    # noinspection PyMethodMayBeStatic
+    def assert_not_delegated(self, model, input_shape):
+        delegated_ep = to_quantized_edge_program(
+            model, input_shape, use_new_flow_neutron_c=True
+        ).exported_program()
+
+        # Make sure the `max_pool2d` was NOT delegated.
+        assert not graph_contains_any_of_ops(
+            delegated_ep.graph, [ExecutorchDelegateCall]
+        )
+        assert graph_contains_any_of_ops(delegated_ep.graph, [MaxPool2D])
+
+    def test__basic_nsys_inference(self):
+        input_shape = (2, 4, 6, 7)  # The old flow limited the batch size to 1.
+        model = MaxPool2dModule()
+        self.assert_delegated(model, input_shape)
+
+    def test__kernel_size_limit(self):
+        kernel_size = (1, 4096)
+        input_shape = (1, 4) + kernel_size
+        model = MaxPool2dModule(kernel_size)
+        self.assert_delegated(model, input_shape)
+
+    def test__kernel_size_limit_exceeded(self):
+        kernel_size = (1, 4097)  # Exceeds the kernel size limit.
+        input_shape = (1, 4) + kernel_size
+        model = MaxPool2dModule(kernel_size)
+        self.assert_not_delegated(model, input_shape)
+
+    def test__stride_limit__no_padding(self):
+        stride = 4096
+        input_shape = (1, 4, 1, 4096)
+        model = MaxPool2dModule(1, stride=stride)
+        self.assert_delegated(model, input_shape)
+
+    def test__stride_limit_exceeded__no_padding(self):
+        stride = 4097  # Exceeds the stride limit.
+        input_shape = (1, 4, 1, 4096)
+        model = MaxPool2dModule(1, stride=stride)
+        self.assert_not_delegated(model, input_shape)
+
+    def test__stride_limit__padding(self):
+        padding = 1
+        stride = 4096
+        input_shape = (1, 2, 3, stride)
+        model = MaxPool2dModule(3, stride=stride, padding=padding)
+        self.assert_delegated(model, input_shape)
+
+    def test__stride_limit_exceeded__padding(self):
+        padding = 1
+        stride = 4097  # Exceeds the stride limit.
+        input_shape = (1, 2, 3, stride)
+        model = MaxPool2dModule(3, stride=stride, padding=padding)
+        self.assert_not_delegated(model, input_shape)
+
+    @pytest.mark.skip(
+        reason="Large padding requires large kernel size which results in an extremely slow test."
+    )
+    def test__padding_limit(self):
+        # As the padding is added via a `Pad` operator (not the `MaxPool` arguments), there is no limit to the padded
+        #  value. But as padding can be at most half of the kernel size (PyTorch requirement) and kernel size is limited
+        #  to 4096, padding of 2048 is the limit.
+        padding = 2048
+        kernel_size = padding * 2
+        input_shape = (1, 1, 2, 3)
+        model = MaxPool2dModule(kernel_size, padding=padding)
+        self.assert_delegated(model, input_shape)
+
+    def test__padding__max_pool_limit_exceeded(self):
+        # NeutronIR `MaxPool` padding is limited to 32. But as it is added by the `Pad` operator instead, there is no
+        #  limit. This test ensures the `MaxPool` padding limit is not a problem.
+        padding = 33
+        kernel_size = padding * 2
+        input_shape = (1, 2, 3, 4)
+        model = MaxPool2dModule(kernel_size, padding=padding)
+        self.assert_delegated(model, input_shape)
+
+    def test__padding_to_kernel_ratio_exceeded(self):
+        # Both PyTorch and Neutron require the padding to be at most half of the kernel size.
+        kernel_size = 3
+        padding = 2  # More than half of the kernel size.
+        input_shape = (1, 2, 3, 4)
+        model = MaxPool2dModule(kernel_size, padding=padding)
+        with pytest.raises(
+            RuntimeError, match="pad should be at most half of effective kernel size"
+        ):
+            to_quantized_edge_program(model, input_shape, use_new_flow_neutron_c=True)
+
+
+class TestMaxPool1DNewNeutronFlow:
+
+    # Just a basic test to verify that the operator gets extended to the 2D variant correctly.
+    def test__basic_nsys_inference__view_not_delegated(self):
+        input_shape = (2, 4, 6)  # The old flow limited the batch size to 1.
+        model = MaxPool1DModule()
+        graph_verifier = BaseGraphVerifier(
+            exp_num_delegate_call_nodes=1,  # Delegated MaxPool.
+            exp_non_delegated_nodes=[
+                NonDelegatedNode(
+                    "aten_view_copy_default", 2
+                )  # Non delegated due to shape requirements.
+            ],
+        )
+
+        lower_run_compare(
+            model, input_shape, graph_verifier, use_new_flow_neutron_c=True
+        )