Skip to content

Commit 3a381b3

Browse files
NXP backend: Test max_pool2d with new Neutron flow. (pytorch#19272)
### Summary
Reflect the requirements of the new Neutron MLIR flow for the `max_pool2d` operator in NXP backend.

### Test plan
Unit tests provided.

cc @robert-kalmar @JakeStevens @digantdesai
1 parent c71823c commit 3a381b3

6 files changed

Lines changed: 224 additions & 48 deletions

File tree

backends/nxp/backend/edge_helper.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -415,14 +415,13 @@ def input_quantization_type(
415415
return dequantize_input_val.dtype
416416

417417

418-
def output_quantization_type(
419-
node: Node, output_index: int | None = None
420-
) -> torch.dtype | None:
418+
def output_quantization_type(node: Node, output_index: int) -> torch.dtype | None:
421419
"""Return the quantization output datatype of the QDQ quantized `node`.
422420
423421
:param node: The compute node.
424422
:param output_index: If the `node` has multiple outputs and therefore multiple `getitem` nodes follow it, the
425-
index selects the output.
423+
index selects the output. If no `getitem` nodes follow it, the operator
424+
produces only 1 output (most common case), and the value `0` must be used.
426425
:return: The output quantization datatype of the QDQ quantized `node`, or `None` if the graph does not follow the
427426
QDQ pattern or some metadata is incomplete or an invalid output index is given.
428427
@@ -441,11 +440,13 @@ def output_quantization_type(
441440
│ <returned type>
442441
"""
443442
users = list(node.users)
444-
if len(users) == 1:
445-
if not _is_quantize(quantize_node := users[0]):
443+
if len(users) == 1 and _is_quantize(quantize_node := users[0]):
444+
# Basic QDQ case.
445+
if output_index != 0:
446+
# There is only 1 output. Cannot access non-zero index.
446447
return None
447448

448-
else: # Multiple users
449+
else: # Only `getitem` nodes should follow.
449450
if not isinstance(output_index, int):
450451
return None # Invalid index.
451452
if not all(user.target == operator.getitem for user in users):

backends/nxp/backend/ir/converter/node_converter.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,7 @@ def uses_quantization_type_for_inputs(
325325
:param node: The compute node.
326326
:param supported_types: List of supported quantization types.
327327
:param input_indices: List of indices into the `node.args`, or tuples of 2 indices into `node.args[idx1][idx2]`.
328+
If empty, no type checking is performed and `True` is returned.
328329
:return: True, if the `node` is QDQ quantized and has quantization input types in `supported_types`.
329330
"""
330331
return all(
@@ -336,40 +337,42 @@ def uses_quantization_type_for_inputs(
336337
def uses_quantization_type_for_outputs(
337338
node: Node,
338339
supported_types: list[torch.dtype],
339-
output_indices: list[int] | None = None,
340+
output_indices: list[int],
340341
):
341342
"""Check if `node` uses the QDQ quantization schema and outputs on the provided indices use a quantization type
342343
that is in `supported_types`.
343344
344345
:param node: The compute node.
345346
:param supported_types: List of supported quantization types.
346347
:param output_indices: If the `node` has multiple outputs and therefore multiple `getitem` nodes follow it, the
347-
indices select the outputs to be checked.
348+
indices select the outputs to be checked. If no `getitem` nodes follow it, the operator
349+
produces only 1 output (most common case), and the value `[0]` must be used.
350+
If empty, no type checking is performed and `True` is returned.
348351
:return: True, if the `node` is QDQ quantized and has quantization output types in `supported_types`.
349352
"""
350-
if output_indices is None:
351-
return output_quantization_type(node) in supported_types
352-
else:
353-
return all(
354-
output_quantization_type(node, output_index) in supported_types
355-
for output_index in output_indices
356-
)
353+
return all(
354+
output_quantization_type(node, output_index) in supported_types
355+
for output_index in output_indices
356+
)
357357

358358
@staticmethod
359359
def uses_quantization_type_for_io(
360360
node: Node,
361361
supported_types: list[torch.dtype],
362362
input_indices: list[int | tuple[int, int]],
363-
output_indices: list[int] | None = None,
363+
output_indices: list[int],
364364
):
365365
"""Check if `node` uses the QDQ quantization schema and inputs and outputs on the provided indices use a
366366
quantization type that is in `supported_types`.
367367
368368
:param node: The compute node.
369369
:param supported_types: List of supported quantization types.
370370
:param input_indices: List of indices into the `node.args`, or tuples of 2 indices into `node.args[idx1][idx2]`.
371+
If empty, no input type checking is performed.
371372
:param output_indices: If the `node` has multiple outputs and therefore multiple `getitem` nodes follow it, the
372-
indices select the outputs to be checked.
373+
indices select the outputs to be checked. If no `getitem` nodes follow it, the operator
374+
produces only 1 output (most common case), and the value `[0]` must be used.
375+
If empty, no output type checking is performed.
373376
:return: True, if the `node` is QDQ quantized and has quantization input and output types in `supported_types`.
374377
"""
375378
return NodeConverter.uses_quantization_type_for_inputs(

backends/nxp/backend/ir/converter/node_converters/ops_converters/avg_pool_2d_converter.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ def _is_supported_on_target(
7171

7272
supported_types = [torch.int8, torch.uint8]
7373
if not NodeConverter.uses_quantization_type_for_io(
74-
node, supported_types, [0]
74+
node, supported_types, [0], [0]
7575
):
7676
return False
7777

backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool2d_with_indices_converter.py

Lines changed: 49 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import operator
77

88
import numpy as np
9+
import torch
910

1011
from executorch.backends.nxp.backend.edge_helper import try_get_arg
1112
from executorch.backends.nxp.backend.ir.converter.conversion import (
@@ -73,32 +74,54 @@ def _is_supported_on_target(
7374
MaxPool2DWithIndicesConverter._get_node_args(node)
7475
)
7576

76-
output_shape = node.meta["val"][0].shape # Shape of the main output (index 0)
77-
if output_shape[0] != 1:
78-
# /neutron-converter/src/OperatorC/MaxPoolPlugin.cpp?at=NEUTRON_SOFTWARE_2.2.2#106
79-
return False
80-
81-
# Neutron only has a restriction on `stride_h`. `stride_w` is not restricted.
82-
stride_h = stride[0]
83-
if stride_h not in (1, 2):
84-
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#901
85-
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#923
86-
return False
87-
88-
channels = output_shape[1]
89-
if channels % neutron_target_spec.get_num_macs() != 0:
90-
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#903
91-
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#925
92-
return False
93-
94-
if any(pad > kernel_dim for pad, kernel_dim in zip(padding, kernel_size)):
95-
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#904-907
96-
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#926-929
97-
98-
# Cannot be tested as PyTorch crashes in this case. It requires the padding to be at most half of the
99-
# effective kernel size, which is an even stricter requirement than what Neutron imposes.
100-
# https://github.com/pytorch/pytorch/blob/449b1768410104d3ed79d3bcfe4ba1d65c7f22c0/torch/_meta_registrations.py#L4483-L4489
101-
return False
77+
if custom_delegation_options.use_new_flow_neutron_c:
78+
# Requirements specified by the new Neutron flow documentation.
79+
80+
supported_types = [torch.int8, torch.uint8]
81+
if not NodeConverter.uses_quantization_type_for_io(
82+
node, supported_types, [0], [0]
83+
):
84+
return False
85+
86+
maximum_supported_kernel_size = 4096
87+
# If there is no padding, Neutron allows maximum stride of 4096. Otherwise, it's 32. But the converter
88+
# always inserts a `Pad` operator to add the padding, so the `MaxPool` never pads its input itself, so
89+
# 4096 is always the limit. And similarly, the `MaxPool` input padding limitation does not apply either.
90+
maximum_supported_stride = 4096
91+
92+
if any(k > maximum_supported_kernel_size for k in kernel_size):
93+
return False
94+
if any(s > maximum_supported_stride for s in stride):
95+
return False
96+
97+
else:
98+
# Shape of the main output (index 0)
99+
output_shape = node.meta["val"][0].shape
100+
if output_shape[0] != 1:
101+
# /neutron-converter/src/OperatorC/MaxPoolPlugin.cpp?at=NEUTRON_SOFTWARE_2.2.2#106
102+
return False
103+
104+
# Neutron only has a restriction on `stride_h`. `stride_w` is not restricted.
105+
stride_h = stride[0]
106+
if stride_h not in (1, 2):
107+
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#901
108+
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#923
109+
return False
110+
111+
channels = output_shape[1]
112+
if channels % neutron_target_spec.get_num_macs() != 0:
113+
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#903
114+
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#925
115+
return False
116+
117+
if any(pad > kernel_dim for pad, kernel_dim in zip(padding, kernel_size)):
118+
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#904-907
119+
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#926-929
120+
121+
# Cannot be tested as PyTorch crashes in this case. It requires the padding to be at most half of the
122+
# effective kernel size, which is an even stricter requirement than what Neutron imposes.
123+
# https://github.com/pytorch/pytorch/blob/449b1768410104d3ed79d3bcfe4ba1d65c7f22c0/torch/_meta_registrations.py#L4483-L4489
124+
return False
102125

103126
return True
104127

backends/nxp/tests/ir/converter/node_converter/test_avg_pool2d_converter.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,10 @@
2828
ToNCHWPreprocess,
2929
ToNHWCPreprocess,
3030
)
31-
from executorch.backends.nxp.tests.graph_verifier import BaseGraphVerifier
31+
from executorch.backends.nxp.tests.graph_verifier import (
32+
BaseGraphVerifier,
33+
NonDelegatedNode,
34+
)
3235
from executorch.backends.nxp.tests.models import AvgPool2dConvModule, AvgPool2dModule
3336

3437
from executorch.backends.nxp.tests.nsys_testing import lower_run_compare
@@ -370,3 +373,23 @@ def test__stride_limit_exceeded(self):
370373
delegated_ep.graph, [ExecutorchDelegateCall]
371374
)
372375
assert graph_contains_any_of_ops(delegated_ep.graph, [AvgPool2D])
376+
377+
378+
class TestAvgPool1DNewNeutronFlow:
379+
380+
# Just a basic test to verify that the operator gets extended to the 2D variant correctly.
381+
def test__basic_nsys_inference__view_not_delegated(self):
382+
input_shape = (2, 4, 6) # The old flow limited the batch size to 1.
383+
model = AvgPool1DModule()
384+
graph_verifier = BaseGraphVerifier(
385+
exp_num_delegate_call_nodes=1, # Delegated AvgPool.
386+
exp_non_delegated_nodes=[
387+
NonDelegatedNode(
388+
"aten_view_copy_default", 2
389+
) # Non delegated due to shape requirements.
390+
],
391+
)
392+
393+
lower_run_compare(
394+
model, input_shape, graph_verifier, use_new_flow_neutron_c=True
395+
)

backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py

Lines changed: 128 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
import operator
77

88
import numpy as np
9-
import pytest
109
import torch
1110

1211
from executorch.backends.nxp.backend.edge_program_converter import (
@@ -19,7 +18,13 @@
1918
ToChannelFirstPreprocess,
2019
ToChannelLastPreprocess,
2120
)
21+
from executorch.backends.nxp.tests.graph_verifier import (
22+
BaseGraphVerifier,
23+
NonDelegatedNode,
24+
)
25+
from executorch.backends.nxp.tests.nsys_testing import lower_run_compare
2226
from executorch.backends.nxp.tests.use_qat import * # noqa F403
27+
import pytest
2328

2429
# noinspection PyProtectedMember
2530
from executorch.exir.dialects._ops import ops as exir_ops
@@ -47,7 +52,7 @@ def forward(self, x):
4752

4853

4954
class MaxPool2dModule(torch.nn.Module):
50-
def __init__(self, kernel_size=3, **kwargs):
55+
def __init__(self, kernel_size: int | tuple[int, ...] = 3, **kwargs):
5156
super().__init__()
5257
self.max_pool2d = torch.nn.MaxPool2d(kernel_size, **kwargs)
5358

@@ -250,3 +255,124 @@ def test_max_pool_2d__from_1d(self, mocker):
250255
tflite_input_preprocess=ToChannelLastPreprocess(),
251256
tflite_output_preprocess=ToChannelFirstPreprocess(),
252257
)
258+
259+
260+
class TestMaxPool2DNewNeutronFlow:
261+
# noinspection PyMethodMayBeStatic
262+
def assert_delegated(self, model, input_shape):
263+
graph_verifier = BaseGraphVerifier(
264+
exp_num_delegate_call_nodes=1, # Delegated MaxPool.
265+
exp_non_delegated_nodes=[],
266+
)
267+
268+
lower_run_compare(
269+
model, input_shape, graph_verifier, use_new_flow_neutron_c=True
270+
)
271+
272+
# noinspection PyMethodMayBeStatic
273+
def assert_not_delegated(self, model, input_shape):
274+
delegated_ep = to_quantized_edge_program(
275+
model, input_shape, use_new_flow_neutron_c=True
276+
).exported_program()
277+
278+
# Make sure the `max_pool2d` was NOT delegated.
279+
assert not graph_contains_any_of_ops(
280+
delegated_ep.graph, [ExecutorchDelegateCall]
281+
)
282+
assert graph_contains_any_of_ops(delegated_ep.graph, [MaxPool2D])
283+
284+
def test__basic_nsys_inference(self):
285+
input_shape = (2, 4, 6, 7) # The old flow limited the batch size to 1.
286+
model = MaxPool2dModule()
287+
self.assert_delegated(model, input_shape)
288+
289+
def test__kernel_size_limit(self):
290+
kernel_size = (1, 4096)
291+
input_shape = (1, 4) + kernel_size
292+
model = MaxPool2dModule(kernel_size)
293+
self.assert_delegated(model, input_shape)
294+
295+
def test__kernel_size_limit_exceeded(self):
296+
kernel_size = (1, 4097) # Exceeds the kernel size limit.
297+
input_shape = (1, 4) + kernel_size
298+
model = MaxPool2dModule(kernel_size)
299+
self.assert_not_delegated(model, input_shape)
300+
301+
def test__stride_limit__no_padding(self):
302+
stride = 4096
303+
input_shape = (1, 4, 1, 4096)
304+
model = MaxPool2dModule(1, stride=stride)
305+
self.assert_delegated(model, input_shape)
306+
307+
def test__stride_limit_exceeded__no_padding(self):
308+
stride = 4097 # Exceeds the stride limit.
309+
input_shape = (1, 4, 1, 4096)
310+
model = MaxPool2dModule(1, stride=stride)
311+
self.assert_not_delegated(model, input_shape)
312+
313+
def test__stride_limit__padding(self):
314+
padding = 1
315+
stride = 4096
316+
input_shape = (1, 2, 3, stride)
317+
model = MaxPool2dModule(3, stride=stride, padding=padding)
318+
self.assert_delegated(model, input_shape)
319+
320+
def test__stride_limit_exceeded__padding(self):
321+
padding = 1
322+
stride = 4097 # Exceeds the stride limit.
323+
input_shape = (1, 2, 3, stride)
324+
model = MaxPool2dModule(3, stride=stride, padding=padding)
325+
self.assert_not_delegated(model, input_shape)
326+
327+
@pytest.mark.skip(
328+
reason="Large padding requires large kernel size which results in an extremely slow test."
329+
)
330+
def test__padding_limit(self):
331+
# As the padding is added via a `Pad` operator (not the `MaxPool` arguments), there is no limit to the padded
332+
# value. But as padding can be at most half of the kernel size (PyTorch requirement) and kernel size is limited
333+
# to 4096, padding of 2048 is the limit.
334+
padding = 2048
335+
kernel_size = padding * 2
336+
input_shape = (1, 1, 2, 3)
337+
model = MaxPool2dModule(kernel_size, padding=padding)
338+
self.assert_delegated(model, input_shape)
339+
340+
def test__padding__max_pool_limit_exceeded(self):
341+
# NeutronIR `MaxPool` padding is limited to 32. But as it is added by the `Pad` operator instead, there is no
342+
# limit. This test ensures the `MaxPool` padding limit is not a problem.
343+
padding = 33
344+
kernel_size = padding * 2
345+
input_shape = (1, 2, 3, 4)
346+
model = MaxPool2dModule(kernel_size, padding=padding)
347+
self.assert_delegated(model, input_shape)
348+
349+
def test__padding_to_kernel_ratio_exceeded(self):
350+
# Both PyTorch and Neutron require the padding to be at most half of the kernel size.
351+
kernel_size = 3
352+
padding = 2 # More than half of the kernel size.
353+
input_shape = (1, 2, 3, 4)
354+
model = MaxPool2dModule(kernel_size, padding=padding)
355+
with pytest.raises(
356+
RuntimeError, match="pad should be at most half of effective kernel size"
357+
):
358+
to_quantized_edge_program(model, input_shape, use_new_flow_neutron_c=True)
359+
360+
361+
class TestMaxPool1DNewNeutronFlow:
362+
363+
# Just a basic test to verify that the operator gets extended to the 2D variant correctly.
364+
def test__basic_nsys_inference__view_not_delegated(self):
365+
input_shape = (2, 4, 6) # The old flow limited the batch size to 1.
366+
model = MaxPool1DModule()
367+
graph_verifier = BaseGraphVerifier(
368+
exp_num_delegate_call_nodes=1, # Delegated MaxPool.
369+
exp_non_delegated_nodes=[
370+
NonDelegatedNode(
371+
"aten_view_copy_default", 2
372+
) # Non delegated due to shape requirements.
373+
],
374+
)
375+
376+
lower_run_compare(
377+
model, input_shape, graph_verifier, use_new_flow_neutron_c=True
378+
)

0 commit comments

Comments
 (0)