Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion backends/nxp/backend/edge_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,8 +441,10 @@ def output_quantization_type(
│ <returned type>
"""
users = list(node.users)
if len(users) == 1:
if output_index is None:
# Basic QDQ case (without getitem nodes).
if not _is_quantize(quantize_node := users[0]):
# Broken QDQ schema.
return None

else: # Multiple users
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import operator

import numpy as np
import torch

from executorch.backends.nxp.backend.edge_helper import try_get_arg
from executorch.backends.nxp.backend.ir.converter.conversion import (
Expand Down Expand Up @@ -73,32 +74,54 @@ def _is_supported_on_target(
MaxPool2DWithIndicesConverter._get_node_args(node)
)

output_shape = node.meta["val"][0].shape # Shape of the main output (index 0)
if output_shape[0] != 1:
# /neutron-converter/src/OperatorC/MaxPoolPlugin.cpp?at=NEUTRON_SOFTWARE_2.2.2#106
return False

# Neutron only has a restriction on `stride_h`. `stride_w` is not restricted.
stride_h = stride[0]
if stride_h not in (1, 2):
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#901
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#923
return False

channels = output_shape[1]
if channels % neutron_target_spec.get_num_macs() != 0:
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#903
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#925
return False

if any(pad > kernel_dim for pad, kernel_dim in zip(padding, kernel_size)):
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#904-907
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#926-929

# Cannot be tested as PyTorch crashes in this case. It requires the padding to be at most half of the
# effective kernel size, which is an even stricter requirement than what Neutron imposes.
# https://github.com/pytorch/pytorch/blob/449b1768410104d3ed79d3bcfe4ba1d65c7f22c0/torch/_meta_registrations.py#L4483-L4489
return False
if custom_delegation_options.use_new_flow_neutron_c:
# Requirements specified by the new Neutron flow documentation.

supported_types = [torch.int8, torch.uint8]
if not NodeConverter.uses_quantization_type_for_io(
node, supported_types, [0], [0]
):
return False

maximum_supported_kernel_size = 4096
# If there is no padding, Neutron allows maximum stride of 4096. Otherwise, it's 32. But the converter
# always inserts a `Pad` operator to add the padding, so the `MaxPool` never pads its input itself, so
# 4096 is always the limit. And similarly, the `MaxPool` input padding limitation does not apply either.
maximum_supported_stride = 4096

if any(k > maximum_supported_kernel_size for k in kernel_size):
return False
if any(s > maximum_supported_stride for s in stride):
return False

else:
# Shape of the main output (index 0)
output_shape = node.meta["val"][0].shape
if output_shape[0] != 1:
Comment thread
MartinPavella marked this conversation as resolved.
# /neutron-converter/src/OperatorC/MaxPoolPlugin.cpp?at=NEUTRON_SOFTWARE_2.2.2#106
return False

# Neutron only has a restriction on `stride_h`. `stride_w` is not restricted.
stride_h = stride[0]
if stride_h not in (1, 2):
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#901
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#923
return False

channels = output_shape[1]
if channels % neutron_target_spec.get_num_macs() != 0:
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#903
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#925
return False

if any(pad > kernel_dim for pad, kernel_dim in zip(padding, kernel_size)):
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#904-907
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#926-929

# Cannot be tested as PyTorch crashes in this case. It requires the padding to be at most half of the
# effective kernel size, which is an even stricter requirement than what Neutron imposes.
# https://github.com/pytorch/pytorch/blob/449b1768410104d3ed79d3bcfe4ba1d65c7f22c0/torch/_meta_registrations.py#L4483-L4489
return False

return True

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,10 @@
ToNCHWPreprocess,
ToNHWCPreprocess,
)
from executorch.backends.nxp.tests.graph_verifier import BaseGraphVerifier
from executorch.backends.nxp.tests.graph_verifier import (
BaseGraphVerifier,
NonDelegatedNode,
)
from executorch.backends.nxp.tests.models import AvgPool2dConvModule, AvgPool2dModule

from executorch.backends.nxp.tests.nsys_testing import lower_run_compare
Expand Down Expand Up @@ -370,3 +373,23 @@ def test__stride_limit_exceeded(self):
delegated_ep.graph, [ExecutorchDelegateCall]
)
assert graph_contains_any_of_ops(delegated_ep.graph, [AvgPool2D])


class TestAvgPool1DNewNeutronFlow:
    """Delegation tests for `AvgPool1d` under the new Neutron flow (`use_new_flow_neutron_c=True`)."""

    # Just a basic test to verify that the operator gets extended to the 2D variant correctly.
    def test__basic_nsys_inference__view_not_delegated(self):
        input_shape = (2, 4, 6)  # The old flow limited the batch size to 1.
        # NOTE(review): `AvgPool1DModule` is not among the imports visible in this diff — confirm it
        # is imported at the top of this test module.
        model = AvgPool1DModule()
        graph_verifier = BaseGraphVerifier(
            exp_num_delegate_call_nodes=1,  # Delegated AvgPool.
            exp_non_delegated_nodes=[
                NonDelegatedNode(
                    "aten_view_copy_default", 2
                )  # Non delegated due to shape requirements.
            ],
        )

        lower_run_compare(
            model, input_shape, graph_verifier, use_new_flow_neutron_c=True
        )
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import operator

import numpy as np
import pytest
import torch

from executorch.backends.nxp.backend.edge_program_converter import (
Expand All @@ -19,7 +18,13 @@
ToChannelFirstPreprocess,
ToChannelLastPreprocess,
)
from executorch.backends.nxp.tests.graph_verifier import (
BaseGraphVerifier,
NonDelegatedNode,
)
from executorch.backends.nxp.tests.nsys_testing import lower_run_compare
from executorch.backends.nxp.tests.use_qat import * # noqa F403
import pytest

# noinspection PyProtectedMember
from executorch.exir.dialects._ops import ops as exir_ops
Expand Down Expand Up @@ -47,7 +52,7 @@ def forward(self, x):


class MaxPool2dModule(torch.nn.Module):
    """Minimal model wrapping a single `torch.nn.MaxPool2d` layer, used as a lowering test fixture."""

    def __init__(self, kernel_size: int | tuple[int, ...] = 3, **kwargs):
        """
        :param kernel_size: Pooling window size — a single int or a per-dimension tuple.
        :param kwargs: Forwarded verbatim to `torch.nn.MaxPool2d` (e.g. `stride`, `padding`).
        """
        super().__init__()
        self.max_pool2d = torch.nn.MaxPool2d(kernel_size, **kwargs)

Expand Down Expand Up @@ -250,3 +255,124 @@ def test_max_pool_2d__from_1d(self, mocker):
tflite_input_preprocess=ToChannelLastPreprocess(),
tflite_output_preprocess=ToChannelFirstPreprocess(),
)


class TestMaxPool2DNewNeutronFlow:
    """Delegation tests for `MaxPool2d` under the new Neutron flow (`use_new_flow_neutron_c=True`)."""

    # noinspection PyMethodMayBeStatic
    def assert_delegated(self, model, input_shape):
        """Lower and run `model`, verifying exactly one delegate call and no non-delegated nodes."""
        graph_verifier = BaseGraphVerifier(
            exp_num_delegate_call_nodes=1,  # Delegated MaxPool.
            exp_non_delegated_nodes=[],
        )

        lower_run_compare(
            model, input_shape, graph_verifier, use_new_flow_neutron_c=True
        )

    # noinspection PyMethodMayBeStatic
    def assert_not_delegated(self, model, input_shape):
        """Lower `model` and verify the `max_pool2d` node was kept out of the delegate."""
        delegated_ep = to_quantized_edge_program(
            model, input_shape, use_new_flow_neutron_c=True
        ).exported_program()

        # Make sure the `max_pool2d` was NOT delegated.
        assert not graph_contains_any_of_ops(
            delegated_ep.graph, [ExecutorchDelegateCall]
        )
        assert graph_contains_any_of_ops(delegated_ep.graph, [MaxPool2D])

    def test__basic_nsys_inference(self):
        input_shape = (2, 4, 6, 7)  # The old flow limited the batch size to 1.
        model = MaxPool2dModule()
        self.assert_delegated(model, input_shape)

    def test__kernel_size_limit(self):
        kernel_size = (1, 4096)
        input_shape = (1, 4) + kernel_size
        model = MaxPool2dModule(kernel_size)
        self.assert_delegated(model, input_shape)

    def test__kernel_size_limit_exceeded(self):
        kernel_size = (1, 4097)  # Exceeds the kernel size limit.
        input_shape = (1, 4) + kernel_size
        model = MaxPool2dModule(kernel_size)
        self.assert_not_delegated(model, input_shape)

    def test__stride_limit__no_padding(self):
        stride = 4096
        input_shape = (1, 4, 1, 4096)
        model = MaxPool2dModule(1, stride=stride)
        self.assert_delegated(model, input_shape)

    def test__stride_limit_exceeded__no_padding(self):
        stride = 4097  # Exceeds the stride limit.
        input_shape = (1, 4, 1, 4096)
        model = MaxPool2dModule(1, stride=stride)
        self.assert_not_delegated(model, input_shape)

    def test__stride_limit__padding(self):
        padding = 1
        stride = 4096
        input_shape = (1, 2, 3, stride)
        model = MaxPool2dModule(3, stride=stride, padding=padding)
        self.assert_delegated(model, input_shape)

    def test__stride_limit_exceeded__padding(self):
        padding = 1
        stride = 4097  # Exceeds the stride limit.
        input_shape = (1, 2, 3, stride)
        model = MaxPool2dModule(3, stride=stride, padding=padding)
        self.assert_not_delegated(model, input_shape)

    @pytest.mark.skip(
        reason="Large padding requires large kernel size which results in an extremely slow test."
    )
    def test__padding_limit(self):
        # As the padding is added via a `Pad` operator (not the `MaxPool` arguments), there is no limit to the padded
        # value. But as padding can be at most half of the kernel size (PyTorch requirement) and kernel size is limited
        # to 4096, padding of 2048 is the limit.
        padding = 2048
        kernel_size = padding * 2
        input_shape = (1, 1, 2, 3)
        model = MaxPool2dModule(kernel_size, padding=padding)
        self.assert_delegated(model, input_shape)

    def test__padding__max_pool_limit_exceeded(self):
        # NeutronIR `MaxPool` padding is limited to 32. But as it is added by the `Pad` operator instead, there is no
        # limit. This test ensures the `MaxPool` padding limit is not a problem.
        padding = 33
        kernel_size = padding * 2
        input_shape = (1, 2, 3, 4)
        model = MaxPool2dModule(kernel_size, padding=padding)
        self.assert_delegated(model, input_shape)

    def test__padding_to_kernel_ratio_exceeded(self):
        # Both PyTorch and Neutron require the padding to be at most half of the kernel size.
        kernel_size = 3
        padding = 2  # More than half of the kernel size.
        input_shape = (1, 2, 3, 4)
        model = MaxPool2dModule(kernel_size, padding=padding)
        with pytest.raises(
            RuntimeError, match="pad should be at most half of effective kernel size"
        ):
            to_quantized_edge_program(model, input_shape, use_new_flow_neutron_c=True)


class TestMaxPool1DNewNeutronFlow:
    """Delegation tests for `MaxPool1d` under the new Neutron flow (`use_new_flow_neutron_c=True`)."""

    # Just a basic test to verify that the operator gets extended to the 2D variant correctly.
    def test__basic_nsys_inference__view_not_delegated(self):
        input_shape = (2, 4, 6)  # The old flow limited the batch size to 1.
        model = MaxPool1DModule()
        graph_verifier = BaseGraphVerifier(
            exp_num_delegate_call_nodes=1,  # Delegated MaxPool.
            exp_non_delegated_nodes=[
                NonDelegatedNode(
                    "aten_view_copy_default", 2
                )  # Non delegated due to shape requirements.
            ],
        )

        lower_run_compare(
            model, input_shape, graph_verifier, use_new_flow_neutron_c=True
        )
Loading