Skip to content

Commit bd6a75d

Browse files
NXP backend: Add post-quantization data utilization to aot_neutron_compile.py. (#17479)
### Summary

A recent PR added the option to use the post-quantization state dict to access static data during quantization. This PR adds that feature to `aot_neutron_compile.py`.

### Test plan

A unit test using the MobileNetV2 example is provided.

cc @robert-kalmar @JakeStevens @digantdesai
1 parent 3c6d405 commit bd6a75d

3 files changed

Lines changed: 168 additions & 25 deletions

File tree

Lines changed: 99 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,99 @@
1+
# Copyright 2026 NXP
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
import subprocess
7+
import sys
8+
from pathlib import Path
9+
10+
# noinspection PyProtectedMember
11+
from executorch.exir._serialize import _deserialize_pte_binary
12+
from executorch.exir.schema import DelegateCall, KernelCall
13+
14+
15+
def test_aot_example__mobilenet_v2():
    """Test that mobilenet can be lowered to Neutron backend via `aot_neutron_compile.py` and all ops are delegated."""

    # The executorch repository root sits four directories above this test file.
    root = Path(__file__).parent.parent.parent.parent
    assert root.exists(), f"Executorch root not found at {root}"

    # Invoke the AOT compilation script as a module, mirroring run_aot_example.sh.
    command = [
        sys.executable,
        "-m",
        "examples.nxp.aot_neutron_compile",
        "--model_name",
        "mobilenetv2",
        "--delegate",
        "--quantize",
        "--target",
        "imxrt700",
        "--neutron_converter_flavor",
        "SDK_25_12",
        "--use_random_dataset",  # Avoid downloading the dataset.
    ]

    # The script writes its output into the working directory (the repo root).
    produced_pte = root / "mobilenetv2_nxp_delegate.pte"

    try:
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=300,  # Generous cap; the compilation normally finishes in about a minute.
            cwd=str(root),  # Run from executorch root (like run_aot_example.sh).
        )

        # The script must exit cleanly and produce the .pte artifact.
        assert completed.returncode == 0, (
            f"Script failed with return code {completed.returncode}\n"
            f"STDOUT:\n{completed.stdout}\n"
            f"STDERR:\n{completed.stderr}"
        )
        assert produced_pte.exists(), f"PTE file not created at {produced_pte}"

        # Deserialize the flatbuffer and inspect the program to verify delegation.
        program = _deserialize_pte_binary(produced_pte.read_bytes()).program

        # Exactly one execution plan is expected, named "forward".
        assert len(program.execution_plan) == 1
        forward = program.execution_plan[0]
        assert forward.name == "forward"

        # The program only does: Quantize -> Delegate call -> Dequantize.
        operators = forward.operators
        assert len(operators) == 2  # Quantize and Dequantize
        assert len(forward.chains) == 1
        instructions = forward.chains[0].instructions
        assert len(instructions) == 3

        # Instruction 0: quantize kernel. (Can only check by string. There is no object.)
        quantize = instructions[0].instr_args
        assert isinstance(quantize, KernelCall)
        assert quantize.op_index == 0
        assert (
            operators[quantize.op_index].name
            == "quantized_decomposed::quantize_per_tensor"
        )

        # Instruction 1: the single Neutron delegate call.
        delegate = instructions[1].instr_args
        assert isinstance(delegate, DelegateCall)
        assert len(forward.delegates) == 1
        assert delegate.delegate_index == 0
        assert forward.delegates[0].id == "NeutronBackend"

        # Instruction 2: dequantize kernel. (Can only check by string. There is no object.)
        dequantize = instructions[2].instr_args
        assert isinstance(dequantize, KernelCall)
        assert dequantize.op_index == 1
        assert (
            operators[dequantize.op_index].name
            == "quantized_decomposed::dequantize_per_tensor"
        )

    finally:
        # Always remove the generated artifact so repeated runs start clean.
        if produced_pte.exists():
            produced_pte.unlink()

examples/nxp/aot_neutron_compile.py

Lines changed: 27 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -85,7 +85,7 @@ def print_ops_in_edge_program(edge_program):
8585
print(f"{op: <50} {count}x")
8686

8787

88-
def get_model_and_inputs_from_name(model_name: str):
88+
def get_model_and_inputs_from_name(model_name: str, use_random_dataset: bool):
8989
"""Given the name of an example pytorch model, return it, example inputs and calibration inputs (can be None)
9090
9191
Raises RuntimeError if there is no example model corresponding to the given name.
@@ -94,7 +94,15 @@ def get_model_and_inputs_from_name(model_name: str):
9494
calibration_inputs = None
9595
# Case 1: Model is defined in this file
9696
if model_name in models.keys():
97-
m = models[model_name]()
97+
if use_random_dataset:
98+
if model_name != "mobilenetv2":
99+
raise NotImplementedError(
100+
f"Random dataset for model {model_name} is not implemented."
101+
)
102+
m = models[model_name](use_random_dataset=use_random_dataset)
103+
else:
104+
m = models[model_name]()
105+
98106
model = m.get_eager_model()
99107
example_inputs = m.get_example_inputs()
100108
calibration_inputs = m.get_calibration_inputs(64)
@@ -214,6 +222,13 @@ def get_model_and_inputs_from_name(model_name: str):
214222
help="The model (including the Neutron backend) will use the channels last dim order, which can result in faster "
215223
"inference. The inputs must also be provided in the channels last dim order.",
216224
)
225+
parser.add_argument(
226+
"--use_random_dataset",
227+
required=False,
228+
default=False,
229+
action="store_true",
230+
help="The calibration and testing datasets will be generated randomly instead of being downloaded.",
231+
)
217232

218233
args = parser.parse_args()
219234

@@ -226,7 +241,7 @@ def get_model_and_inputs_from_name(model_name: str):
226241

227242
# 1. pick model from one of the supported lists
228243
model, example_inputs, calibration_inputs = get_model_and_inputs_from_name(
229-
args.model_name
244+
args.model_name, args.use_random_dataset
230245
)
231246
model = model.eval()
232247

@@ -300,7 +315,15 @@ def get_model_and_inputs_from_name(model_name: str):
300315
neutron_converter_flavor=args.neutron_converter_flavor,
301316
)
302317
partitioners = (
303-
[NeutronPartitioner(compile_spec, neutron_target_spec)] if args.delegate else []
318+
[
319+
NeutronPartitioner(
320+
compile_spec,
321+
neutron_target_spec,
322+
post_quantization_state_dict=module.state_dict(),
323+
)
324+
]
325+
if args.delegate
326+
else []
304327
)
305328

306329
edge_program_manager = to_edge_transform_and_lower(

examples/nxp/models/mobilenet_v2.py

Lines changed: 42 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,10 @@
1616

1717
class MobilenetV2(MV2Model):
1818

19+
def __init__(self, use_random_dataset: bool = False):
    """Create the MobileNetV2 example-model wrapper.

    :param use_random_dataset: When True, `get_dataset` synthesizes random
        calibration/testing data instead of downloading the real dataset.
    """
    super().__init__()
    # Remember the choice so `get_dataset` can pick the data source later.
    self.use_random_dataset = use_random_dataset
22+
1923
def get_calibration_inputs(
2024
self, batch_size: int = 1
2125
) -> Iterator[tuple[torch.Tensor]]:
@@ -40,27 +44,44 @@ def get_calibration_inputs(
4044
return itertools.islice(dataloader_iterable, batch_count)
4145

4246
def get_dataset(self, batch_size):
    """Return a DataLoader for calibration/evaluation.

    :param batch_size: Batch size of the returned DataLoader.
    :return: A `torch.utils.data.DataLoader` yielding (image, label) batches —
        either synthetic random data or the downloaded Imagenette validation split,
        depending on `self.use_random_dataset`.
    """
    if not self.use_random_dataset:
        # Standard ImageNet-style preprocessing for the downloaded images.
        preprocessing = transforms.Compose(
            [
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(
                    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
                ),  # ImageNet stats
            ]
        )
        imagenette = torchvision.datasets.Imagenette(
            root="./data", split="val", transform=preprocessing, download=True
        )
        return torch.utils.data.DataLoader(
            imagenette,
            batch_size=batch_size,
            shuffle=False,
            num_workers=1,
        )

    # Create random data matching the expected format
    # (224x224 RGB images, normalized).
    sample_count = 10
    images = torch.randn(sample_count, 3, 224, 224)
    labels = torch.randint(0, 10, (sample_count,))  # 10 classes in Imagenette

    synthetic = torch.utils.data.TensorDataset(images, labels)
    return torch.utils.data.DataLoader(
        synthetic,
        batch_size=batch_size,
        shuffle=False,
        num_workers=0,  # Use 0 to avoid multiprocessing issues in tests
    )
6485

6586

6687
def gather_samples_per_class_from_dataloader(

0 commit comments

Comments
 (0)