apple · eyupcanakman · Jun 26, 2026
diff --git a/coreai_torch/_custom_to_core.py b/coreai_torch/_custom_to_core.py
@@ -295,11 +295,11 @@ def _replace_quantize_or_dequantize(
         quant_elem_type = input_type.element_type  # dequantize: quant→float
         float_elem_type = result_elem_type
 
-    # Extract axis; normalize negative axis the same way the C++ lowering does.
+    # Extract axis; normalize a negative axis the same way the eager op does.
     axis_val = _get_optional_int_arg(node, axis_idx, default=0)
     input_rank = len(input_type.shape)
     if axis_val < 0:
-        axis_val = axis_val + input_rank - 1
+        axis_val = axis_val + input_rank
 
     axis = coreai.constant(np.array(axis_val, dtype=np.int32), loc=loc)
 

diff --git a/tests/ops/test_custom_ops.py b/tests/ops/test_custom_ops.py
@@ -411,6 +411,28 @@ def forward(self, x: Tensor) -> Tensor:
             prepare_program=inject_subbyte_tensors,
         )
 
+    async def test_per_channel_negative_axis_numerical(self) -> None:
+        """Per-channel quantize along axis=-1 of a rank-3 input matches eager."""
+
+        class Model(nn.Module):
+            def __init__(self) -> None:
+                super().__init__()
+                self.register_buffer(
+                    "scale", torch.tensor([0.1, 0.2, 0.3, 0.4], dtype=torch.float32)
+                )
+                self.register_buffer("zero_point", torch.zeros(4, dtype=torch.int8))
+
+            def forward(self, x: Tensor) -> Tensor:
+                return torch.ops.coreai.quantize(
+                    x, self.scale, torch.int8, zero_point=self.zero_point, axis=-1
+                )
+
+        model = Model()
+        x = torch.randn(2, 4, 4)  # last dim (4) equals len(scale)
+        await validate_numerical_output(
+            model=model, x=x, prepare_program=inject_subbyte_tensors
+        )
+
 
 # ---------------------------------------------------------------------------
 # dequantize → coreai.dequantize
@@ -540,6 +562,28 @@ def forward(self, x: Tensor) -> Tensor:
             prepare_program=inject_subbyte_tensors,
         )
 
+    async def test_per_channel_negative_axis_numerical(self) -> None:
+        """Per-channel dequantize along axis=-1 of a rank-3 int8 input matches eager."""
+
+        class Model(nn.Module):
+            def __init__(self) -> None:
+                super().__init__()
+                self.register_buffer(
+                    "scale", torch.tensor([0.1, 0.2, 0.3, 0.4], dtype=torch.float32)
+                )
+                self.register_buffer("zero_point", torch.zeros(4, dtype=torch.int8))
+
+            def forward(self, x: Tensor) -> Tensor:
+                return torch.ops.coreai.dequantize(
+                    x, self.scale, zero_point=self.zero_point, axis=-1
+                )
+
+        model = Model()
+        x = torch.randint(-128, 127, (2, 4, 4), dtype=torch.int8)
+        await validate_numerical_output(
+            model=model, x=x, prepare_program=inject_subbyte_tensors
+        )
+
 
 # ---------------------------------------------------------------------------
 # sparse_to_dense → coreai.build_sparse_with_bitmask + coreai.sparse_with_bitmask_to_dense