Skip to content

Commit 0de13f3

Browse files
Fix sign-extension bug in fbgemm MX4 Python reference dequantize (#5706)
Summary: X-link: facebookresearch/FBGEMM#2643. py_dequantize_mx4 viewed packed data as int8 and subtracted FP32_EXP_BIAS directly. For biased exponents >= 128, the int8 value is negative, producing incorrect results. Fix by viewing as uint8, then casting to int32, before subtracting the bias. This is the same class of bug as D101680517 and the earlier Triton kernel fix. GH PR: #5706. Reviewed By: q10. Differential Revision: D102195911.
1 parent 91bf688 commit 0de13f3

2 files changed

Lines changed: 29 additions & 2 deletions

File tree

fbgemm_gpu/fbgemm_gpu/triton/quantize_ref.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -254,9 +254,10 @@ def py_dequantize_mx4(
254254
num_groups = a.numel() // ((group_size // 2) + 1)
255255
packed_input = a[:, :-1]
256256
shared_exp = a[:, -1:]
257-
# Remove fp32 exponent bias
257+
# Remove fp32 exponent bias.
258+
# View as uint8 first to avoid sign-extension for biased exponents >= 128.
258259
FP32_EXP_BIAS = 127
259-
shared_exp = shared_exp - FP32_EXP_BIAS
260+
shared_exp = shared_exp.view(torch.uint8).to(torch.int32) - FP32_EXP_BIAS
260261
# First pull shared exponent off the end of each row.
261262
M, K_2 = packed_input.shape
262263

fbgemm_gpu/test/quantize/mx4_test.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -460,6 +460,32 @@ def test_mx4_high_bit_scale_exponent_triton(self) -> None:
460460
msg=f"Triton dequantize failed for magnitude={magnitude}",
461461
)
462462

463+
def test_mx4_high_bit_scale_exponent_ref(self) -> None:
464+
"""Regression test: py_dequantize_mx4 sign-extension for scale exponents >= 128.
465+
466+
The Python reference dequantize viewed packed data as int8 and
467+
subtracted FP32_EXP_BIAS directly, causing sign-extension for biased
468+
exponents >= 128.
469+
"""
470+
group_size = 32
471+
472+
for magnitude in [2.0, 4.0, 64.0, 1024.0]:
473+
input_tensor = torch.full([1, group_size], magnitude, dtype=torch.float32)
474+
475+
quantized = py_quantize_mx4(
476+
input_tensor, group_size, rounding_mode=RoundingMode.nearest
477+
)
478+
output_ref = py_dequantize_mx4(quantized, group_size)
479+
output_ref = output_ref.reshape(input_tensor.shape)
480+
481+
torch.testing.assert_close(
482+
input_tensor,
483+
output_ref,
484+
rtol=0.0,
485+
atol=0.0,
486+
msg=f"py_dequantize_mx4 failed for magnitude={magnitude}",
487+
)
488+
463489

464490
if __name__ == "__main__":
465491
unittest.main()

0 commit comments

Comments
 (0)