From a79f21e518c92874e9af1614d6261cc3625e2aa7 Mon Sep 17 00:00:00 2001
From: gokulkrishna98 <gokulkrishna98@users.noreply.github.com>
Date: Mon, 22 Jun 2026 15:33:14 -0700
Subject: [PATCH 1/6] _aten_to_core: implement aten::atan2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a converter for aten.atan2.default. coreai has no native atan2 op,
so the conversion decomposes it into atan(y/x) with per-quadrant correction:

- x > 0: atan(y/x)
- x < 0, y ≥ 0: atan(y/x) + π
- x < 0, y < 0: atan(y/x) − π
- x = 0, y > 0: π/2
- x = 0, y < 0: −π/2
- x = 0, y = 0: 0

Division by zero when x=0 is guarded with broadcasting_where before the
divide, then the x=0 result is selected in a final where at the end.

Adds numerical tests (shapes 1D/2D/3D, float32/float16, static/dynamic,
axis-aligned edge cases) and IR FileCheck tests (static, dynamic, 1D).
---
 coreai_torch/_aten_to_core.py |  41 ++++++++++++
 tests/ops/test_ops.py         |  65 +++++++++++++++++--
 tests/ops/test_ops_ir.py      | 117 ++++++++++++++++++++++++++++++++++
 3 files changed, 216 insertions(+), 7 deletions(-)

diff --git a/coreai_torch/_aten_to_core.py b/coreai_torch/_aten_to_core.py
index 433e27a..1bea58a 100644
--- a/coreai_torch/_aten_to_core.py
+++ b/coreai_torch/_aten_to_core.py
@@ -1522,6 +1522,46 @@ def replace_argmax(values_map: dict[str, Value], node: fx.Node, loc: Location) -
     return result if keepdim else coreai.shrink_dims(result, [dim])
 
 
+def replace_atan2(values_map: dict[str, Value], node: fx.Node, loc: Location) -> Value:
+    """atan2(y, x) via atan(y/x) with quadrant correction.
+
+    atan2 is undefined for (y=0, x=0); follows the convention atan2(0, 0) = 0.
+    """
+    y, x = _get_operands(values_map, node, [0, 1])
+    ele_type = x.type.element_type
+
+    zero = coreai.constant(0.0, dtype=ele_type)
+    pi = coreai.constant(np.pi, dtype=ele_type)
+    half_pi = coreai.constant(np.pi / 2.0, dtype=ele_type)
+    neg_half_pi = coreai.constant(-np.pi / 2.0, dtype=ele_type)
+
+    # Avoid division by zero when x = 0 by substituting x = 1 for the ratio.
+    x_is_zero = coreai.broadcasting_equal(x, zero)
+    x_safe = coreai.broadcasting_where(
+        x_is_zero, coreai.constant(1.0, dtype=ele_type), x
+    )
+    base = coreai.atan(coreai.broadcasting_divide(y, x_safe))
+
+    # Quadrant correction: x < 0 shifts the result by ±π.
+    x_neg = coreai.broadcasting_greater(zero, x)
+    y_neg = coreai.broadcasting_greater(zero, y)
+    y_pos = coreai.broadcasting_greater(y, zero)
+    correction = coreai.broadcasting_where(
+        y_neg,
+        coreai.broadcasting_sub(base, pi),
+        coreai.broadcasting_add(base, pi),
+    )
+    nonzero_result = coreai.broadcasting_where(x_neg, correction, base)
+
+    # x = 0: result is π/2, −π/2, or 0 based on sign of y.
+    zero_result = coreai.broadcasting_where(
+        y_pos,
+        half_pi,
+        coreai.broadcasting_where(y_neg, neg_half_pi, zero),
+    )
+    return coreai.broadcasting_where(x_is_zero, zero_result, nonzero_result)
+
+
 def replace_gather(values_map: dict[str, Value], node: fx.Node, loc: Location) -> Value:
     """Converts aten.gather to coreai.gather_along_axis."""
     x, index = _get_operands(values_map, node, [0, 2])
@@ -3440,6 +3480,7 @@ def sdpa_maskless(q: Value, k: Value, v: Value) -> Value:
     "asin.default": replace_unary_ops,
     "asinh.default": replace_unary_ops,
     "atan.default": replace_unary_ops,
+    "atan2.default": replace_atan2,
     "atanh.default": replace_unary_ops,
     "_adaptive_avg_pool2d.default": replace_adaptive_avg_pool2d,
     "_unsafe_view.default": replace_view,
diff --git a/tests/ops/test_ops.py b/tests/ops/test_ops.py
index 6e85b7b..8211544 100644
--- a/tests/ops/test_ops.py
+++ b/tests/ops/test_ops.py
@@ -595,13 +595,64 @@ def forward(self, x: Tensor) -> Tensor:
     await validate_numerical_output(model=model, x=x, dynamic_shapes=dynamic_shapes)
 
 
-@pytest.mark.parametrize(
-    "x",
-    [
-        torch.rand(2, 3, 8, 8),
-        torch.rand(2, 3, 8, 8, dtype=torch.float16),  # fp16
-    ],
-)
+class TestAtan2:
+    """Tests for torch.atan2(y, x) — angle from the positive x-axis to the point (x, y)."""
+
+    class Atan2Model(nn.Module):
+        def forward(self, y: Tensor, x: Tensor) -> Tensor:
+            return torch.atan2(y, x)
+
+    @pytest.mark.parametrize("dynamic", [False, True])
+    @pytest.mark.parametrize(
+        "shape",
+        [
+            (4,),
+            (3, 4),
+            (2, 3, 4),
+        ],
+    )
+    @pytest.mark.parametrize("dtype", [torch.float32, torch.float16])
+    async def test_basic(
+        self, shape: tuple[int, ...], dtype: torch.dtype, dynamic: bool
+    ) -> None:
+        model = self.Atan2Model().eval()
+        y = torch.randn(shape, dtype=dtype)
+        x = torch.randn(shape, dtype=dtype)
+        dynamic_shapes = (
+            {"y": _all_dims_dynamic(y), "x": _all_dims_dynamic(x)} if dynamic else None
+        )
+        await validate_numerical_output(
+            model=model, y=y, x=x, dynamic_shapes=dynamic_shapes
+        )
+
+    async def test_x_zero(self) -> None:
+        """x = 0 should yield ±π/2 depending on sign of y."""
+        model = self.Atan2Model().eval()
+        y = torch.tensor([1.0, -1.0, 2.0, -2.0])
+        x = torch.zeros(4)
+        await validate_numerical_output(model=model, y=y, x=x)
+
+    async def test_y_zero(self) -> None:
+        """y = 0 with x > 0 → 0, x < 0 → π."""
+        model = self.Atan2Model().eval()
+        y = torch.zeros(4)
+        x = torch.tensor([1.0, -1.0, 2.0, -2.0])
+        await validate_numerical_output(model=model, y=y, x=x)
+
+    async def test_all_quadrants(self) -> None:
+        """Cover all four quadrants and axes."""
+        model = self.Atan2Model().eval()
+        y = torch.tensor([1.0, 1.0, -1.0, -1.0, 0.0, 0.0, 1.0, -1.0])
+        x = torch.tensor([1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 0.0, 0.0])
+        await validate_numerical_output(model=model, y=y, x=x)
+
+    async def test_broadcast_shapes(self) -> None:
+        model = self.Atan2Model().eval()
+        y = torch.randn(3, 4)
+        x = torch.randn(4)
+        await validate_numerical_output(model=model, y=y, x=x)
+
+
 @pytest.mark.parametrize(
     "dynamic_dims", [tuple(), (0,), (2,), (3,), (0, 2), (0, 3), (0, 2, 3)]
 )
diff --git a/tests/ops/test_ops_ir.py b/tests/ops/test_ops_ir.py
index 50c0786..435e5ee 100644
--- a/tests/ops/test_ops_ir.py
+++ b/tests/ops/test_ops_ir.py
@@ -1132,6 +1132,123 @@ def forward(self, x: Tensor) -> Tensor:
         )
 
 
+class TestAtan2IR:
+    def test_static(self) -> None:
+        class Atan2Model(nn.Module):
+            def forward(self, y: Tensor, x: Tensor) -> Tensor:
+                return torch.atan2(y, x)
+
+        ir = get_ir(Atan2Model().eval(), y=torch.rand(2, 3), x=torch.rand(2, 3))
+        filecheck_pattern(
+            ir,
+            check_file="""
+                // CHECK-LABEL: module {
+                // CHECK-NEXT:   coreai.graph @main(%[[ARG0:.*]]: tensor<2x3xf32> {coreai.name = "y"}, %[[ARG1:.*]]: tensor<2x3xf32> {coreai.name = "x"}) -> (tensor<2x3xf32> {coreai.name = "{{.*}}"}) attributes {__coreai_pure__} {
+                // CHECK-NEXT:     %[[C1:.*]] = coreai.constant dense<1.000000e+00> : tensor<f32>
+                // CHECK-NEXT:     %[[C0:.*]] = coreai.constant dense<0.000000e+00> : tensor<f32>
+                // CHECK-NEXT:     %[[PI:.*]] = coreai.constant dense<3.14159274> : tensor<f32>
+                // CHECK-NEXT:     %[[HPI:.*]] = coreai.constant dense<1.57079637> : tensor<f32>
+                // CHECK-NEXT:     %[[NHPI:.*]] = coreai.constant dense<-1.57079637> : tensor<f32>
+                // CHECK-NEXT:     %[[X_IS_ZERO:.*]] = coreai.decomposable.broadcasting_equal %[[ARG1]], %[[C0]] : (tensor<2x3xf32>, tensor<f32>) -> tensor<2x3xi1>
+                // CHECK-NEXT:     %[[X_SAFE:.*]] = coreai.decomposable.broadcasting_where %[[X_IS_ZERO]], %[[C1]], %[[ARG1]] : (tensor<2x3xi1>, tensor<f32>, tensor<2x3xf32>) -> tensor<2x3xf32>
+                // CHECK-NEXT:     %[[RATIO:.*]] = coreai.decomposable.broadcasting_divide %[[ARG0]], %[[X_SAFE]] : (tensor<2x3xf32>, tensor<2x3xf32>) -> tensor<2x3xf32>
+                // CHECK-NEXT:     %[[BASE:.*]] = coreai.atan %[[RATIO]] : tensor<2x3xf32> -> tensor<2x3xf32>
+                // CHECK-NEXT:     %[[X_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[C0]], %[[ARG1]] : (tensor<f32>, tensor<2x3xf32>) -> tensor<2x3xi1>
+                // CHECK-NEXT:     %[[Y_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[C0]], %[[ARG0]] : (tensor<f32>, tensor<2x3xf32>) -> tensor<2x3xi1>
+                // CHECK-NEXT:     %[[Y_POS:.*]] = coreai.decomposable.broadcasting_greater %[[ARG0]], %[[C0]] : (tensor<2x3xf32>, tensor<f32>) -> tensor<2x3xi1>
+                // CHECK-NEXT:     %[[BASE_MINUS_PI:.*]] = coreai.decomposable.broadcasting_sub %[[BASE]], %[[PI]] : (tensor<2x3xf32>, tensor<f32>) -> tensor<2x3xf32>
+                // CHECK-NEXT:     %[[BASE_PLUS_PI:.*]] = coreai.decomposable.broadcasting_add %[[BASE]], %[[PI]] : (tensor<2x3xf32>, tensor<f32>) -> tensor<2x3xf32>
+                // CHECK-NEXT:     %[[CORRECTION:.*]] = coreai.decomposable.broadcasting_where %[[Y_NEG]], %[[BASE_MINUS_PI]], %[[BASE_PLUS_PI]] : (tensor<2x3xi1>, tensor<2x3xf32>, tensor<2x3xf32>) -> tensor<2x3xf32>
+                // CHECK-NEXT:     %[[NONZERO:.*]] = coreai.decomposable.broadcasting_where %[[X_NEG]], %[[CORRECTION]], %[[BASE]] : (tensor<2x3xi1>, tensor<2x3xf32>, tensor<2x3xf32>) -> tensor<2x3xf32>
+                // CHECK-NEXT:     %[[ZERO_NEG:.*]] = coreai.decomposable.broadcasting_where %[[Y_NEG]], %[[NHPI]], %[[C0]] : (tensor<2x3xi1>, tensor<f32>, tensor<f32>) -> tensor<2x3xf32>
+                // CHECK-NEXT:     %[[ZERO_RES:.*]] = coreai.decomposable.broadcasting_where %[[Y_POS]], %[[HPI]], %[[ZERO_NEG]] : (tensor<2x3xi1>, tensor<f32>, tensor<2x3xf32>) -> tensor<2x3xf32>
+                // CHECK-NEXT:     %[[RESULT:.*]] = coreai.decomposable.broadcasting_where %[[X_IS_ZERO]], %[[ZERO_RES]], %[[NONZERO]] : (tensor<2x3xi1>, tensor<2x3xf32>, tensor<2x3xf32>) -> tensor<2x3xf32>
+                // CHECK-NEXT:     coreai.output %[[RESULT]] : tensor<2x3xf32>
+                // CHECK-NEXT:   }
+                // CHECK-NEXT: }
+            """,
+        )
+
+    def test_dynamic(self) -> None:
+        class Atan2Model(nn.Module):
+            def forward(self, y: Tensor, x: Tensor) -> Tensor:
+                return torch.atan2(y, x)
+
+        y = torch.rand(2, 3)
+        x = torch.rand(2, 3)
+        ir = get_ir(
+            Atan2Model().eval(),
+            y=y,
+            x=x,
+            dynamic_shapes={"y": _all_dims_dynamic(y), "x": _all_dims_dynamic(x)},
+        )
+        filecheck_pattern(
+            ir,
+            check_file="""
+                // CHECK-LABEL: module {
+                // CHECK-NEXT:   coreai.graph @main(%[[ARG0:.*]]: tensor<?x?xf32> {coreai.name = "y"}, %[[ARG1:.*]]: tensor<?x?xf32> {coreai.name = "x"}) -> (tensor<?x?xf32> {coreai.name = "{{.*}}"}) attributes {__coreai_pure__} {
+                // CHECK-NEXT:     %[[C1:.*]] = coreai.constant dense<1.000000e+00> : tensor<f32>
+                // CHECK-NEXT:     %[[C0:.*]] = coreai.constant dense<0.000000e+00> : tensor<f32>
+                // CHECK-NEXT:     %[[PI:.*]] = coreai.constant dense<3.14159274> : tensor<f32>
+                // CHECK-NEXT:     %[[HPI:.*]] = coreai.constant dense<1.57079637> : tensor<f32>
+                // CHECK-NEXT:     %[[NHPI:.*]] = coreai.constant dense<-1.57079637> : tensor<f32>
+                // CHECK-NEXT:     %[[X_IS_ZERO:.*]] = coreai.decomposable.broadcasting_equal %[[ARG1]], %[[C0]] : (tensor<?x?xf32>, tensor<f32>) -> tensor<?x?xi1>
+                // CHECK-NEXT:     %[[X_SAFE:.*]] = coreai.decomposable.broadcasting_where %[[X_IS_ZERO]], %[[C1]], %[[ARG1]] : (tensor<?x?xi1>, tensor<f32>, tensor<?x?xf32>) -> tensor<?x?xf32>
+                // CHECK-NEXT:     %[[RATIO:.*]] = coreai.decomposable.broadcasting_divide %[[ARG0]], %[[X_SAFE]] : (tensor<?x?xf32>, tensor<?x?xf32>) -> tensor<?x?xf32>
+                // CHECK-NEXT:     %[[BASE:.*]] = coreai.atan %[[RATIO]] : tensor<?x?xf32> -> tensor<?x?xf32>
+                // CHECK-NEXT:     %[[X_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[C0]], %[[ARG1]] : (tensor<f32>, tensor<?x?xf32>) -> tensor<?x?xi1>
+                // CHECK-NEXT:     %[[Y_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[C0]], %[[ARG0]] : (tensor<f32>, tensor<?x?xf32>) -> tensor<?x?xi1>
+                // CHECK-NEXT:     %[[Y_POS:.*]] = coreai.decomposable.broadcasting_greater %[[ARG0]], %[[C0]] : (tensor<?x?xf32>, tensor<f32>) -> tensor<?x?xi1>
+                // CHECK-NEXT:     %[[BASE_MINUS_PI:.*]] = coreai.decomposable.broadcasting_sub %[[BASE]], %[[PI]] : (tensor<?x?xf32>, tensor<f32>) -> tensor<?x?xf32>
+                // CHECK-NEXT:     %[[BASE_PLUS_PI:.*]] = coreai.decomposable.broadcasting_add %[[BASE]], %[[PI]] : (tensor<?x?xf32>, tensor<f32>) -> tensor<?x?xf32>
+                // CHECK-NEXT:     %[[CORRECTION:.*]] = coreai.decomposable.broadcasting_where %[[Y_NEG]], %[[BASE_MINUS_PI]], %[[BASE_PLUS_PI]] : (tensor<?x?xi1>, tensor<?x?xf32>, tensor<?x?xf32>) -> tensor<?x?xf32>
+                // CHECK-NEXT:     %[[NONZERO:.*]] = coreai.decomposable.broadcasting_where %[[X_NEG]], %[[CORRECTION]], %[[BASE]] : (tensor<?x?xi1>, tensor<?x?xf32>, tensor<?x?xf32>) -> tensor<?x?xf32>
+                // CHECK-NEXT:     %[[ZERO_NEG:.*]] = coreai.decomposable.broadcasting_where %[[Y_NEG]], %[[NHPI]], %[[C0]] : (tensor<?x?xi1>, tensor<f32>, tensor<f32>) -> tensor<?x?xf32>
+                // CHECK-NEXT:     %[[ZERO_RES:.*]] = coreai.decomposable.broadcasting_where %[[Y_POS]], %[[HPI]], %[[ZERO_NEG]] : (tensor<?x?xi1>, tensor<f32>, tensor<?x?xf32>) -> tensor<?x?xf32>
+                // CHECK-NEXT:     %[[RESULT:.*]] = coreai.decomposable.broadcasting_where %[[X_IS_ZERO]], %[[ZERO_RES]], %[[NONZERO]] : (tensor<?x?xi1>, tensor<?x?xf32>, tensor<?x?xf32>) -> tensor<?x?xf32>
+                // CHECK-NEXT:     coreai.output %[[RESULT]] : tensor<?x?xf32>
+                // CHECK-NEXT:   }
+                // CHECK-NEXT: }
+            """,
+        )
+
+    def test_1d(self) -> None:
+        class Atan2Model(nn.Module):
+            def forward(self, y: Tensor, x: Tensor) -> Tensor:
+                return torch.atan2(y, x)
+
+        ir = get_ir(Atan2Model().eval(), y=torch.rand(4), x=torch.rand(4))
+        filecheck_pattern(
+            ir,
+            check_file="""
+                // CHECK-LABEL: module {
+                // CHECK-NEXT:   coreai.graph @main(%[[ARG0:.*]]: tensor<4xf32> {coreai.name = "y"}, %[[ARG1:.*]]: tensor<4xf32> {coreai.name = "x"}) -> (tensor<4xf32> {coreai.name = "{{.*}}"}) attributes {__coreai_pure__} {
+                // CHECK-NEXT:     %[[C1:.*]] = coreai.constant dense<1.000000e+00> : tensor<f32>
+                // CHECK-NEXT:     %[[C0:.*]] = coreai.constant dense<0.000000e+00> : tensor<f32>
+                // CHECK-NEXT:     %[[PI:.*]] = coreai.constant dense<3.14159274> : tensor<f32>
+                // CHECK-NEXT:     %[[HPI:.*]] = coreai.constant dense<1.57079637> : tensor<f32>
+                // CHECK-NEXT:     %[[NHPI:.*]] = coreai.constant dense<-1.57079637> : tensor<f32>
+                // CHECK-NEXT:     %[[X_IS_ZERO:.*]] = coreai.decomposable.broadcasting_equal %[[ARG1]], %[[C0]] : (tensor<4xf32>, tensor<f32>) -> tensor<4xi1>
+                // CHECK-NEXT:     %[[X_SAFE:.*]] = coreai.decomposable.broadcasting_where %[[X_IS_ZERO]], %[[C1]], %[[ARG1]] : (tensor<4xi1>, tensor<f32>, tensor<4xf32>) -> tensor<4xf32>
+                // CHECK-NEXT:     %[[RATIO:.*]] = coreai.decomposable.broadcasting_divide %[[ARG0]], %[[X_SAFE]] : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
+                // CHECK-NEXT:     %[[BASE:.*]] = coreai.atan %[[RATIO]] : tensor<4xf32> -> tensor<4xf32>
+                // CHECK-NEXT:     %[[X_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[C0]], %[[ARG1]] : (tensor<f32>, tensor<4xf32>) -> tensor<4xi1>
+                // CHECK-NEXT:     %[[Y_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[C0]], %[[ARG0]] : (tensor<f32>, tensor<4xf32>) -> tensor<4xi1>
+                // CHECK-NEXT:     %[[Y_POS:.*]] = coreai.decomposable.broadcasting_greater %[[ARG0]], %[[C0]] : (tensor<4xf32>, tensor<f32>) -> tensor<4xi1>
+                // CHECK-NEXT:     %[[BASE_MINUS_PI:.*]] = coreai.decomposable.broadcasting_sub %[[BASE]], %[[PI]] : (tensor<4xf32>, tensor<f32>) -> tensor<4xf32>
+                // CHECK-NEXT:     %[[BASE_PLUS_PI:.*]] = coreai.decomposable.broadcasting_add %[[BASE]], %[[PI]] : (tensor<4xf32>, tensor<f32>) -> tensor<4xf32>
+                // CHECK-NEXT:     %[[CORRECTION:.*]] = coreai.decomposable.broadcasting_where %[[Y_NEG]], %[[BASE_MINUS_PI]], %[[BASE_PLUS_PI]] : (tensor<4xi1>, tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
+                // CHECK-NEXT:     %[[NONZERO:.*]] = coreai.decomposable.broadcasting_where %[[X_NEG]], %[[CORRECTION]], %[[BASE]] : (tensor<4xi1>, tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
+                // CHECK-NEXT:     %[[ZERO_NEG:.*]] = coreai.decomposable.broadcasting_where %[[Y_NEG]], %[[NHPI]], %[[C0]] : (tensor<4xi1>, tensor<f32>, tensor<f32>) -> tensor<4xf32>
+                // CHECK-NEXT:     %[[ZERO_RES:.*]] = coreai.decomposable.broadcasting_where %[[Y_POS]], %[[HPI]], %[[ZERO_NEG]] : (tensor<4xi1>, tensor<f32>, tensor<4xf32>) -> tensor<4xf32>
+                // CHECK-NEXT:     %[[RESULT:.*]] = coreai.decomposable.broadcasting_where %[[X_IS_ZERO]], %[[ZERO_RES]], %[[NONZERO]] : (tensor<4xi1>, tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
+                // CHECK-NEXT:     coreai.output %[[RESULT]] : tensor<4xf32>
+                // CHECK-NEXT:   }
+                // CHECK-NEXT: }
+            """,
+        )
+
+
 class TestAvgPool2dIR:
     def test_static(self) -> None:
         class AvgPool2dModel(nn.Module):

From 4b9c442c0a8f686f9f9d822727e14ff873974a3f Mon Sep 17 00:00:00 2001
From: gokulkrishna98 <gokulkrishna98@users.noreply.github.com>
Date: Mon, 22 Jun 2026 15:54:08 -0700
Subject: [PATCH 2/6] _aten_to_core: expand replace_atan2 docstring

---
 coreai_torch/_aten_to_core.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/coreai_torch/_aten_to_core.py b/coreai_torch/_aten_to_core.py
index 1bea58a..467a7c5 100644
--- a/coreai_torch/_aten_to_core.py
+++ b/coreai_torch/_aten_to_core.py
@@ -1523,9 +1523,15 @@ def replace_argmax(values_map: dict[str, Value], node: fx.Node, loc: Location) -
 
 
 def replace_atan2(values_map: dict[str, Value], node: fx.Node, loc: Location) -> Value:
-    """atan2(y, x) via atan(y/x) with quadrant correction.
+    """Lower atan2(y, x) using atan(y/x) with quadrant correction.
 
-    atan2 is undefined for (y=0, x=0); follows the convention atan2(0, 0) = 0.
+    CoreAI has no native atan2, so it is decomposed as:
+      - x != 0: atan(y/x) adjusted by ±π to place the result in the correct quadrant.
+      - x == 0: ±π/2 or 0 based on sign of y.
+
+    When x=0, x is replaced with 1 before the divide solely to avoid NaN/inf; that
+    intermediate result is discarded by the final where-select in favour of the x=0 branch.
+    atan2(0, 0) = 0 by convention.
     """
     y, x = _get_operands(values_map, node, [0, 1])
     ele_type = x.type.element_type

From ba2487443944d9e7b960488734978aea9f3ed62c Mon Sep 17 00:00:00 2001
From: gokulkrishna98 <gokulkrishna98@users.noreply.github.com>
Date: Mon, 22 Jun 2026 15:57:22 -0700
Subject: [PATCH 3/6] tests/ops: restore missing x parametrize for
 test_batchnorm

Accidentally dropped when TestAtan2 was inserted at the same location.
---
 tests/ops/test_ops.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tests/ops/test_ops.py b/tests/ops/test_ops.py
index 8211544..ff7532b 100644
--- a/tests/ops/test_ops.py
+++ b/tests/ops/test_ops.py
@@ -653,6 +653,13 @@ async def test_broadcast_shapes(self) -> None:
         await validate_numerical_output(model=model, y=y, x=x)
 
 
+@pytest.mark.parametrize(
+    "x",
+    [
+        torch.rand(2, 3, 8, 8),
+        torch.rand(2, 3, 8, 8, dtype=torch.float16),  # fp16
+    ],
+)
 @pytest.mark.parametrize(
     "dynamic_dims", [tuple(), (0,), (2,), (3,), (0, 2), (0, 3), (0, 2, 3)]
 )

From 0dd1ccf39cdd7782a11a4b1585c7d5b512f3b88b Mon Sep 17 00:00:00 2001
From: gokulkrishna98 <gokulkrishna98@users.noreply.github.com>
Date: Mon, 22 Jun 2026 19:27:41 -0700
Subject: [PATCH 4/6] docs: strip notebook outputs (nbstripout)

---
 .../tutorials/construct-a-graph.ipynb         | 171 ++++--------------
 .../tutorials/run-an-aimodel.ipynb            | 131 +++-----------
 2 files changed, 62 insertions(+), 240 deletions(-)

diff --git a/docs/coreai-core/tutorials/construct-a-graph.ipynb b/docs/coreai-core/tutorials/construct-a-graph.ipynb
index a10b05d..86f0acc 100644
--- a/docs/coreai-core/tutorials/construct-a-graph.ipynb
+++ b/docs/coreai-core/tutorials/construct-a-graph.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "markdown",
-   "id": "intro",
+   "id": "0",
    "metadata": {},
    "source": [
     "# Constructing a CoreAI Graph\n",
@@ -25,7 +25,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "warning",
+   "id": "1",
    "metadata": {},
    "source": [
     ":::{warning}\n",
@@ -39,7 +39,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "setup-md",
+   "id": "2",
    "metadata": {},
    "source": [
     "## Setup\n",
@@ -50,25 +50,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
-   "id": "imports",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2026-06-04T23:13:09.232797Z",
-     "iopub.status.busy": "2026-06-04T23:13:09.232330Z",
-     "iopub.status.idle": "2026-06-04T23:13:12.445622Z",
-     "shell.execute_reply": "2026-06-04T23:13:12.444678Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Dev installation detected. Using local Core AI Framework.\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "id": "3",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "import shutil\n",
     "from pathlib import Path\n",
@@ -84,7 +69,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "specs-md",
+   "id": "4",
    "metadata": {},
    "source": [
     "## Describe the inputs and outputs\n",
@@ -99,26 +84,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
-   "id": "specs",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2026-06-04T23:13:12.447443Z",
-     "iopub.status.busy": "2026-06-04T23:13:12.447294Z",
-     "iopub.status.idle": "2026-06-04T23:13:12.451834Z",
-     "shell.execute_reply": "2026-06-04T23:13:12.451081Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "input:  tensor<2x3xf32>\n",
-      "output: tensor<2x3xf32>\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "id": "5",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "input_spec = TensorSpec(shape=[2, 3], dtype=np.float32)\n",
     "output_spec = TensorSpec(shape=[2, 3], dtype=np.float32, name=\"y\")\n",
@@ -129,7 +98,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "build-md",
+   "id": "6",
    "metadata": {},
    "source": [
     "## Build the graph\n",
@@ -145,25 +114,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
-   "id": "build",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2026-06-04T23:13:12.453228Z",
-     "iopub.status.busy": "2026-06-04T23:13:12.453111Z",
-     "iopub.status.idle": "2026-06-04T23:13:12.470623Z",
-     "shell.execute_reply": "2026-06-04T23:13:12.470201Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Module verified.\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "id": "7",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "module = Module.create()\n",
     "with module:\n",
@@ -181,7 +135,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "program-md",
+   "id": "8",
    "metadata": {},
    "source": [
     "## Wrap in an `AIProgram`\n",
@@ -192,16 +146,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
-   "id": "program",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2026-06-04T23:13:12.472009Z",
-     "iopub.status.busy": "2026-06-04T23:13:12.471916Z",
-     "iopub.status.idle": "2026-06-04T23:13:12.474216Z",
-     "shell.execute_reply": "2026-06-04T23:13:12.473635Z"
-    }
-   },
+   "execution_count": null,
+   "id": "9",
+   "metadata": {},
    "outputs": [],
    "source": [
     "program = AIProgram(module)"
@@ -209,7 +156,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "save-md",
+   "id": "10",
    "metadata": {},
    "source": [
     "## Save as an `.aimodel`\n",
@@ -222,25 +169,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
-   "id": "save",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2026-06-04T23:13:12.475505Z",
-     "iopub.status.busy": "2026-06-04T23:13:12.475388Z",
-     "iopub.status.idle": "2026-06-04T23:13:12.479489Z",
-     "shell.execute_reply": "2026-06-04T23:13:12.479034Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved to: hello-graph.aimodel\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "id": "11",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "asset_path = Path(\"./hello-graph.aimodel\")\n",
     "if asset_path.exists():\n",
@@ -252,7 +184,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "inspect-md",
+   "id": "12",
    "metadata": {},
    "source": [
     "## Inspect the saved asset\n",
@@ -263,26 +195,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
-   "id": "inspect",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2026-06-04T23:13:12.480788Z",
-     "iopub.status.busy": "2026-06-04T23:13:12.480705Z",
-     "iopub.status.idle": "2026-06-04T23:13:12.483398Z",
-     "shell.execute_reply": "2026-06-04T23:13:12.483007Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "contents:   ['main.hash', 'main.mlirb', 'metadata.json']\n",
-      "total size: 446 bytes\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "id": "13",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "files = sorted(p.name for p in asset_path.iterdir())\n",
     "total_bytes = sum(p.stat().st_size for p in asset_path.rglob(\"*\") if p.is_file())\n",
@@ -293,7 +209,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "validate-md",
+   "id": "14",
    "metadata": {},
    "source": [
     "## Validate that the asset is loadable\n",
@@ -305,25 +221,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
-   "id": "validate",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2026-06-04T23:13:12.484574Z",
-     "iopub.status.busy": "2026-06-04T23:13:12.484499Z",
-     "iopub.status.idle": "2026-06-04T23:13:12.498350Z",
-     "shell.execute_reply": "2026-06-04T23:13:12.497839Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "OK\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "id": "15",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "reloaded = AIModelAsset.load(asset_path)\n",
     "async with reloaded.executable() as model:\n",
diff --git a/docs/coreai-core/tutorials/run-an-aimodel.ipynb b/docs/coreai-core/tutorials/run-an-aimodel.ipynb
index 4d878f9..7e23884 100644
--- a/docs/coreai-core/tutorials/run-an-aimodel.ipynb
+++ b/docs/coreai-core/tutorials/run-an-aimodel.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "markdown",
-   "id": "intro",
+   "id": "0",
    "metadata": {},
    "source": [
     "# Running an `.aimodel` with `coreai.runtime`\n",
@@ -29,7 +29,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "setup-md",
+   "id": "1",
    "metadata": {},
    "source": [
     "## Setup\n",
@@ -41,25 +41,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
-   "id": "imports",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2026-06-04T23:13:09.233244Z",
-     "iopub.status.busy": "2026-06-04T23:13:09.232818Z",
-     "iopub.status.idle": "2026-06-04T23:13:12.445952Z",
-     "shell.execute_reply": "2026-06-04T23:13:12.444672Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Dev installation detected. Using local Core AI Framework.\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "id": "2",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "from pathlib import Path\n",
     "\n",
@@ -72,7 +57,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "ensure-md",
+   "id": "3",
    "metadata": {},
    "source": [
     "### Ensure `hello.aimodel` exists\n",
@@ -84,25 +69,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
-   "id": "ensure",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2026-06-04T23:13:12.447569Z",
-     "iopub.status.busy": "2026-06-04T23:13:12.447413Z",
-     "iopub.status.idle": "2026-06-04T23:13:12.473679Z",
-     "shell.execute_reply": "2026-06-04T23:13:12.473128Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "created hello-run.aimodel\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "id": "4",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "from shutil import rmtree\n",
     "from typing import Annotated\n",
@@ -132,7 +102,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "open-asset-md",
+   "id": "5",
    "metadata": {},
    "source": [
     "## Open the asset\n",
@@ -155,16 +125,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
-   "id": "open-asset",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2026-06-04T23:13:12.475086Z",
-     "iopub.status.busy": "2026-06-04T23:13:12.474967Z",
-     "iopub.status.idle": "2026-06-04T23:13:12.477296Z",
-     "shell.execute_reply": "2026-06-04T23:13:12.476906Z"
-    }
-   },
+   "execution_count": null,
+   "id": "6",
+   "metadata": {},
    "outputs": [],
    "source": [
     "asset = AIModelAsset.load(asset_path)"
@@ -172,7 +135,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "run-md",
+   "id": "7",
    "metadata": {},
    "source": [
     "## Open the model and run inference\n",
@@ -195,32 +158,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
-   "id": "run",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2026-06-04T23:13:12.478555Z",
-     "iopub.status.busy": "2026-06-04T23:13:12.478462Z",
-     "iopub.status.idle": "2026-06-04T23:13:12.505717Z",
-     "shell.execute_reply": "2026-06-04T23:13:12.505274Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "functions: ['main']\n",
-      "name:    main\n",
-      "inputs:  ['x']\n",
-      "outputs: ['y']\n",
-      "input x:\n",
-      "[[1.5 1.5 1.5]\n",
-      " [1.5 1.5 1.5]]\n",
-      "output keys: ['y']\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "id": "8",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "async with asset.executable() as model:\n",
     "    print(f\"functions: {model.function_names}\")\n",
@@ -245,7 +186,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "inspect-md",
+   "id": "9",
    "metadata": {},
    "source": [
     "## Inspect the output\n",
@@ -258,30 +199,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
-   "id": "inspect",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2026-06-04T23:13:12.506908Z",
-     "iopub.status.busy": "2026-06-04T23:13:12.506825Z",
-     "iopub.status.idle": "2026-06-04T23:13:12.509068Z",
-     "shell.execute_reply": "2026-06-04T23:13:12.508740Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "shape: (2, 3)\n",
-      "dtype: float32\n",
-      "value:\n",
-      "[[3. 3. 3.]\n",
-      " [3. 3. 3.]]\n",
-      "OK — inference produced expected output shape and dtype\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "id": "10",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "print(f\"shape: {result.shape}\")\n",
     "print(f\"dtype: {result.dtype}\")\n",
@@ -294,7 +215,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "advanced-md",
+   "id": "11",
    "metadata": {},
    "source": [
     "## What's next\n",

From ab8cdb381eea28b548af1580ba4b26fe7c5e905a Mon Sep 17 00:00:00 2001
From: gokulkrishna98 <gokulkrishna98@users.noreply.github.com>
Date: Mon, 22 Jun 2026 19:55:00 -0700
Subject: [PATCH 5/6] address PR review feedback

- Document IEEE-754 limitations (signed zeros, infinities) in replace_atan2 docstring
- Add (0, 0) case to test_x_zero to lock in atan2(0, 0) = 0 by convention
---
 coreai_torch/_aten_to_core.py | 9 +++++++++
 tests/ops/test_ops.py         | 6 +++---
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/coreai_torch/_aten_to_core.py b/coreai_torch/_aten_to_core.py
index 467a7c5..7bdcff4 100644
--- a/coreai_torch/_aten_to_core.py
+++ b/coreai_torch/_aten_to_core.py
@@ -1532,6 +1532,15 @@ def replace_atan2(values_map: dict[str, Value], node: fx.Node, loc: Location) ->
     When x=0, x is replaced with 1 before the divide solely to avoid NaN/inf; that
     intermediate result is discarded by the final where-select in favour of the x=0 branch.
     atan2(0, 0) = 0 by convention.
+
+    IEEE-754 limitations:
+      - Signed zeros: ``-0.0`` is treated the same as ``+0.0`` because the
+        ``0 > y`` predicate is false for ``y = -0.0``. Results are numerically
+        equal to PyTorch for finite inputs but the sign bit may differ
+        (e.g. ``atan2(-0.0, -1.0)`` returns ``+π`` here, ``-π`` in PyTorch).
+      - Infinities: ``atan2(±inf, ±inf)`` returns NaN because ``inf/inf``
+        produces NaN before ``atan`` is applied. PyTorch returns ``±π/4``
+        or ``±3π/4`` per IEEE-754. Do not pass infinite inputs to this op.
     """
     y, x = _get_operands(values_map, node, [0, 1])
     ele_type = x.type.element_type
diff --git a/tests/ops/test_ops.py b/tests/ops/test_ops.py
index ff7532b..1c9a370 100644
--- a/tests/ops/test_ops.py
+++ b/tests/ops/test_ops.py
@@ -626,10 +626,10 @@ async def test_basic(
         )
 
     async def test_x_zero(self) -> None:
-        """x = 0 should yield ±π/2 depending on sign of y."""
+        """x = 0 should yield ±π/2 depending on sign of y; (0, 0) → 0 by convention."""
         model = self.Atan2Model().eval()
-        y = torch.tensor([1.0, -1.0, 2.0, -2.0])
-        x = torch.zeros(4)
+        y = torch.tensor([1.0, -1.0, 2.0, -2.0, 0.0])
+        x = torch.zeros(5)
         await validate_numerical_output(model=model, y=y, x=x)
 
     async def test_y_zero(self) -> None:

From 5e2a2d7ddc6f971fd4990fcd48fa29229c411cb2 Mon Sep 17 00:00:00 2001
From: gokulkrishna98 <gokulkrishna98@users.noreply.github.com>
Date: Mon, 22 Jun 2026 20:11:58 -0700
Subject: [PATCH 6/6] _aten_to_core: fix IEEE-754 edge cases in replace_atan2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Signed zeros: use 1/v trick (1/-0.0 = -inf) combined with strict >
  to make y_neg and x_neg correct for -0.0 inputs without misclassifying
  ±inf values which use the strict > path directly
- x = -0.0 branch: split x=0 into +0 (returns ±π/2) and -0 (returns ±π)
  per IEEE-754 atan2 specification
- Both infinite: add explicit branch for atan2(±inf, ±inf) → ±π/4 or
  ±3π/4; previously produced NaN via atan(inf/inf) = atan(NaN)
- Add test_signed_zeros and test_infinities to lock in correct behaviour
- Update IR tests to reflect the expanded op sequence
---
 coreai_torch/_aten_to_core.py | 106 +++++++++++++++++++-------
 tests/ops/test_ops.py         |  16 ++++
 tests/ops/test_ops_ir.py      | 136 +++++++++++++++++++---------------
 3 files changed, 170 insertions(+), 88 deletions(-)

diff --git a/coreai_torch/_aten_to_core.py b/coreai_torch/_aten_to_core.py
index 7bdcff4..93b042c 100644
--- a/coreai_torch/_aten_to_core.py
+++ b/coreai_torch/_aten_to_core.py
@@ -1526,41 +1526,95 @@ def replace_atan2(values_map: dict[str, Value], node: fx.Node, loc: Location) ->
     """Lower atan2(y, x) using atan(y/x) with quadrant correction.
 
     CoreAI has no native atan2, so it is decomposed as:
-      - x != 0: atan(y/x) adjusted by ±π to place the result in the correct quadrant.
-      - x == 0: ±π/2 or 0 based on sign of y.
+      - x != 0, finite: atan(y/x) adjusted by ±π for the correct quadrant.
+      - x == +0: ±π/2 for non-zero y, 0 for y = 0.
+      - x == -0: ±π for all y (including ±0 → ±π per IEEE-754).
+      - both infinite: ±π/4 or ±3π/4 per IEEE-754.
+
+    Signed-zero handling: IEEE-754 treats -0.0 as distinct from +0.0 for atan2
+    (e.g. atan2(-0, -1) = -π, not +π). The 1/v trick — 1/-0.0 = -inf — is used
+    to detect the sign bit of zero inputs so that y_neg and x_neg are correct
+    for -0.0 inputs without misclassifying ±inf (which use the strict > path).
 
     When x=0, x is replaced with 1 before the divide solely to avoid NaN/inf; that
-    intermediate result is discarded by the final where-select in favour of the x=0 branch.
+    intermediate result is discarded by the final where-select.
     atan2(0, 0) = 0 by convention.
-
-    IEEE-754 limitations:
-      - Signed zeros: ``-0.0`` is treated the same as ``+0.0`` because the
-        ``0 > y`` predicate is false for ``y = -0.0``. Results are numerically
-        equal to PyTorch for finite inputs but the sign bit may differ
-        (e.g. ``atan2(-0.0, -1.0)`` returns ``+π`` here, ``-π`` in PyTorch).
-      - Infinities: ``atan2(±inf, ±inf)`` returns NaN because ``inf/inf``
-        produces NaN before ``atan`` is applied. PyTorch returns ``±π/4``
-        or ``±3π/4`` per IEEE-754. Do not pass infinite inputs to this op.
     """
     y, x = _get_operands(values_map, node, [0, 1])
     ele_type = x.type.element_type
 
     zero = coreai.constant(0.0, dtype=ele_type)
+    one = coreai.constant(1.0, dtype=ele_type)
     pi = coreai.constant(np.pi, dtype=ele_type)
+    neg_pi = coreai.constant(-np.pi, dtype=ele_type)
     half_pi = coreai.constant(np.pi / 2.0, dtype=ele_type)
     neg_half_pi = coreai.constant(-np.pi / 2.0, dtype=ele_type)
-
-    # Avoid division by zero when x = 0 by substituting x = 1 for the ratio.
+    quarter_pi = coreai.constant(np.pi / 4.0, dtype=ele_type)
+    neg_quarter_pi = coreai.constant(-np.pi / 4.0, dtype=ele_type)
+    three_quarter_pi = coreai.constant(3.0 * np.pi / 4.0, dtype=ele_type)
+    neg_three_quarter_pi = coreai.constant(-3.0 * np.pi / 4.0, dtype=ele_type)
+
+    # ── signed-zero-aware sign predicates ─────────────────────────────────────
+    # 1 / -0.0 = -inf (IEEE-754), so (0 > 1/v) is True iff v = -0.0. Combine with
+    # the strict > predicate (handles ±inf and non-zero finites) via OR.
+    y_is_zero = coreai.broadcasting_equal(y, zero)
     x_is_zero = coreai.broadcasting_equal(x, zero)
-    x_safe = coreai.broadcasting_where(
-        x_is_zero, coreai.constant(1.0, dtype=ele_type), x
+    y_neg = coreai.broadcasting_or(
+        coreai.broadcasting_greater(zero, y),
+        coreai.broadcasting_and(
+            y_is_zero,
+            coreai.broadcasting_greater(zero, coreai.broadcasting_divide(one, y)),
+        ),
+    )
+    x_neg = coreai.broadcasting_or(
+        coreai.broadcasting_greater(zero, x),
+        coreai.broadcasting_and(
+            x_is_zero,
+            coreai.broadcasting_greater(zero, coreai.broadcasting_divide(one, x)),
+        ),
+    )
+    x_is_neg_zero = coreai.broadcasting_and(
+        x_is_zero,
+        coreai.broadcasting_greater(zero, coreai.broadcasting_divide(one, x)),
+    )
+
+    # ── both-infinite branch ──────────────────────────────────────────────────
+    # atan(inf/inf) = atan(NaN) = NaN; handle before the divide.
+    pos_inf = coreai.constant(float("inf"), dtype=ele_type)
+    neg_inf = coreai.constant(float("-inf"), dtype=ele_type)
+    x_is_inf = coreai.broadcasting_or(
+        coreai.broadcasting_equal(x, pos_inf), coreai.broadcasting_equal(x, neg_inf)
+    )
+    y_is_inf = coreai.broadcasting_or(
+        coreai.broadcasting_equal(y, pos_inf), coreai.broadcasting_equal(y, neg_inf)
+    )
+    both_inf = coreai.broadcasting_and(x_is_inf, y_is_inf)
+    inf_result = coreai.broadcasting_where(
+        y_neg,
+        coreai.broadcasting_where(x_neg, neg_three_quarter_pi, neg_quarter_pi),
+        coreai.broadcasting_where(x_neg, three_quarter_pi, quarter_pi),
     )
-    base = coreai.atan(coreai.broadcasting_divide(y, x_safe))
 
-    # Quadrant correction: x < 0 shifts the result by ±π.
-    x_neg = coreai.broadcasting_greater(zero, x)
-    y_neg = coreai.broadcasting_greater(zero, y)
-    y_pos = coreai.broadcasting_greater(y, zero)
+    # ── x = 0 branch ──────────────────────────────────────────────────────────
+    # x = +0: ±π/2 for strictly ±y, 0 when y = 0.
+    # x = -0: ±π for all y (y_neg covers y = -0.0 via the 1/y trick above).
+    y_pos_strict = coreai.broadcasting_greater(y, zero)
+    y_neg_strict = coreai.broadcasting_greater(zero, y)
+    pos_x_zero_result = coreai.broadcasting_where(
+        y_pos_strict,
+        half_pi,
+        coreai.broadcasting_where(y_neg_strict, neg_half_pi, zero),
+    )
+    neg_x_zero_result = coreai.broadcasting_where(y_neg, neg_pi, pi)
+    zero_result = coreai.broadcasting_where(
+        x_is_neg_zero, neg_x_zero_result, pos_x_zero_result
+    )
+
+    # ── finite nonzero x branch ────────────────────────────────────────────────
+    # Avoid division by zero: substitute x = 1 when x = 0; result discarded by
+    # the outer where-select.
+    x_safe = coreai.broadcasting_where(x_is_zero, one, x)
+    base = coreai.atan(coreai.broadcasting_divide(y, x_safe))
     correction = coreai.broadcasting_where(
         y_neg,
         coreai.broadcasting_sub(base, pi),
@@ -1568,13 +1622,9 @@ def replace_atan2(values_map: dict[str, Value], node: fx.Node, loc: Location) ->
     )
     nonzero_result = coreai.broadcasting_where(x_neg, correction, base)
 
-    # x = 0: result is π/2, −π/2, or 0 based on sign of y.
-    zero_result = coreai.broadcasting_where(
-        y_pos,
-        half_pi,
-        coreai.broadcasting_where(y_neg, neg_half_pi, zero),
-    )
-    return coreai.broadcasting_where(x_is_zero, zero_result, nonzero_result)
+    # ── combine ────────────────────────────────────────────────────────────────
+    result = coreai.broadcasting_where(x_is_zero, zero_result, nonzero_result)
+    return coreai.broadcasting_where(both_inf, inf_result, result)
 
 
 def replace_gather(values_map: dict[str, Value], node: fx.Node, loc: Location) -> Value:
diff --git a/tests/ops/test_ops.py b/tests/ops/test_ops.py
index 1c9a370..b8f5dfb 100644
--- a/tests/ops/test_ops.py
+++ b/tests/ops/test_ops.py
@@ -652,6 +652,22 @@ async def test_broadcast_shapes(self) -> None:
         x = torch.randn(4)
         await validate_numerical_output(model=model, y=y, x=x)
 
+    async def test_signed_zeros(self) -> None:
+        """IEEE-754 signed-zero cases: atan2(-0, x) and atan2(y, -0)."""
+        model = self.Atan2Model().eval()
+        # y = -0.0 with various x signs
+        y = torch.tensor([-0.0, -0.0, -0.0, 0.0])
+        x = torch.tensor([-1.0, 1.0, -0.0, -0.0])
+        await validate_numerical_output(model=model, y=y, x=x)
+
+    async def test_infinities(self) -> None:
+        """IEEE-754 both-infinite cases: atan2(±inf, ±inf) → ±π/4 or ±3π/4."""
+        model = self.Atan2Model().eval()
+        inf = float("inf")
+        y = torch.tensor([inf, inf, -inf, -inf])
+        x = torch.tensor([inf, -inf, inf, -inf])
+        await validate_numerical_output(model=model, y=y, x=x)
+
 
 @pytest.mark.parametrize(
     "x",
diff --git a/tests/ops/test_ops_ir.py b/tests/ops/test_ops_ir.py
index 435e5ee..ce6c7f9 100644
--- a/tests/ops/test_ops_ir.py
+++ b/tests/ops/test_ops_ir.py
@@ -1143,26 +1143,40 @@ def forward(self, y: Tensor, x: Tensor) -> Tensor:
             ir,
             check_file="""
                 // CHECK-LABEL: module {
-                // CHECK-NEXT:   coreai.graph @main(%[[ARG0:.*]]: tensor<2x3xf32> {coreai.name = "y"}, %[[ARG1:.*]]: tensor<2x3xf32> {coreai.name = "x"}) -> (tensor<2x3xf32> {coreai.name = "{{.*}}"}) attributes {__coreai_pure__} {
-                // CHECK-NEXT:     %[[C1:.*]] = coreai.constant dense<1.000000e+00> : tensor<f32>
-                // CHECK-NEXT:     %[[C0:.*]] = coreai.constant dense<0.000000e+00> : tensor<f32>
-                // CHECK-NEXT:     %[[PI:.*]] = coreai.constant dense<3.14159274> : tensor<f32>
-                // CHECK-NEXT:     %[[HPI:.*]] = coreai.constant dense<1.57079637> : tensor<f32>
-                // CHECK-NEXT:     %[[NHPI:.*]] = coreai.constant dense<-1.57079637> : tensor<f32>
-                // CHECK-NEXT:     %[[X_IS_ZERO:.*]] = coreai.decomposable.broadcasting_equal %[[ARG1]], %[[C0]] : (tensor<2x3xf32>, tensor<f32>) -> tensor<2x3xi1>
-                // CHECK-NEXT:     %[[X_SAFE:.*]] = coreai.decomposable.broadcasting_where %[[X_IS_ZERO]], %[[C1]], %[[ARG1]] : (tensor<2x3xi1>, tensor<f32>, tensor<2x3xf32>) -> tensor<2x3xf32>
-                // CHECK-NEXT:     %[[RATIO:.*]] = coreai.decomposable.broadcasting_divide %[[ARG0]], %[[X_SAFE]] : (tensor<2x3xf32>, tensor<2x3xf32>) -> tensor<2x3xf32>
-                // CHECK-NEXT:     %[[BASE:.*]] = coreai.atan %[[RATIO]] : tensor<2x3xf32> -> tensor<2x3xf32>
-                // CHECK-NEXT:     %[[X_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[C0]], %[[ARG1]] : (tensor<f32>, tensor<2x3xf32>) -> tensor<2x3xi1>
-                // CHECK-NEXT:     %[[Y_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[C0]], %[[ARG0]] : (tensor<f32>, tensor<2x3xf32>) -> tensor<2x3xi1>
-                // CHECK-NEXT:     %[[Y_POS:.*]] = coreai.decomposable.broadcasting_greater %[[ARG0]], %[[C0]] : (tensor<2x3xf32>, tensor<f32>) -> tensor<2x3xi1>
-                // CHECK-NEXT:     %[[BASE_MINUS_PI:.*]] = coreai.decomposable.broadcasting_sub %[[BASE]], %[[PI]] : (tensor<2x3xf32>, tensor<f32>) -> tensor<2x3xf32>
-                // CHECK-NEXT:     %[[BASE_PLUS_PI:.*]] = coreai.decomposable.broadcasting_add %[[BASE]], %[[PI]] : (tensor<2x3xf32>, tensor<f32>) -> tensor<2x3xf32>
-                // CHECK-NEXT:     %[[CORRECTION:.*]] = coreai.decomposable.broadcasting_where %[[Y_NEG]], %[[BASE_MINUS_PI]], %[[BASE_PLUS_PI]] : (tensor<2x3xi1>, tensor<2x3xf32>, tensor<2x3xf32>) -> tensor<2x3xf32>
-                // CHECK-NEXT:     %[[NONZERO:.*]] = coreai.decomposable.broadcasting_where %[[X_NEG]], %[[CORRECTION]], %[[BASE]] : (tensor<2x3xi1>, tensor<2x3xf32>, tensor<2x3xf32>) -> tensor<2x3xf32>
-                // CHECK-NEXT:     %[[ZERO_NEG:.*]] = coreai.decomposable.broadcasting_where %[[Y_NEG]], %[[NHPI]], %[[C0]] : (tensor<2x3xi1>, tensor<f32>, tensor<f32>) -> tensor<2x3xf32>
-                // CHECK-NEXT:     %[[ZERO_RES:.*]] = coreai.decomposable.broadcasting_where %[[Y_POS]], %[[HPI]], %[[ZERO_NEG]] : (tensor<2x3xi1>, tensor<f32>, tensor<2x3xf32>) -> tensor<2x3xf32>
-                // CHECK-NEXT:     %[[RESULT:.*]] = coreai.decomposable.broadcasting_where %[[X_IS_ZERO]], %[[ZERO_RES]], %[[NONZERO]] : (tensor<2x3xi1>, tensor<2x3xf32>, tensor<2x3xf32>) -> tensor<2x3xf32>
+                // CHECK-NEXT:   coreai.graph @main(%[[Y:.*]]: tensor<2x3xf32> {coreai.name = "y"}, %[[X:.*]]: tensor<2x3xf32> {coreai.name = "x"}) -> (tensor<2x3xf32> {coreai.name = "{{.*}}"}) attributes {__coreai_pure__} {
+                // CHECK:          %[[NEG_INF:.*]] = coreai.constant dense<0xFF800000> : tensor<f32>
+                // CHECK:          %[[POS_INF:.*]] = coreai.constant dense<0x7F800000> : tensor<f32>
+                // CHECK:          %[[ZERO:.*]] = coreai.constant dense<0.000000e+00> : tensor<f32>
+                // CHECK:          %[[ONE:.*]] = coreai.constant dense<1.000000e+00> : tensor<f32>
+                // CHECK:          %[[PI:.*]] = coreai.constant dense<3.14159274> : tensor<f32>
+                // CHECK:          %[[NEG_PI:.*]] = coreai.constant dense<-3.14159274> : tensor<f32>
+                // CHECK:          %[[HPI:.*]] = coreai.constant dense<1.57079637> : tensor<f32>
+                // CHECK:          %[[NHPI:.*]] = coreai.constant dense<-1.57079637> : tensor<f32>
+                // CHECK:          %[[QPI:.*]] = coreai.constant dense<0.785398185> : tensor<f32>
+                // CHECK:          %[[NQPI:.*]] = coreai.constant dense<-0.785398185> : tensor<f32>
+                // CHECK:          %[[THREEQPI:.*]] = coreai.constant dense<2.3561945> : tensor<f32>
+                // CHECK:          %[[NTHREEQPI:.*]] = coreai.constant dense<-2.3561945> : tensor<f32>
+                // CHECK:          %[[Y_IS_ZERO:.*]] = coreai.decomposable.broadcasting_equal %[[Y]], %[[ZERO]]
+                // CHECK:          %[[X_IS_ZERO:.*]] = coreai.decomposable.broadcasting_equal %[[X]], %[[ZERO]]
+                // CHECK:          %[[Y_NEG_STRICT:.*]] = coreai.decomposable.broadcasting_greater %[[ZERO]], %[[Y]]
+                // CHECK:          %[[RECIP_Y:.*]] = coreai.decomposable.broadcasting_divide %[[ONE]], %[[Y]]
+                // CHECK:          %[[RECIP_Y_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[ZERO]], %[[RECIP_Y]]
+                // CHECK:          %[[Y_ZERO_NEG:.*]] = coreai.decomposable.broadcasting_and %[[Y_IS_ZERO]], %[[RECIP_Y_NEG]]
+                // CHECK:          %[[Y_NEG:.*]] = coreai.decomposable.broadcasting_or %[[Y_NEG_STRICT]], %[[Y_ZERO_NEG]]
+                // CHECK:          %[[X_NEG_STRICT:.*]] = coreai.decomposable.broadcasting_greater %[[ZERO]], %[[X]]
+                // CHECK:          %[[RECIP_X:.*]] = coreai.decomposable.broadcasting_divide %[[ONE]], %[[X]]
+                // CHECK:          %[[RECIP_X_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[ZERO]], %[[RECIP_X]]
+                // CHECK:          %[[X_ZERO_NEG:.*]] = coreai.decomposable.broadcasting_and %[[X_IS_ZERO]], %[[RECIP_X_NEG]]
+                // CHECK:          %[[X_NEG:.*]] = coreai.decomposable.broadcasting_or %[[X_NEG_STRICT]], %[[X_ZERO_NEG]]
+                // CHECK:          %[[X_IS_POS_INF:.*]] = coreai.decomposable.broadcasting_equal %[[X]], %[[POS_INF]]
+                // CHECK:          %[[X_IS_NEG_INF:.*]] = coreai.decomposable.broadcasting_equal %[[X]], %[[NEG_INF]]
+                // CHECK:          %[[X_IS_INF:.*]] = coreai.decomposable.broadcasting_or %[[X_IS_POS_INF]], %[[X_IS_NEG_INF]]
+                // CHECK:          %[[Y_IS_POS_INF:.*]] = coreai.decomposable.broadcasting_equal %[[Y]], %[[POS_INF]]
+                // CHECK:          %[[Y_IS_NEG_INF:.*]] = coreai.decomposable.broadcasting_equal %[[Y]], %[[NEG_INF]]
+                // CHECK:          %[[Y_IS_INF:.*]] = coreai.decomposable.broadcasting_or %[[Y_IS_POS_INF]], %[[Y_IS_NEG_INF]]
+                // CHECK:          %[[BOTH_INF:.*]] = coreai.decomposable.broadcasting_and %[[X_IS_INF]], %[[Y_IS_INF]]
+                // CHECK:          %[[BASE:.*]] = coreai.atan
+                // CHECK:          %[[RESULT:.*]] = coreai.decomposable.broadcasting_where %[[BOTH_INF]],
                 // CHECK-NEXT:     coreai.output %[[RESULT]] : tensor<2x3xf32>
                 // CHECK-NEXT:   }
                 // CHECK-NEXT: }
@@ -1186,26 +1200,40 @@ def forward(self, y: Tensor, x: Tensor) -> Tensor:
             ir,
             check_file="""
                 // CHECK-LABEL: module {
-                // CHECK-NEXT:   coreai.graph @main(%[[ARG0:.*]]: tensor<?x?xf32> {coreai.name = "y"}, %[[ARG1:.*]]: tensor<?x?xf32> {coreai.name = "x"}) -> (tensor<?x?xf32> {coreai.name = "{{.*}}"}) attributes {__coreai_pure__} {
-                // CHECK-NEXT:     %[[C1:.*]] = coreai.constant dense<1.000000e+00> : tensor<f32>
-                // CHECK-NEXT:     %[[C0:.*]] = coreai.constant dense<0.000000e+00> : tensor<f32>
-                // CHECK-NEXT:     %[[PI:.*]] = coreai.constant dense<3.14159274> : tensor<f32>
-                // CHECK-NEXT:     %[[HPI:.*]] = coreai.constant dense<1.57079637> : tensor<f32>
-                // CHECK-NEXT:     %[[NHPI:.*]] = coreai.constant dense<-1.57079637> : tensor<f32>
-                // CHECK-NEXT:     %[[X_IS_ZERO:.*]] = coreai.decomposable.broadcasting_equal %[[ARG1]], %[[C0]] : (tensor<?x?xf32>, tensor<f32>) -> tensor<?x?xi1>
-                // CHECK-NEXT:     %[[X_SAFE:.*]] = coreai.decomposable.broadcasting_where %[[X_IS_ZERO]], %[[C1]], %[[ARG1]] : (tensor<?x?xi1>, tensor<f32>, tensor<?x?xf32>) -> tensor<?x?xf32>
-                // CHECK-NEXT:     %[[RATIO:.*]] = coreai.decomposable.broadcasting_divide %[[ARG0]], %[[X_SAFE]] : (tensor<?x?xf32>, tensor<?x?xf32>) -> tensor<?x?xf32>
-                // CHECK-NEXT:     %[[BASE:.*]] = coreai.atan %[[RATIO]] : tensor<?x?xf32> -> tensor<?x?xf32>
-                // CHECK-NEXT:     %[[X_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[C0]], %[[ARG1]] : (tensor<f32>, tensor<?x?xf32>) -> tensor<?x?xi1>
-                // CHECK-NEXT:     %[[Y_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[C0]], %[[ARG0]] : (tensor<f32>, tensor<?x?xf32>) -> tensor<?x?xi1>
-                // CHECK-NEXT:     %[[Y_POS:.*]] = coreai.decomposable.broadcasting_greater %[[ARG0]], %[[C0]] : (tensor<?x?xf32>, tensor<f32>) -> tensor<?x?xi1>
-                // CHECK-NEXT:     %[[BASE_MINUS_PI:.*]] = coreai.decomposable.broadcasting_sub %[[BASE]], %[[PI]] : (tensor<?x?xf32>, tensor<f32>) -> tensor<?x?xf32>
-                // CHECK-NEXT:     %[[BASE_PLUS_PI:.*]] = coreai.decomposable.broadcasting_add %[[BASE]], %[[PI]] : (tensor<?x?xf32>, tensor<f32>) -> tensor<?x?xf32>
-                // CHECK-NEXT:     %[[CORRECTION:.*]] = coreai.decomposable.broadcasting_where %[[Y_NEG]], %[[BASE_MINUS_PI]], %[[BASE_PLUS_PI]] : (tensor<?x?xi1>, tensor<?x?xf32>, tensor<?x?xf32>) -> tensor<?x?xf32>
-                // CHECK-NEXT:     %[[NONZERO:.*]] = coreai.decomposable.broadcasting_where %[[X_NEG]], %[[CORRECTION]], %[[BASE]] : (tensor<?x?xi1>, tensor<?x?xf32>, tensor<?x?xf32>) -> tensor<?x?xf32>
-                // CHECK-NEXT:     %[[ZERO_NEG:.*]] = coreai.decomposable.broadcasting_where %[[Y_NEG]], %[[NHPI]], %[[C0]] : (tensor<?x?xi1>, tensor<f32>, tensor<f32>) -> tensor<?x?xf32>
-                // CHECK-NEXT:     %[[ZERO_RES:.*]] = coreai.decomposable.broadcasting_where %[[Y_POS]], %[[HPI]], %[[ZERO_NEG]] : (tensor<?x?xi1>, tensor<f32>, tensor<?x?xf32>) -> tensor<?x?xf32>
-                // CHECK-NEXT:     %[[RESULT:.*]] = coreai.decomposable.broadcasting_where %[[X_IS_ZERO]], %[[ZERO_RES]], %[[NONZERO]] : (tensor<?x?xi1>, tensor<?x?xf32>, tensor<?x?xf32>) -> tensor<?x?xf32>
+                // CHECK-NEXT:   coreai.graph @main(%[[Y:.*]]: tensor<?x?xf32> {coreai.name = "y"}, %[[X:.*]]: tensor<?x?xf32> {coreai.name = "x"}) -> (tensor<?x?xf32> {coreai.name = "{{.*}}"}) attributes {__coreai_pure__} {
+                // CHECK:          %[[NEG_INF:.*]] = coreai.constant dense<0xFF800000> : tensor<f32>
+                // CHECK:          %[[POS_INF:.*]] = coreai.constant dense<0x7F800000> : tensor<f32>
+                // CHECK:          %[[ZERO:.*]] = coreai.constant dense<0.000000e+00> : tensor<f32>
+                // CHECK:          %[[ONE:.*]] = coreai.constant dense<1.000000e+00> : tensor<f32>
+                // CHECK:          %[[PI:.*]] = coreai.constant dense<3.14159274> : tensor<f32>
+                // CHECK:          %[[NEG_PI:.*]] = coreai.constant dense<-3.14159274> : tensor<f32>
+                // CHECK:          %[[HPI:.*]] = coreai.constant dense<1.57079637> : tensor<f32>
+                // CHECK:          %[[NHPI:.*]] = coreai.constant dense<-1.57079637> : tensor<f32>
+                // CHECK:          %[[QPI:.*]] = coreai.constant dense<0.785398185> : tensor<f32>
+                // CHECK:          %[[NQPI:.*]] = coreai.constant dense<-0.785398185> : tensor<f32>
+                // CHECK:          %[[THREEQPI:.*]] = coreai.constant dense<2.3561945> : tensor<f32>
+                // CHECK:          %[[NTHREEQPI:.*]] = coreai.constant dense<-2.3561945> : tensor<f32>
+                // CHECK:          %[[Y_IS_ZERO:.*]] = coreai.decomposable.broadcasting_equal %[[Y]], %[[ZERO]]
+                // CHECK:          %[[X_IS_ZERO:.*]] = coreai.decomposable.broadcasting_equal %[[X]], %[[ZERO]]
+                // CHECK:          %[[Y_NEG_STRICT:.*]] = coreai.decomposable.broadcasting_greater %[[ZERO]], %[[Y]]
+                // CHECK:          %[[RECIP_Y:.*]] = coreai.decomposable.broadcasting_divide %[[ONE]], %[[Y]]
+                // CHECK:          %[[RECIP_Y_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[ZERO]], %[[RECIP_Y]]
+                // CHECK:          %[[Y_ZERO_NEG:.*]] = coreai.decomposable.broadcasting_and %[[Y_IS_ZERO]], %[[RECIP_Y_NEG]]
+                // CHECK:          %[[Y_NEG:.*]] = coreai.decomposable.broadcasting_or %[[Y_NEG_STRICT]], %[[Y_ZERO_NEG]]
+                // CHECK:          %[[X_NEG_STRICT:.*]] = coreai.decomposable.broadcasting_greater %[[ZERO]], %[[X]]
+                // CHECK:          %[[RECIP_X:.*]] = coreai.decomposable.broadcasting_divide %[[ONE]], %[[X]]
+                // CHECK:          %[[RECIP_X_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[ZERO]], %[[RECIP_X]]
+                // CHECK:          %[[X_ZERO_NEG:.*]] = coreai.decomposable.broadcasting_and %[[X_IS_ZERO]], %[[RECIP_X_NEG]]
+                // CHECK:          %[[X_NEG:.*]] = coreai.decomposable.broadcasting_or %[[X_NEG_STRICT]], %[[X_ZERO_NEG]]
+                // CHECK:          %[[X_IS_POS_INF:.*]] = coreai.decomposable.broadcasting_equal %[[X]], %[[POS_INF]]
+                // CHECK:          %[[X_IS_NEG_INF:.*]] = coreai.decomposable.broadcasting_equal %[[X]], %[[NEG_INF]]
+                // CHECK:          %[[X_IS_INF:.*]] = coreai.decomposable.broadcasting_or %[[X_IS_POS_INF]], %[[X_IS_NEG_INF]]
+                // CHECK:          %[[Y_IS_POS_INF:.*]] = coreai.decomposable.broadcasting_equal %[[Y]], %[[POS_INF]]
+                // CHECK:          %[[Y_IS_NEG_INF:.*]] = coreai.decomposable.broadcasting_equal %[[Y]], %[[NEG_INF]]
+                // CHECK:          %[[Y_IS_INF:.*]] = coreai.decomposable.broadcasting_or %[[Y_IS_POS_INF]], %[[Y_IS_NEG_INF]]
+                // CHECK:          %[[BOTH_INF:.*]] = coreai.decomposable.broadcasting_and %[[X_IS_INF]], %[[Y_IS_INF]]
+                // CHECK:          %[[BASE:.*]] = coreai.atan
+                // CHECK:          %[[RESULT:.*]] = coreai.decomposable.broadcasting_where %[[BOTH_INF]],
                 // CHECK-NEXT:     coreai.output %[[RESULT]] : tensor<?x?xf32>
                 // CHECK-NEXT:   }
                 // CHECK-NEXT: }
@@ -1222,26 +1250,14 @@ def forward(self, y: Tensor, x: Tensor) -> Tensor:
             ir,
             check_file="""
                 // CHECK-LABEL: module {
-                // CHECK-NEXT:   coreai.graph @main(%[[ARG0:.*]]: tensor<4xf32> {coreai.name = "y"}, %[[ARG1:.*]]: tensor<4xf32> {coreai.name = "x"}) -> (tensor<4xf32> {coreai.name = "{{.*}}"}) attributes {__coreai_pure__} {
-                // CHECK-NEXT:     %[[C1:.*]] = coreai.constant dense<1.000000e+00> : tensor<f32>
-                // CHECK-NEXT:     %[[C0:.*]] = coreai.constant dense<0.000000e+00> : tensor<f32>
-                // CHECK-NEXT:     %[[PI:.*]] = coreai.constant dense<3.14159274> : tensor<f32>
-                // CHECK-NEXT:     %[[HPI:.*]] = coreai.constant dense<1.57079637> : tensor<f32>
-                // CHECK-NEXT:     %[[NHPI:.*]] = coreai.constant dense<-1.57079637> : tensor<f32>
-                // CHECK-NEXT:     %[[X_IS_ZERO:.*]] = coreai.decomposable.broadcasting_equal %[[ARG1]], %[[C0]] : (tensor<4xf32>, tensor<f32>) -> tensor<4xi1>
-                // CHECK-NEXT:     %[[X_SAFE:.*]] = coreai.decomposable.broadcasting_where %[[X_IS_ZERO]], %[[C1]], %[[ARG1]] : (tensor<4xi1>, tensor<f32>, tensor<4xf32>) -> tensor<4xf32>
-                // CHECK-NEXT:     %[[RATIO:.*]] = coreai.decomposable.broadcasting_divide %[[ARG0]], %[[X_SAFE]] : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
-                // CHECK-NEXT:     %[[BASE:.*]] = coreai.atan %[[RATIO]] : tensor<4xf32> -> tensor<4xf32>
-                // CHECK-NEXT:     %[[X_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[C0]], %[[ARG1]] : (tensor<f32>, tensor<4xf32>) -> tensor<4xi1>
-                // CHECK-NEXT:     %[[Y_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[C0]], %[[ARG0]] : (tensor<f32>, tensor<4xf32>) -> tensor<4xi1>
-                // CHECK-NEXT:     %[[Y_POS:.*]] = coreai.decomposable.broadcasting_greater %[[ARG0]], %[[C0]] : (tensor<4xf32>, tensor<f32>) -> tensor<4xi1>
-                // CHECK-NEXT:     %[[BASE_MINUS_PI:.*]] = coreai.decomposable.broadcasting_sub %[[BASE]], %[[PI]] : (tensor<4xf32>, tensor<f32>) -> tensor<4xf32>
-                // CHECK-NEXT:     %[[BASE_PLUS_PI:.*]] = coreai.decomposable.broadcasting_add %[[BASE]], %[[PI]] : (tensor<4xf32>, tensor<f32>) -> tensor<4xf32>
-                // CHECK-NEXT:     %[[CORRECTION:.*]] = coreai.decomposable.broadcasting_where %[[Y_NEG]], %[[BASE_MINUS_PI]], %[[BASE_PLUS_PI]] : (tensor<4xi1>, tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
-                // CHECK-NEXT:     %[[NONZERO:.*]] = coreai.decomposable.broadcasting_where %[[X_NEG]], %[[CORRECTION]], %[[BASE]] : (tensor<4xi1>, tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
-                // CHECK-NEXT:     %[[ZERO_NEG:.*]] = coreai.decomposable.broadcasting_where %[[Y_NEG]], %[[NHPI]], %[[C0]] : (tensor<4xi1>, tensor<f32>, tensor<f32>) -> tensor<4xf32>
-                // CHECK-NEXT:     %[[ZERO_RES:.*]] = coreai.decomposable.broadcasting_where %[[Y_POS]], %[[HPI]], %[[ZERO_NEG]] : (tensor<4xi1>, tensor<f32>, tensor<4xf32>) -> tensor<4xf32>
-                // CHECK-NEXT:     %[[RESULT:.*]] = coreai.decomposable.broadcasting_where %[[X_IS_ZERO]], %[[ZERO_RES]], %[[NONZERO]] : (tensor<4xi1>, tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
+                // CHECK-NEXT:   coreai.graph @main(%[[Y:.*]]: tensor<4xf32> {coreai.name = "y"}, %[[X:.*]]: tensor<4xf32> {coreai.name = "x"}) -> (tensor<4xf32> {coreai.name = "{{.*}}"}) attributes {__coreai_pure__} {
+                // CHECK:          %[[ZERO:.*]] = coreai.constant dense<0.000000e+00> : tensor<f32>
+                // CHECK:          %[[ONE:.*]] = coreai.constant dense<1.000000e+00> : tensor<f32>
+                // CHECK:          %[[Y_NEG:.*]] = coreai.decomposable.broadcasting_or
+                // CHECK:          %[[X_NEG:.*]] = coreai.decomposable.broadcasting_or
+                // CHECK:          %[[BOTH_INF:.*]] = coreai.decomposable.broadcasting_and
+                // CHECK:          %[[BASE:.*]] = coreai.atan
+                // CHECK:          %[[RESULT:.*]] = coreai.decomposable.broadcasting_where %[[BOTH_INF]],
                 // CHECK-NEXT:     coreai.output %[[RESULT]] : tensor<4xf32>
                 // CHECK-NEXT:   }
                 // CHECK-NEXT: }