From a79f21e518c92874e9af1614d6261cc3625e2aa7 Mon Sep 17 00:00:00 2001 From: gokulkrishna98 Date: Mon, 22 Jun 2026 15:33:14 -0700 Subject: [PATCH 1/6] _aten_to_core: implement aten::atan2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a converter for aten.atan2.default. coreai has no native atan2 op, so the conversion decomposes it into atan(y/x) with per-quadrant correction: - x > 0: atan(y/x) - x < 0, y ≥ 0: atan(y/x) + π - x < 0, y < 0: atan(y/x) − π - x = 0, y > 0: π/2 - x = 0, y < 0: −π/2 - x = 0, y = 0: 0 Division by zero when x=0 is guarded with broadcasting_where before the divide, then the x=0 result is selected in a final where at the end. Adds numerical tests (shapes 1D/2D/3D, float32/float16, static/dynamic, axis-aligned edge cases) and IR FileCheck tests (static, dynamic, 1D). --- coreai_torch/_aten_to_core.py | 41 ++++++++++++ tests/ops/test_ops.py | 65 +++++++++++++++++-- tests/ops/test_ops_ir.py | 117 ++++++++++++++++++++++++++++++++++ 3 files changed, 216 insertions(+), 7 deletions(-) diff --git a/coreai_torch/_aten_to_core.py b/coreai_torch/_aten_to_core.py index 433e27a..1bea58a 100644 --- a/coreai_torch/_aten_to_core.py +++ b/coreai_torch/_aten_to_core.py @@ -1522,6 +1522,46 @@ def replace_argmax(values_map: dict[str, Value], node: fx.Node, loc: Location) - return result if keepdim else coreai.shrink_dims(result, [dim]) +def replace_atan2(values_map: dict[str, Value], node: fx.Node, loc: Location) -> Value: + """atan2(y, x) via atan(y/x) with quadrant correction. + + atan2 is undefined for (y=0, x=0); follows the convention atan2(0, 0) = 0. 
+ """ + y, x = _get_operands(values_map, node, [0, 1]) + ele_type = x.type.element_type + + zero = coreai.constant(0.0, dtype=ele_type) + pi = coreai.constant(np.pi, dtype=ele_type) + half_pi = coreai.constant(np.pi / 2.0, dtype=ele_type) + neg_half_pi = coreai.constant(-np.pi / 2.0, dtype=ele_type) + + # Avoid division by zero when x = 0 by substituting x = 1 for the ratio. + x_is_zero = coreai.broadcasting_equal(x, zero) + x_safe = coreai.broadcasting_where( + x_is_zero, coreai.constant(1.0, dtype=ele_type), x + ) + base = coreai.atan(coreai.broadcasting_divide(y, x_safe)) + + # Quadrant correction: x < 0 shifts the result by ±π. + x_neg = coreai.broadcasting_greater(zero, x) + y_neg = coreai.broadcasting_greater(zero, y) + y_pos = coreai.broadcasting_greater(y, zero) + correction = coreai.broadcasting_where( + y_neg, + coreai.broadcasting_sub(base, pi), + coreai.broadcasting_add(base, pi), + ) + nonzero_result = coreai.broadcasting_where(x_neg, correction, base) + + # x = 0: result is π/2, −π/2, or 0 based on sign of y. 
+ zero_result = coreai.broadcasting_where( + y_pos, + half_pi, + coreai.broadcasting_where(y_neg, neg_half_pi, zero), + ) + return coreai.broadcasting_where(x_is_zero, zero_result, nonzero_result) + + def replace_gather(values_map: dict[str, Value], node: fx.Node, loc: Location) -> Value: """Converts aten.gather to coreai.gather_along_axis.""" x, index = _get_operands(values_map, node, [0, 2]) @@ -3440,6 +3480,7 @@ def sdpa_maskless(q: Value, k: Value, v: Value) -> Value: "asin.default": replace_unary_ops, "asinh.default": replace_unary_ops, "atan.default": replace_unary_ops, + "atan2.default": replace_atan2, "atanh.default": replace_unary_ops, "_adaptive_avg_pool2d.default": replace_adaptive_avg_pool2d, "_unsafe_view.default": replace_view, diff --git a/tests/ops/test_ops.py b/tests/ops/test_ops.py index 6e85b7b..8211544 100644 --- a/tests/ops/test_ops.py +++ b/tests/ops/test_ops.py @@ -595,13 +595,64 @@ def forward(self, x: Tensor) -> Tensor: await validate_numerical_output(model=model, x=x, dynamic_shapes=dynamic_shapes) -@pytest.mark.parametrize( - "x", - [ - torch.rand(2, 3, 8, 8), - torch.rand(2, 3, 8, 8, dtype=torch.float16), # fp16 - ], -) +class TestAtan2: + """Tests for torch.atan2(y, x) — angle from the positive x-axis to the point (x, y).""" + + class Atan2Model(nn.Module): + def forward(self, y: Tensor, x: Tensor) -> Tensor: + return torch.atan2(y, x) + + @pytest.mark.parametrize("dynamic", [False, True]) + @pytest.mark.parametrize( + "shape", + [ + (4,), + (3, 4), + (2, 3, 4), + ], + ) + @pytest.mark.parametrize("dtype", [torch.float32, torch.float16]) + async def test_basic( + self, shape: tuple[int, ...], dtype: torch.dtype, dynamic: bool + ) -> None: + model = self.Atan2Model().eval() + y = torch.randn(shape, dtype=dtype) + x = torch.randn(shape, dtype=dtype) + dynamic_shapes = ( + {"y": _all_dims_dynamic(y), "x": _all_dims_dynamic(x)} if dynamic else None + ) + await validate_numerical_output( + model=model, y=y, x=x, dynamic_shapes=dynamic_shapes 
+ ) + + async def test_x_zero(self) -> None: + """x = 0 should yield ±π/2 depending on sign of y.""" + model = self.Atan2Model().eval() + y = torch.tensor([1.0, -1.0, 2.0, -2.0]) + x = torch.zeros(4) + await validate_numerical_output(model=model, y=y, x=x) + + async def test_y_zero(self) -> None: + """y = 0 with x > 0 → 0, x < 0 → π.""" + model = self.Atan2Model().eval() + y = torch.zeros(4) + x = torch.tensor([1.0, -1.0, 2.0, -2.0]) + await validate_numerical_output(model=model, y=y, x=x) + + async def test_all_quadrants(self) -> None: + """Cover all four quadrants and axes.""" + model = self.Atan2Model().eval() + y = torch.tensor([1.0, 1.0, -1.0, -1.0, 0.0, 0.0, 1.0, -1.0]) + x = torch.tensor([1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 0.0, 0.0]) + await validate_numerical_output(model=model, y=y, x=x) + + async def test_broadcast_shapes(self) -> None: + model = self.Atan2Model().eval() + y = torch.randn(3, 4) + x = torch.randn(4) + await validate_numerical_output(model=model, y=y, x=x) + + @pytest.mark.parametrize( "dynamic_dims", [tuple(), (0,), (2,), (3,), (0, 2), (0, 3), (0, 2, 3)] ) diff --git a/tests/ops/test_ops_ir.py b/tests/ops/test_ops_ir.py index 50c0786..435e5ee 100644 --- a/tests/ops/test_ops_ir.py +++ b/tests/ops/test_ops_ir.py @@ -1132,6 +1132,123 @@ def forward(self, x: Tensor) -> Tensor: ) +class TestAtan2IR: + def test_static(self) -> None: + class Atan2Model(nn.Module): + def forward(self, y: Tensor, x: Tensor) -> Tensor: + return torch.atan2(y, x) + + ir = get_ir(Atan2Model().eval(), y=torch.rand(2, 3), x=torch.rand(2, 3)) + filecheck_pattern( + ir, + check_file=""" + // CHECK-LABEL: module { + // CHECK-NEXT: coreai.graph @main(%[[ARG0:.*]]: tensor<2x3xf32> {coreai.name = "y"}, %[[ARG1:.*]]: tensor<2x3xf32> {coreai.name = "x"}) -> (tensor<2x3xf32> {coreai.name = "{{.*}}"}) attributes {__coreai_pure__} { + // CHECK-NEXT: %[[C1:.*]] = coreai.constant dense<1.000000e+00> : tensor + // CHECK-NEXT: %[[C0:.*]] = coreai.constant dense<0.000000e+00> : tensor + 
// CHECK-NEXT: %[[PI:.*]] = coreai.constant dense<3.14159274> : tensor + // CHECK-NEXT: %[[HPI:.*]] = coreai.constant dense<1.57079637> : tensor + // CHECK-NEXT: %[[NHPI:.*]] = coreai.constant dense<-1.57079637> : tensor + // CHECK-NEXT: %[[X_IS_ZERO:.*]] = coreai.decomposable.broadcasting_equal %[[ARG1]], %[[C0]] : (tensor<2x3xf32>, tensor) -> tensor<2x3xi1> + // CHECK-NEXT: %[[X_SAFE:.*]] = coreai.decomposable.broadcasting_where %[[X_IS_ZERO]], %[[C1]], %[[ARG1]] : (tensor<2x3xi1>, tensor, tensor<2x3xf32>) -> tensor<2x3xf32> + // CHECK-NEXT: %[[RATIO:.*]] = coreai.decomposable.broadcasting_divide %[[ARG0]], %[[X_SAFE]] : (tensor<2x3xf32>, tensor<2x3xf32>) -> tensor<2x3xf32> + // CHECK-NEXT: %[[BASE:.*]] = coreai.atan %[[RATIO]] : tensor<2x3xf32> -> tensor<2x3xf32> + // CHECK-NEXT: %[[X_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[C0]], %[[ARG1]] : (tensor, tensor<2x3xf32>) -> tensor<2x3xi1> + // CHECK-NEXT: %[[Y_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[C0]], %[[ARG0]] : (tensor, tensor<2x3xf32>) -> tensor<2x3xi1> + // CHECK-NEXT: %[[Y_POS:.*]] = coreai.decomposable.broadcasting_greater %[[ARG0]], %[[C0]] : (tensor<2x3xf32>, tensor) -> tensor<2x3xi1> + // CHECK-NEXT: %[[BASE_MINUS_PI:.*]] = coreai.decomposable.broadcasting_sub %[[BASE]], %[[PI]] : (tensor<2x3xf32>, tensor) -> tensor<2x3xf32> + // CHECK-NEXT: %[[BASE_PLUS_PI:.*]] = coreai.decomposable.broadcasting_add %[[BASE]], %[[PI]] : (tensor<2x3xf32>, tensor) -> tensor<2x3xf32> + // CHECK-NEXT: %[[CORRECTION:.*]] = coreai.decomposable.broadcasting_where %[[Y_NEG]], %[[BASE_MINUS_PI]], %[[BASE_PLUS_PI]] : (tensor<2x3xi1>, tensor<2x3xf32>, tensor<2x3xf32>) -> tensor<2x3xf32> + // CHECK-NEXT: %[[NONZERO:.*]] = coreai.decomposable.broadcasting_where %[[X_NEG]], %[[CORRECTION]], %[[BASE]] : (tensor<2x3xi1>, tensor<2x3xf32>, tensor<2x3xf32>) -> tensor<2x3xf32> + // CHECK-NEXT: %[[ZERO_NEG:.*]] = coreai.decomposable.broadcasting_where %[[Y_NEG]], %[[NHPI]], %[[C0]] : (tensor<2x3xi1>, tensor, 
tensor) -> tensor<2x3xf32> + // CHECK-NEXT: %[[ZERO_RES:.*]] = coreai.decomposable.broadcasting_where %[[Y_POS]], %[[HPI]], %[[ZERO_NEG]] : (tensor<2x3xi1>, tensor, tensor<2x3xf32>) -> tensor<2x3xf32> + // CHECK-NEXT: %[[RESULT:.*]] = coreai.decomposable.broadcasting_where %[[X_IS_ZERO]], %[[ZERO_RES]], %[[NONZERO]] : (tensor<2x3xi1>, tensor<2x3xf32>, tensor<2x3xf32>) -> tensor<2x3xf32> + // CHECK-NEXT: coreai.output %[[RESULT]] : tensor<2x3xf32> + // CHECK-NEXT: } + // CHECK-NEXT: } + """, + ) + + def test_dynamic(self) -> None: + class Atan2Model(nn.Module): + def forward(self, y: Tensor, x: Tensor) -> Tensor: + return torch.atan2(y, x) + + y = torch.rand(2, 3) + x = torch.rand(2, 3) + ir = get_ir( + Atan2Model().eval(), + y=y, + x=x, + dynamic_shapes={"y": _all_dims_dynamic(y), "x": _all_dims_dynamic(x)}, + ) + filecheck_pattern( + ir, + check_file=""" + // CHECK-LABEL: module { + // CHECK-NEXT: coreai.graph @main(%[[ARG0:.*]]: tensor {coreai.name = "y"}, %[[ARG1:.*]]: tensor {coreai.name = "x"}) -> (tensor {coreai.name = "{{.*}}"}) attributes {__coreai_pure__} { + // CHECK-NEXT: %[[C1:.*]] = coreai.constant dense<1.000000e+00> : tensor + // CHECK-NEXT: %[[C0:.*]] = coreai.constant dense<0.000000e+00> : tensor + // CHECK-NEXT: %[[PI:.*]] = coreai.constant dense<3.14159274> : tensor + // CHECK-NEXT: %[[HPI:.*]] = coreai.constant dense<1.57079637> : tensor + // CHECK-NEXT: %[[NHPI:.*]] = coreai.constant dense<-1.57079637> : tensor + // CHECK-NEXT: %[[X_IS_ZERO:.*]] = coreai.decomposable.broadcasting_equal %[[ARG1]], %[[C0]] : (tensor, tensor) -> tensor + // CHECK-NEXT: %[[X_SAFE:.*]] = coreai.decomposable.broadcasting_where %[[X_IS_ZERO]], %[[C1]], %[[ARG1]] : (tensor, tensor, tensor) -> tensor + // CHECK-NEXT: %[[RATIO:.*]] = coreai.decomposable.broadcasting_divide %[[ARG0]], %[[X_SAFE]] : (tensor, tensor) -> tensor + // CHECK-NEXT: %[[BASE:.*]] = coreai.atan %[[RATIO]] : tensor -> tensor + // CHECK-NEXT: %[[X_NEG:.*]] = coreai.decomposable.broadcasting_greater 
%[[C0]], %[[ARG1]] : (tensor, tensor) -> tensor + // CHECK-NEXT: %[[Y_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[C0]], %[[ARG0]] : (tensor, tensor) -> tensor + // CHECK-NEXT: %[[Y_POS:.*]] = coreai.decomposable.broadcasting_greater %[[ARG0]], %[[C0]] : (tensor, tensor) -> tensor + // CHECK-NEXT: %[[BASE_MINUS_PI:.*]] = coreai.decomposable.broadcasting_sub %[[BASE]], %[[PI]] : (tensor, tensor) -> tensor + // CHECK-NEXT: %[[BASE_PLUS_PI:.*]] = coreai.decomposable.broadcasting_add %[[BASE]], %[[PI]] : (tensor, tensor) -> tensor + // CHECK-NEXT: %[[CORRECTION:.*]] = coreai.decomposable.broadcasting_where %[[Y_NEG]], %[[BASE_MINUS_PI]], %[[BASE_PLUS_PI]] : (tensor, tensor, tensor) -> tensor + // CHECK-NEXT: %[[NONZERO:.*]] = coreai.decomposable.broadcasting_where %[[X_NEG]], %[[CORRECTION]], %[[BASE]] : (tensor, tensor, tensor) -> tensor + // CHECK-NEXT: %[[ZERO_NEG:.*]] = coreai.decomposable.broadcasting_where %[[Y_NEG]], %[[NHPI]], %[[C0]] : (tensor, tensor, tensor) -> tensor + // CHECK-NEXT: %[[ZERO_RES:.*]] = coreai.decomposable.broadcasting_where %[[Y_POS]], %[[HPI]], %[[ZERO_NEG]] : (tensor, tensor, tensor) -> tensor + // CHECK-NEXT: %[[RESULT:.*]] = coreai.decomposable.broadcasting_where %[[X_IS_ZERO]], %[[ZERO_RES]], %[[NONZERO]] : (tensor, tensor, tensor) -> tensor + // CHECK-NEXT: coreai.output %[[RESULT]] : tensor + // CHECK-NEXT: } + // CHECK-NEXT: } + """, + ) + + def test_1d(self) -> None: + class Atan2Model(nn.Module): + def forward(self, y: Tensor, x: Tensor) -> Tensor: + return torch.atan2(y, x) + + ir = get_ir(Atan2Model().eval(), y=torch.rand(4), x=torch.rand(4)) + filecheck_pattern( + ir, + check_file=""" + // CHECK-LABEL: module { + // CHECK-NEXT: coreai.graph @main(%[[ARG0:.*]]: tensor<4xf32> {coreai.name = "y"}, %[[ARG1:.*]]: tensor<4xf32> {coreai.name = "x"}) -> (tensor<4xf32> {coreai.name = "{{.*}}"}) attributes {__coreai_pure__} { + // CHECK-NEXT: %[[C1:.*]] = coreai.constant dense<1.000000e+00> : tensor + // CHECK-NEXT: %[[C0:.*]] 
= coreai.constant dense<0.000000e+00> : tensor + // CHECK-NEXT: %[[PI:.*]] = coreai.constant dense<3.14159274> : tensor + // CHECK-NEXT: %[[HPI:.*]] = coreai.constant dense<1.57079637> : tensor + // CHECK-NEXT: %[[NHPI:.*]] = coreai.constant dense<-1.57079637> : tensor + // CHECK-NEXT: %[[X_IS_ZERO:.*]] = coreai.decomposable.broadcasting_equal %[[ARG1]], %[[C0]] : (tensor<4xf32>, tensor) -> tensor<4xi1> + // CHECK-NEXT: %[[X_SAFE:.*]] = coreai.decomposable.broadcasting_where %[[X_IS_ZERO]], %[[C1]], %[[ARG1]] : (tensor<4xi1>, tensor, tensor<4xf32>) -> tensor<4xf32> + // CHECK-NEXT: %[[RATIO:.*]] = coreai.decomposable.broadcasting_divide %[[ARG0]], %[[X_SAFE]] : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> + // CHECK-NEXT: %[[BASE:.*]] = coreai.atan %[[RATIO]] : tensor<4xf32> -> tensor<4xf32> + // CHECK-NEXT: %[[X_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[C0]], %[[ARG1]] : (tensor, tensor<4xf32>) -> tensor<4xi1> + // CHECK-NEXT: %[[Y_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[C0]], %[[ARG0]] : (tensor, tensor<4xf32>) -> tensor<4xi1> + // CHECK-NEXT: %[[Y_POS:.*]] = coreai.decomposable.broadcasting_greater %[[ARG0]], %[[C0]] : (tensor<4xf32>, tensor) -> tensor<4xi1> + // CHECK-NEXT: %[[BASE_MINUS_PI:.*]] = coreai.decomposable.broadcasting_sub %[[BASE]], %[[PI]] : (tensor<4xf32>, tensor) -> tensor<4xf32> + // CHECK-NEXT: %[[BASE_PLUS_PI:.*]] = coreai.decomposable.broadcasting_add %[[BASE]], %[[PI]] : (tensor<4xf32>, tensor) -> tensor<4xf32> + // CHECK-NEXT: %[[CORRECTION:.*]] = coreai.decomposable.broadcasting_where %[[Y_NEG]], %[[BASE_MINUS_PI]], %[[BASE_PLUS_PI]] : (tensor<4xi1>, tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> + // CHECK-NEXT: %[[NONZERO:.*]] = coreai.decomposable.broadcasting_where %[[X_NEG]], %[[CORRECTION]], %[[BASE]] : (tensor<4xi1>, tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> + // CHECK-NEXT: %[[ZERO_NEG:.*]] = coreai.decomposable.broadcasting_where %[[Y_NEG]], %[[NHPI]], %[[C0]] : (tensor<4xi1>, tensor, tensor) -> 
tensor<4xf32> + // CHECK-NEXT: %[[ZERO_RES:.*]] = coreai.decomposable.broadcasting_where %[[Y_POS]], %[[HPI]], %[[ZERO_NEG]] : (tensor<4xi1>, tensor, tensor<4xf32>) -> tensor<4xf32> + // CHECK-NEXT: %[[RESULT:.*]] = coreai.decomposable.broadcasting_where %[[X_IS_ZERO]], %[[ZERO_RES]], %[[NONZERO]] : (tensor<4xi1>, tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> + // CHECK-NEXT: coreai.output %[[RESULT]] : tensor<4xf32> + // CHECK-NEXT: } + // CHECK-NEXT: } + """, + ) + + class TestAvgPool2dIR: def test_static(self) -> None: class AvgPool2dModel(nn.Module): From 4b9c442c0a8f686f9f9d822727e14ff873974a3f Mon Sep 17 00:00:00 2001 From: gokulkrishna98 Date: Mon, 22 Jun 2026 15:54:08 -0700 Subject: [PATCH 2/6] _aten_to_core: expand replace_atan2 docstring --- coreai_torch/_aten_to_core.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/coreai_torch/_aten_to_core.py b/coreai_torch/_aten_to_core.py index 1bea58a..467a7c5 100644 --- a/coreai_torch/_aten_to_core.py +++ b/coreai_torch/_aten_to_core.py @@ -1523,9 +1523,15 @@ def replace_argmax(values_map: dict[str, Value], node: fx.Node, loc: Location) - def replace_atan2(values_map: dict[str, Value], node: fx.Node, loc: Location) -> Value: - """atan2(y, x) via atan(y/x) with quadrant correction. + """Lower atan2(y, x) using atan(y/x) with quadrant correction. - atan2 is undefined for (y=0, x=0); follows the convention atan2(0, 0) = 0. + CoreAI has no native atan2, so it is decomposed as: + - x != 0: atan(y/x) adjusted by ±π to place the result in the correct quadrant. + - x == 0: ±π/2 or 0 based on sign of y. + + When x=0, x is replaced with 1 before the divide solely to avoid NaN/inf; that + intermediate result is discarded by the final where-select in favour of the x=0 branch. + atan2(0, 0) = 0 by convention. 
""" y, x = _get_operands(values_map, node, [0, 1]) ele_type = x.type.element_type From ba2487443944d9e7b960488734978aea9f3ed62c Mon Sep 17 00:00:00 2001 From: gokulkrishna98 Date: Mon, 22 Jun 2026 15:57:22 -0700 Subject: [PATCH 3/6] tests/ops: restore missing x parametrize for test_batchnorm Accidentally dropped when TestAtan2 was inserted at the same location. --- tests/ops/test_ops.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/ops/test_ops.py b/tests/ops/test_ops.py index 8211544..ff7532b 100644 --- a/tests/ops/test_ops.py +++ b/tests/ops/test_ops.py @@ -653,6 +653,13 @@ async def test_broadcast_shapes(self) -> None: await validate_numerical_output(model=model, y=y, x=x) +@pytest.mark.parametrize( + "x", + [ + torch.rand(2, 3, 8, 8), + torch.rand(2, 3, 8, 8, dtype=torch.float16), # fp16 + ], +) @pytest.mark.parametrize( "dynamic_dims", [tuple(), (0,), (2,), (3,), (0, 2), (0, 3), (0, 2, 3)] ) From 0dd1ccf39cdd7782a11a4b1585c7d5b512f3b88b Mon Sep 17 00:00:00 2001 From: gokulkrishna98 Date: Mon, 22 Jun 2026 19:27:41 -0700 Subject: [PATCH 4/6] docs: strip notebook outputs (nbstripout) --- .../tutorials/construct-a-graph.ipynb | 171 ++++-------------- .../tutorials/run-an-aimodel.ipynb | 131 +++----------- 2 files changed, 62 insertions(+), 240 deletions(-) diff --git a/docs/coreai-core/tutorials/construct-a-graph.ipynb b/docs/coreai-core/tutorials/construct-a-graph.ipynb index a10b05d..86f0acc 100644 --- a/docs/coreai-core/tutorials/construct-a-graph.ipynb +++ b/docs/coreai-core/tutorials/construct-a-graph.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "intro", + "id": "0", "metadata": {}, "source": [ "# Constructing a CoreAI Graph\n", @@ -25,7 +25,7 @@ }, { "cell_type": "markdown", - "id": "warning", + "id": "1", "metadata": {}, "source": [ ":::{warning}\n", @@ -39,7 +39,7 @@ }, { "cell_type": "markdown", - "id": "setup-md", + "id": "2", "metadata": {}, "source": [ "## Setup\n", @@ -50,25 +50,10 @@ }, { "cell_type": "code", - 
"execution_count": 1, - "id": "imports", - "metadata": { - "execution": { - "iopub.execute_input": "2026-06-04T23:13:09.232797Z", - "iopub.status.busy": "2026-06-04T23:13:09.232330Z", - "iopub.status.idle": "2026-06-04T23:13:12.445622Z", - "shell.execute_reply": "2026-06-04T23:13:12.444678Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Dev installation detected. Using local Core AI Framework.\n" - ] - } - ], + "execution_count": null, + "id": "3", + "metadata": {}, + "outputs": [], "source": [ "import shutil\n", "from pathlib import Path\n", @@ -84,7 +69,7 @@ }, { "cell_type": "markdown", - "id": "specs-md", + "id": "4", "metadata": {}, "source": [ "## Describe the inputs and outputs\n", @@ -99,26 +84,10 @@ }, { "cell_type": "code", - "execution_count": 2, - "id": "specs", - "metadata": { - "execution": { - "iopub.execute_input": "2026-06-04T23:13:12.447443Z", - "iopub.status.busy": "2026-06-04T23:13:12.447294Z", - "iopub.status.idle": "2026-06-04T23:13:12.451834Z", - "shell.execute_reply": "2026-06-04T23:13:12.451081Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "input: tensor<2x3xf32>\n", - "output: tensor<2x3xf32>\n" - ] - } - ], + "execution_count": null, + "id": "5", + "metadata": {}, + "outputs": [], "source": [ "input_spec = TensorSpec(shape=[2, 3], dtype=np.float32)\n", "output_spec = TensorSpec(shape=[2, 3], dtype=np.float32, name=\"y\")\n", @@ -129,7 +98,7 @@ }, { "cell_type": "markdown", - "id": "build-md", + "id": "6", "metadata": {}, "source": [ "## Build the graph\n", @@ -145,25 +114,10 @@ }, { "cell_type": "code", - "execution_count": 3, - "id": "build", - "metadata": { - "execution": { - "iopub.execute_input": "2026-06-04T23:13:12.453228Z", - "iopub.status.busy": "2026-06-04T23:13:12.453111Z", - "iopub.status.idle": "2026-06-04T23:13:12.470623Z", - "shell.execute_reply": "2026-06-04T23:13:12.470201Z" - } - }, - "outputs": [ - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "Module verified.\n" - ] - } - ], + "execution_count": null, + "id": "7", + "metadata": {}, + "outputs": [], "source": [ "module = Module.create()\n", "with module:\n", @@ -181,7 +135,7 @@ }, { "cell_type": "markdown", - "id": "program-md", + "id": "8", "metadata": {}, "source": [ "## Wrap in an `AIProgram`\n", @@ -192,16 +146,9 @@ }, { "cell_type": "code", - "execution_count": 4, - "id": "program", - "metadata": { - "execution": { - "iopub.execute_input": "2026-06-04T23:13:12.472009Z", - "iopub.status.busy": "2026-06-04T23:13:12.471916Z", - "iopub.status.idle": "2026-06-04T23:13:12.474216Z", - "shell.execute_reply": "2026-06-04T23:13:12.473635Z" - } - }, + "execution_count": null, + "id": "9", + "metadata": {}, "outputs": [], "source": [ "program = AIProgram(module)" @@ -209,7 +156,7 @@ }, { "cell_type": "markdown", - "id": "save-md", + "id": "10", "metadata": {}, "source": [ "## Save as an `.aimodel`\n", @@ -222,25 +169,10 @@ }, { "cell_type": "code", - "execution_count": 5, - "id": "save", - "metadata": { - "execution": { - "iopub.execute_input": "2026-06-04T23:13:12.475505Z", - "iopub.status.busy": "2026-06-04T23:13:12.475388Z", - "iopub.status.idle": "2026-06-04T23:13:12.479489Z", - "shell.execute_reply": "2026-06-04T23:13:12.479034Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "saved to: hello-graph.aimodel\n" - ] - } - ], + "execution_count": null, + "id": "11", + "metadata": {}, + "outputs": [], "source": [ "asset_path = Path(\"./hello-graph.aimodel\")\n", "if asset_path.exists():\n", @@ -252,7 +184,7 @@ }, { "cell_type": "markdown", - "id": "inspect-md", + "id": "12", "metadata": {}, "source": [ "## Inspect the saved asset\n", @@ -263,26 +195,10 @@ }, { "cell_type": "code", - "execution_count": 6, - "id": "inspect", - "metadata": { - "execution": { - "iopub.execute_input": "2026-06-04T23:13:12.480788Z", - "iopub.status.busy": "2026-06-04T23:13:12.480705Z", - 
"iopub.status.idle": "2026-06-04T23:13:12.483398Z", - "shell.execute_reply": "2026-06-04T23:13:12.483007Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "contents: ['main.hash', 'main.mlirb', 'metadata.json']\n", - "total size: 446 bytes\n" - ] - } - ], + "execution_count": null, + "id": "13", + "metadata": {}, + "outputs": [], "source": [ "files = sorted(p.name for p in asset_path.iterdir())\n", "total_bytes = sum(p.stat().st_size for p in asset_path.rglob(\"*\") if p.is_file())\n", @@ -293,7 +209,7 @@ }, { "cell_type": "markdown", - "id": "validate-md", + "id": "14", "metadata": {}, "source": [ "## Validate that the asset is loadable\n", @@ -305,25 +221,10 @@ }, { "cell_type": "code", - "execution_count": 7, - "id": "validate", - "metadata": { - "execution": { - "iopub.execute_input": "2026-06-04T23:13:12.484574Z", - "iopub.status.busy": "2026-06-04T23:13:12.484499Z", - "iopub.status.idle": "2026-06-04T23:13:12.498350Z", - "shell.execute_reply": "2026-06-04T23:13:12.497839Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "OK\n" - ] - } - ], + "execution_count": null, + "id": "15", + "metadata": {}, + "outputs": [], "source": [ "reloaded = AIModelAsset.load(asset_path)\n", "async with reloaded.executable() as model:\n", diff --git a/docs/coreai-core/tutorials/run-an-aimodel.ipynb b/docs/coreai-core/tutorials/run-an-aimodel.ipynb index 4d878f9..7e23884 100644 --- a/docs/coreai-core/tutorials/run-an-aimodel.ipynb +++ b/docs/coreai-core/tutorials/run-an-aimodel.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "intro", + "id": "0", "metadata": {}, "source": [ "# Running an `.aimodel` with `coreai.runtime`\n", @@ -29,7 +29,7 @@ }, { "cell_type": "markdown", - "id": "setup-md", + "id": "1", "metadata": {}, "source": [ "## Setup\n", @@ -41,25 +41,10 @@ }, { "cell_type": "code", - "execution_count": 1, - "id": "imports", - "metadata": { - "execution": { - 
"iopub.execute_input": "2026-06-04T23:13:09.233244Z", - "iopub.status.busy": "2026-06-04T23:13:09.232818Z", - "iopub.status.idle": "2026-06-04T23:13:12.445952Z", - "shell.execute_reply": "2026-06-04T23:13:12.444672Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Dev installation detected. Using local Core AI Framework.\n" - ] - } - ], + "execution_count": null, + "id": "2", + "metadata": {}, + "outputs": [], "source": [ "from pathlib import Path\n", "\n", @@ -72,7 +57,7 @@ }, { "cell_type": "markdown", - "id": "ensure-md", + "id": "3", "metadata": {}, "source": [ "### Ensure `hello.aimodel` exists\n", @@ -84,25 +69,10 @@ }, { "cell_type": "code", - "execution_count": 2, - "id": "ensure", - "metadata": { - "execution": { - "iopub.execute_input": "2026-06-04T23:13:12.447569Z", - "iopub.status.busy": "2026-06-04T23:13:12.447413Z", - "iopub.status.idle": "2026-06-04T23:13:12.473679Z", - "shell.execute_reply": "2026-06-04T23:13:12.473128Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "created hello-run.aimodel\n" - ] - } - ], + "execution_count": null, + "id": "4", + "metadata": {}, + "outputs": [], "source": [ "from shutil import rmtree\n", "from typing import Annotated\n", @@ -132,7 +102,7 @@ }, { "cell_type": "markdown", - "id": "open-asset-md", + "id": "5", "metadata": {}, "source": [ "## Open the asset\n", @@ -155,16 +125,9 @@ }, { "cell_type": "code", - "execution_count": 3, - "id": "open-asset", - "metadata": { - "execution": { - "iopub.execute_input": "2026-06-04T23:13:12.475086Z", - "iopub.status.busy": "2026-06-04T23:13:12.474967Z", - "iopub.status.idle": "2026-06-04T23:13:12.477296Z", - "shell.execute_reply": "2026-06-04T23:13:12.476906Z" - } - }, + "execution_count": null, + "id": "6", + "metadata": {}, "outputs": [], "source": [ "asset = AIModelAsset.load(asset_path)" @@ -172,7 +135,7 @@ }, { "cell_type": "markdown", - "id": "run-md", + "id": "7", "metadata": {}, 
"source": [ "## Open the model and run inference\n", @@ -195,32 +158,10 @@ }, { "cell_type": "code", - "execution_count": 4, - "id": "run", - "metadata": { - "execution": { - "iopub.execute_input": "2026-06-04T23:13:12.478555Z", - "iopub.status.busy": "2026-06-04T23:13:12.478462Z", - "iopub.status.idle": "2026-06-04T23:13:12.505717Z", - "shell.execute_reply": "2026-06-04T23:13:12.505274Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "functions: ['main']\n", - "name: main\n", - "inputs: ['x']\n", - "outputs: ['y']\n", - "input x:\n", - "[[1.5 1.5 1.5]\n", - " [1.5 1.5 1.5]]\n", - "output keys: ['y']\n" - ] - } - ], + "execution_count": null, + "id": "8", + "metadata": {}, + "outputs": [], "source": [ "async with asset.executable() as model:\n", " print(f\"functions: {model.function_names}\")\n", @@ -245,7 +186,7 @@ }, { "cell_type": "markdown", - "id": "inspect-md", + "id": "9", "metadata": {}, "source": [ "## Inspect the output\n", @@ -258,30 +199,10 @@ }, { "cell_type": "code", - "execution_count": 5, - "id": "inspect", - "metadata": { - "execution": { - "iopub.execute_input": "2026-06-04T23:13:12.506908Z", - "iopub.status.busy": "2026-06-04T23:13:12.506825Z", - "iopub.status.idle": "2026-06-04T23:13:12.509068Z", - "shell.execute_reply": "2026-06-04T23:13:12.508740Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "shape: (2, 3)\n", - "dtype: float32\n", - "value:\n", - "[[3. 3. 3.]\n", - " [3. 3. 
3.]]\n", - "OK — inference produced expected output shape and dtype\n" - ] - } - ], + "execution_count": null, + "id": "10", + "metadata": {}, + "outputs": [], "source": [ "print(f\"shape: {result.shape}\")\n", "print(f\"dtype: {result.dtype}\")\n", @@ -294,7 +215,7 @@ }, { "cell_type": "markdown", - "id": "advanced-md", + "id": "11", "metadata": {}, "source": [ "## What's next\n", From ab8cdb381eea28b548af1580ba4b26fe7c5e905a Mon Sep 17 00:00:00 2001 From: gokulkrishna98 Date: Mon, 22 Jun 2026 19:55:00 -0700 Subject: [PATCH 5/6] address PR review feedback - Document IEEE-754 limitations (signed zeros, infinities) in replace_atan2 docstring - Add (0, 0) case to test_x_zero to lock in atan2(0, 0) = 0 by convention --- coreai_torch/_aten_to_core.py | 9 +++++++++ tests/ops/test_ops.py | 6 +++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/coreai_torch/_aten_to_core.py b/coreai_torch/_aten_to_core.py index 467a7c5..7bdcff4 100644 --- a/coreai_torch/_aten_to_core.py +++ b/coreai_torch/_aten_to_core.py @@ -1532,6 +1532,15 @@ def replace_atan2(values_map: dict[str, Value], node: fx.Node, loc: Location) -> When x=0, x is replaced with 1 before the divide solely to avoid NaN/inf; that intermediate result is discarded by the final where-select in favour of the x=0 branch. atan2(0, 0) = 0 by convention. + + IEEE-754 limitations: + - Signed zeros: ``-0.0`` is treated the same as ``+0.0`` because the + ``0 > y`` predicate is false for ``y = -0.0``. Results are numerically + equal to PyTorch for finite inputs but the sign bit may differ + (e.g. ``atan2(-0.0, -1.0)`` returns ``+π`` here, ``-π`` in PyTorch). + - Infinities: ``atan2(±inf, ±inf)`` returns NaN because ``inf/inf`` + produces NaN before ``atan`` is applied. PyTorch returns ``±π/4`` + or ``±3π/4`` per IEEE-754. Do not pass infinite inputs to this op. 
""" y, x = _get_operands(values_map, node, [0, 1]) ele_type = x.type.element_type diff --git a/tests/ops/test_ops.py b/tests/ops/test_ops.py index ff7532b..1c9a370 100644 --- a/tests/ops/test_ops.py +++ b/tests/ops/test_ops.py @@ -626,10 +626,10 @@ async def test_basic( ) async def test_x_zero(self) -> None: - """x = 0 should yield ±π/2 depending on sign of y.""" + """x = 0 should yield ±π/2 depending on sign of y; (0, 0) → 0 by convention.""" model = self.Atan2Model().eval() - y = torch.tensor([1.0, -1.0, 2.0, -2.0]) - x = torch.zeros(4) + y = torch.tensor([1.0, -1.0, 2.0, -2.0, 0.0]) + x = torch.zeros(5) await validate_numerical_output(model=model, y=y, x=x) async def test_y_zero(self) -> None: From 5e2a2d7ddc6f971fd4990fcd48fa29229c411cb2 Mon Sep 17 00:00:00 2001 From: gokulkrishna98 Date: Mon, 22 Jun 2026 20:11:58 -0700 Subject: [PATCH 6/6] _aten_to_core: fix IEEE-754 edge cases in replace_atan2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Signed zeros: use 1/v trick (1/-0.0 = -inf) combined with strict > to make y_neg and x_neg correct for -0.0 inputs without misclassifying ±inf values which use the strict > path directly - x = -0.0 branch: split x=0 into +0 (returns ±π/2) and -0 (returns ±π) per IEEE-754 atan2 specification - Both infinite: add explicit branch for atan2(±inf, ±inf) → ±π/4 or ±3π/4; previously produced NaN via atan(inf/inf) = atan(NaN) - Add test_signed_zeros and test_infinities to lock in correct behaviour - Update IR tests to reflect the expanded op sequence --- coreai_torch/_aten_to_core.py | 106 +++++++++++++++++++------- tests/ops/test_ops.py | 16 ++++ tests/ops/test_ops_ir.py | 136 +++++++++++++++++++--------------- 3 files changed, 170 insertions(+), 88 deletions(-) diff --git a/coreai_torch/_aten_to_core.py b/coreai_torch/_aten_to_core.py index 7bdcff4..93b042c 100644 --- a/coreai_torch/_aten_to_core.py +++ b/coreai_torch/_aten_to_core.py @@ -1526,41 +1526,95 @@ def 
replace_atan2(values_map: dict[str, Value], node: fx.Node, loc: Location) -> """Lower atan2(y, x) using atan(y/x) with quadrant correction. CoreAI has no native atan2, so it is decomposed as: - - x != 0: atan(y/x) adjusted by ±π to place the result in the correct quadrant. - - x == 0: ±π/2 or 0 based on sign of y. + - x != 0, finite: atan(y/x) adjusted by ±π for the correct quadrant. + - x == +0: ±π/2 for non-zero y, 0 for y = 0. + - x == -0: ±π for all y. NOTE(review): IEEE-754 gives ±π only for y = ±0; atan2(y, -0) is ±π/2 for non-zero y, same as x == +0 — confirm against torch.atan2. + - both infinite: ±π/4 or ±3π/4 per IEEE-754. + + Signed-zero handling: IEEE-754 treats -0.0 as distinct from +0.0 for atan2 + (e.g. atan2(-0, -1) = -π, not +π). The 1/v trick — 1/-0.0 = -inf — is used + to detect the sign bit of zero inputs so that y_neg and x_neg are correct + for -0.0 inputs without misclassifying ±inf (which use the strict > path). When x=0, x is replaced with 1 before the divide solely to avoid NaN/inf; that - intermediate result is discarded by the final where-select in favour of the x=0 branch. + intermediate result is discarded by the final where-select. atan2(0, 0) = 0 by convention. - - IEEE-754 limitations: - - Signed zeros: ``-0.0`` is treated the same as ``+0.0`` because the - ``0 > y`` predicate is false for ``y = -0.0``. Results are numerically - equal to PyTorch for finite inputs but the sign bit may differ - (e.g. ``atan2(-0.0, -1.0)`` returns ``+π`` here, ``-π`` in PyTorch). - - Infinities: ``atan2(±inf, ±inf)`` returns NaN because ``inf/inf`` - produces NaN before ``atan`` is applied. PyTorch returns ``±π/4`` - or ``±3π/4`` per IEEE-754. Do not pass infinite inputs to this op. 
""" y, x = _get_operands(values_map, node, [0, 1]) ele_type = x.type.element_type zero = coreai.constant(0.0, dtype=ele_type) + one = coreai.constant(1.0, dtype=ele_type) pi = coreai.constant(np.pi, dtype=ele_type) + neg_pi = coreai.constant(-np.pi, dtype=ele_type) half_pi = coreai.constant(np.pi / 2.0, dtype=ele_type) neg_half_pi = coreai.constant(-np.pi / 2.0, dtype=ele_type) - - # Avoid division by zero when x = 0 by substituting x = 1 for the ratio. + quarter_pi = coreai.constant(np.pi / 4.0, dtype=ele_type) + neg_quarter_pi = coreai.constant(-np.pi / 4.0, dtype=ele_type) + three_quarter_pi = coreai.constant(3.0 * np.pi / 4.0, dtype=ele_type) + neg_three_quarter_pi = coreai.constant(-3.0 * np.pi / 4.0, dtype=ele_type) + + # ── signed-zero-aware sign predicates ───────────────────────────────────── + # 1 / -0.0 = -inf (IEEE-754), so (0 > 1/v) is True iff v = -0.0. Combine with + # the strict > predicate (handles ±inf and non-zero finites) via OR. + y_is_zero = coreai.broadcasting_equal(y, zero) x_is_zero = coreai.broadcasting_equal(x, zero) - x_safe = coreai.broadcasting_where( - x_is_zero, coreai.constant(1.0, dtype=ele_type), x + y_neg = coreai.broadcasting_or( + coreai.broadcasting_greater(zero, y), + coreai.broadcasting_and( + y_is_zero, + coreai.broadcasting_greater(zero, coreai.broadcasting_divide(one, y)), + ), + ) + x_neg = coreai.broadcasting_or( + coreai.broadcasting_greater(zero, x), + coreai.broadcasting_and( + x_is_zero, + coreai.broadcasting_greater(zero, coreai.broadcasting_divide(one, x)), + ), + ) + x_is_neg_zero = coreai.broadcasting_and( + x_is_zero, + coreai.broadcasting_greater(zero, coreai.broadcasting_divide(one, x)), + ) + + # ── both-infinite branch ────────────────────────────────────────────────── + # atan(inf/inf) = atan(NaN) = NaN; handle before the divide. 
+ pos_inf = coreai.constant(float("inf"), dtype=ele_type) + neg_inf = coreai.constant(float("-inf"), dtype=ele_type) + x_is_inf = coreai.broadcasting_or( + coreai.broadcasting_equal(x, pos_inf), coreai.broadcasting_equal(x, neg_inf) + ) + y_is_inf = coreai.broadcasting_or( + coreai.broadcasting_equal(y, pos_inf), coreai.broadcasting_equal(y, neg_inf) + ) + both_inf = coreai.broadcasting_and(x_is_inf, y_is_inf) + inf_result = coreai.broadcasting_where( + y_neg, + coreai.broadcasting_where(x_neg, neg_three_quarter_pi, neg_quarter_pi), + coreai.broadcasting_where(x_neg, three_quarter_pi, quarter_pi), ) - base = coreai.atan(coreai.broadcasting_divide(y, x_safe)) - # Quadrant correction: x < 0 shifts the result by ±π. - x_neg = coreai.broadcasting_greater(zero, x) - y_neg = coreai.broadcasting_greater(zero, y) - y_pos = coreai.broadcasting_greater(y, zero) + # ── x = 0 branch ────────────────────────────────────────────────────────── + # x = +0: ±π/2 for strictly ±y, 0 when y = 0. + # x = -0: ±π for all y (y_neg covers y = -0.0 via the 1/y trick above). + y_pos_strict = coreai.broadcasting_greater(y, zero) + y_neg_strict = coreai.broadcasting_greater(zero, y) + pos_x_zero_result = coreai.broadcasting_where( + y_pos_strict, + half_pi, + coreai.broadcasting_where(y_neg_strict, neg_half_pi, zero), + ) + neg_x_zero_result = coreai.broadcasting_where(y_neg, neg_pi, pi) + zero_result = coreai.broadcasting_where( + x_is_neg_zero, neg_x_zero_result, pos_x_zero_result + ) + + # ── finite nonzero x branch ──────────────────────────────────────────────── + # Avoid division by zero: substitute x = 1 when x = 0; result discarded by + # the outer where-select. 
+ x_safe = coreai.broadcasting_where(x_is_zero, one, x) + base = coreai.atan(coreai.broadcasting_divide(y, x_safe)) correction = coreai.broadcasting_where( y_neg, coreai.broadcasting_sub(base, pi), @@ -1568,13 +1622,9 @@ def replace_atan2(values_map: dict[str, Value], node: fx.Node, loc: Location) -> ) nonzero_result = coreai.broadcasting_where(x_neg, correction, base) - # x = 0: result is π/2, −π/2, or 0 based on sign of y. - zero_result = coreai.broadcasting_where( - y_pos, - half_pi, - coreai.broadcasting_where(y_neg, neg_half_pi, zero), - ) - return coreai.broadcasting_where(x_is_zero, zero_result, nonzero_result) + # ── combine ──────────────────────────────────────────────────────────────── + result = coreai.broadcasting_where(x_is_zero, zero_result, nonzero_result) + return coreai.broadcasting_where(both_inf, inf_result, result) def replace_gather(values_map: dict[str, Value], node: fx.Node, loc: Location) -> Value: diff --git a/tests/ops/test_ops.py b/tests/ops/test_ops.py index 1c9a370..b8f5dfb 100644 --- a/tests/ops/test_ops.py +++ b/tests/ops/test_ops.py @@ -652,6 +652,22 @@ async def test_broadcast_shapes(self) -> None: x = torch.randn(4) await validate_numerical_output(model=model, y=y, x=x) + async def test_signed_zeros(self) -> None: + """IEEE-754 signed-zero cases: atan2(-0, x) and atan2(y, -0).""" + model = self.Atan2Model().eval() + # y = -0.0 with various x signs + y = torch.tensor([-0.0, -0.0, -0.0, 0.0]) + x = torch.tensor([-1.0, 1.0, -0.0, -0.0]) + await validate_numerical_output(model=model, y=y, x=x) + + async def test_infinities(self) -> None: + """IEEE-754 both-infinite cases: atan2(±inf, ±inf) → ±π/4 or ±3π/4.""" + model = self.Atan2Model().eval() + inf = float("inf") + y = torch.tensor([inf, inf, -inf, -inf]) + x = torch.tensor([inf, -inf, inf, -inf]) + await validate_numerical_output(model=model, y=y, x=x) + @pytest.mark.parametrize( "x", diff --git a/tests/ops/test_ops_ir.py b/tests/ops/test_ops_ir.py index 435e5ee..ce6c7f9 100644 --- 
a/tests/ops/test_ops_ir.py +++ b/tests/ops/test_ops_ir.py @@ -1143,26 +1143,40 @@ def forward(self, y: Tensor, x: Tensor) -> Tensor: ir, check_file=""" // CHECK-LABEL: module { - // CHECK-NEXT: coreai.graph @main(%[[ARG0:.*]]: tensor<2x3xf32> {coreai.name = "y"}, %[[ARG1:.*]]: tensor<2x3xf32> {coreai.name = "x"}) -> (tensor<2x3xf32> {coreai.name = "{{.*}}"}) attributes {__coreai_pure__} { - // CHECK-NEXT: %[[C1:.*]] = coreai.constant dense<1.000000e+00> : tensor - // CHECK-NEXT: %[[C0:.*]] = coreai.constant dense<0.000000e+00> : tensor - // CHECK-NEXT: %[[PI:.*]] = coreai.constant dense<3.14159274> : tensor - // CHECK-NEXT: %[[HPI:.*]] = coreai.constant dense<1.57079637> : tensor - // CHECK-NEXT: %[[NHPI:.*]] = coreai.constant dense<-1.57079637> : tensor - // CHECK-NEXT: %[[X_IS_ZERO:.*]] = coreai.decomposable.broadcasting_equal %[[ARG1]], %[[C0]] : (tensor<2x3xf32>, tensor) -> tensor<2x3xi1> - // CHECK-NEXT: %[[X_SAFE:.*]] = coreai.decomposable.broadcasting_where %[[X_IS_ZERO]], %[[C1]], %[[ARG1]] : (tensor<2x3xi1>, tensor, tensor<2x3xf32>) -> tensor<2x3xf32> - // CHECK-NEXT: %[[RATIO:.*]] = coreai.decomposable.broadcasting_divide %[[ARG0]], %[[X_SAFE]] : (tensor<2x3xf32>, tensor<2x3xf32>) -> tensor<2x3xf32> - // CHECK-NEXT: %[[BASE:.*]] = coreai.atan %[[RATIO]] : tensor<2x3xf32> -> tensor<2x3xf32> - // CHECK-NEXT: %[[X_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[C0]], %[[ARG1]] : (tensor, tensor<2x3xf32>) -> tensor<2x3xi1> - // CHECK-NEXT: %[[Y_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[C0]], %[[ARG0]] : (tensor, tensor<2x3xf32>) -> tensor<2x3xi1> - // CHECK-NEXT: %[[Y_POS:.*]] = coreai.decomposable.broadcasting_greater %[[ARG0]], %[[C0]] : (tensor<2x3xf32>, tensor) -> tensor<2x3xi1> - // CHECK-NEXT: %[[BASE_MINUS_PI:.*]] = coreai.decomposable.broadcasting_sub %[[BASE]], %[[PI]] : (tensor<2x3xf32>, tensor) -> tensor<2x3xf32> - // CHECK-NEXT: %[[BASE_PLUS_PI:.*]] = coreai.decomposable.broadcasting_add %[[BASE]], %[[PI]] : (tensor<2x3xf32>, 
tensor) -> tensor<2x3xf32> - // CHECK-NEXT: %[[CORRECTION:.*]] = coreai.decomposable.broadcasting_where %[[Y_NEG]], %[[BASE_MINUS_PI]], %[[BASE_PLUS_PI]] : (tensor<2x3xi1>, tensor<2x3xf32>, tensor<2x3xf32>) -> tensor<2x3xf32> - // CHECK-NEXT: %[[NONZERO:.*]] = coreai.decomposable.broadcasting_where %[[X_NEG]], %[[CORRECTION]], %[[BASE]] : (tensor<2x3xi1>, tensor<2x3xf32>, tensor<2x3xf32>) -> tensor<2x3xf32> - // CHECK-NEXT: %[[ZERO_NEG:.*]] = coreai.decomposable.broadcasting_where %[[Y_NEG]], %[[NHPI]], %[[C0]] : (tensor<2x3xi1>, tensor, tensor) -> tensor<2x3xf32> - // CHECK-NEXT: %[[ZERO_RES:.*]] = coreai.decomposable.broadcasting_where %[[Y_POS]], %[[HPI]], %[[ZERO_NEG]] : (tensor<2x3xi1>, tensor, tensor<2x3xf32>) -> tensor<2x3xf32> - // CHECK-NEXT: %[[RESULT:.*]] = coreai.decomposable.broadcasting_where %[[X_IS_ZERO]], %[[ZERO_RES]], %[[NONZERO]] : (tensor<2x3xi1>, tensor<2x3xf32>, tensor<2x3xf32>) -> tensor<2x3xf32> + // CHECK-NEXT: coreai.graph @main(%[[Y:.*]]: tensor<2x3xf32> {coreai.name = "y"}, %[[X:.*]]: tensor<2x3xf32> {coreai.name = "x"}) -> (tensor<2x3xf32> {coreai.name = "{{.*}}"}) attributes {__coreai_pure__} { + // CHECK: %[[NEG_INF:.*]] = coreai.constant dense<0xFF800000> : tensor + // CHECK: %[[POS_INF:.*]] = coreai.constant dense<0x7F800000> : tensor + // CHECK: %[[ZERO:.*]] = coreai.constant dense<0.000000e+00> : tensor + // CHECK: %[[ONE:.*]] = coreai.constant dense<1.000000e+00> : tensor + // CHECK: %[[PI:.*]] = coreai.constant dense<3.14159274> : tensor + // CHECK: %[[NEG_PI:.*]] = coreai.constant dense<-3.14159274> : tensor + // CHECK: %[[HPI:.*]] = coreai.constant dense<1.57079637> : tensor + // CHECK: %[[NHPI:.*]] = coreai.constant dense<-1.57079637> : tensor + // CHECK: %[[QPI:.*]] = coreai.constant dense<0.785398185> : tensor + // CHECK: %[[NQPI:.*]] = coreai.constant dense<-0.785398185> : tensor + // CHECK: %[[THREEQPI:.*]] = coreai.constant dense<2.3561945> : tensor + // CHECK: %[[NTHREEQPI:.*]] = coreai.constant dense<-2.3561945> : 
tensor + // CHECK: %[[Y_IS_ZERO:.*]] = coreai.decomposable.broadcasting_equal %[[Y]], %[[ZERO]] + // CHECK: %[[X_IS_ZERO:.*]] = coreai.decomposable.broadcasting_equal %[[X]], %[[ZERO]] + // CHECK: %[[Y_NEG_STRICT:.*]] = coreai.decomposable.broadcasting_greater %[[ZERO]], %[[Y]] + // CHECK: %[[RECIP_Y:.*]] = coreai.decomposable.broadcasting_divide %[[ONE]], %[[Y]] + // CHECK: %[[RECIP_Y_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[ZERO]], %[[RECIP_Y]] + // CHECK: %[[Y_ZERO_NEG:.*]] = coreai.decomposable.broadcasting_and %[[Y_IS_ZERO]], %[[RECIP_Y_NEG]] + // CHECK: %[[Y_NEG:.*]] = coreai.decomposable.broadcasting_or %[[Y_NEG_STRICT]], %[[Y_ZERO_NEG]] + // CHECK: %[[X_NEG_STRICT:.*]] = coreai.decomposable.broadcasting_greater %[[ZERO]], %[[X]] + // CHECK: %[[RECIP_X:.*]] = coreai.decomposable.broadcasting_divide %[[ONE]], %[[X]] + // CHECK: %[[RECIP_X_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[ZERO]], %[[RECIP_X]] + // CHECK: %[[X_ZERO_NEG:.*]] = coreai.decomposable.broadcasting_and %[[X_IS_ZERO]], %[[RECIP_X_NEG]] + // CHECK: %[[X_NEG:.*]] = coreai.decomposable.broadcasting_or %[[X_NEG_STRICT]], %[[X_ZERO_NEG]] + // CHECK: %[[X_IS_POS_INF:.*]] = coreai.decomposable.broadcasting_equal %[[X]], %[[POS_INF]] + // CHECK: %[[X_IS_NEG_INF:.*]] = coreai.decomposable.broadcasting_equal %[[X]], %[[NEG_INF]] + // CHECK: %[[X_IS_INF:.*]] = coreai.decomposable.broadcasting_or %[[X_IS_POS_INF]], %[[X_IS_NEG_INF]] + // CHECK: %[[Y_IS_POS_INF:.*]] = coreai.decomposable.broadcasting_equal %[[Y]], %[[POS_INF]] + // CHECK: %[[Y_IS_NEG_INF:.*]] = coreai.decomposable.broadcasting_equal %[[Y]], %[[NEG_INF]] + // CHECK: %[[Y_IS_INF:.*]] = coreai.decomposable.broadcasting_or %[[Y_IS_POS_INF]], %[[Y_IS_NEG_INF]] + // CHECK: %[[BOTH_INF:.*]] = coreai.decomposable.broadcasting_and %[[X_IS_INF]], %[[Y_IS_INF]] + // CHECK: %[[BASE:.*]] = coreai.atan + // CHECK: %[[RESULT:.*]] = coreai.decomposable.broadcasting_where %[[BOTH_INF]], // CHECK-NEXT: coreai.output %[[RESULT]] : 
tensor<2x3xf32> // CHECK-NEXT: } // CHECK-NEXT: } @@ -1186,26 +1200,40 @@ def forward(self, y: Tensor, x: Tensor) -> Tensor: ir, check_file=""" // CHECK-LABEL: module { - // CHECK-NEXT: coreai.graph @main(%[[ARG0:.*]]: tensor {coreai.name = "y"}, %[[ARG1:.*]]: tensor {coreai.name = "x"}) -> (tensor {coreai.name = "{{.*}}"}) attributes {__coreai_pure__} { - // CHECK-NEXT: %[[C1:.*]] = coreai.constant dense<1.000000e+00> : tensor - // CHECK-NEXT: %[[C0:.*]] = coreai.constant dense<0.000000e+00> : tensor - // CHECK-NEXT: %[[PI:.*]] = coreai.constant dense<3.14159274> : tensor - // CHECK-NEXT: %[[HPI:.*]] = coreai.constant dense<1.57079637> : tensor - // CHECK-NEXT: %[[NHPI:.*]] = coreai.constant dense<-1.57079637> : tensor - // CHECK-NEXT: %[[X_IS_ZERO:.*]] = coreai.decomposable.broadcasting_equal %[[ARG1]], %[[C0]] : (tensor, tensor) -> tensor - // CHECK-NEXT: %[[X_SAFE:.*]] = coreai.decomposable.broadcasting_where %[[X_IS_ZERO]], %[[C1]], %[[ARG1]] : (tensor, tensor, tensor) -> tensor - // CHECK-NEXT: %[[RATIO:.*]] = coreai.decomposable.broadcasting_divide %[[ARG0]], %[[X_SAFE]] : (tensor, tensor) -> tensor - // CHECK-NEXT: %[[BASE:.*]] = coreai.atan %[[RATIO]] : tensor -> tensor - // CHECK-NEXT: %[[X_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[C0]], %[[ARG1]] : (tensor, tensor) -> tensor - // CHECK-NEXT: %[[Y_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[C0]], %[[ARG0]] : (tensor, tensor) -> tensor - // CHECK-NEXT: %[[Y_POS:.*]] = coreai.decomposable.broadcasting_greater %[[ARG0]], %[[C0]] : (tensor, tensor) -> tensor - // CHECK-NEXT: %[[BASE_MINUS_PI:.*]] = coreai.decomposable.broadcasting_sub %[[BASE]], %[[PI]] : (tensor, tensor) -> tensor - // CHECK-NEXT: %[[BASE_PLUS_PI:.*]] = coreai.decomposable.broadcasting_add %[[BASE]], %[[PI]] : (tensor, tensor) -> tensor - // CHECK-NEXT: %[[CORRECTION:.*]] = coreai.decomposable.broadcasting_where %[[Y_NEG]], %[[BASE_MINUS_PI]], %[[BASE_PLUS_PI]] : (tensor, tensor, tensor) -> tensor - // CHECK-NEXT: 
%[[NONZERO:.*]] = coreai.decomposable.broadcasting_where %[[X_NEG]], %[[CORRECTION]], %[[BASE]] : (tensor, tensor, tensor) -> tensor - // CHECK-NEXT: %[[ZERO_NEG:.*]] = coreai.decomposable.broadcasting_where %[[Y_NEG]], %[[NHPI]], %[[C0]] : (tensor, tensor, tensor) -> tensor - // CHECK-NEXT: %[[ZERO_RES:.*]] = coreai.decomposable.broadcasting_where %[[Y_POS]], %[[HPI]], %[[ZERO_NEG]] : (tensor, tensor, tensor) -> tensor - // CHECK-NEXT: %[[RESULT:.*]] = coreai.decomposable.broadcasting_where %[[X_IS_ZERO]], %[[ZERO_RES]], %[[NONZERO]] : (tensor, tensor, tensor) -> tensor + // CHECK-NEXT: coreai.graph @main(%[[Y:.*]]: tensor {coreai.name = "y"}, %[[X:.*]]: tensor {coreai.name = "x"}) -> (tensor {coreai.name = "{{.*}}"}) attributes {__coreai_pure__} { + // CHECK: %[[NEG_INF:.*]] = coreai.constant dense<0xFF800000> : tensor + // CHECK: %[[POS_INF:.*]] = coreai.constant dense<0x7F800000> : tensor + // CHECK: %[[ZERO:.*]] = coreai.constant dense<0.000000e+00> : tensor + // CHECK: %[[ONE:.*]] = coreai.constant dense<1.000000e+00> : tensor + // CHECK: %[[PI:.*]] = coreai.constant dense<3.14159274> : tensor + // CHECK: %[[NEG_PI:.*]] = coreai.constant dense<-3.14159274> : tensor + // CHECK: %[[HPI:.*]] = coreai.constant dense<1.57079637> : tensor + // CHECK: %[[NHPI:.*]] = coreai.constant dense<-1.57079637> : tensor + // CHECK: %[[QPI:.*]] = coreai.constant dense<0.785398185> : tensor + // CHECK: %[[NQPI:.*]] = coreai.constant dense<-0.785398185> : tensor + // CHECK: %[[THREEQPI:.*]] = coreai.constant dense<2.3561945> : tensor + // CHECK: %[[NTHREEQPI:.*]] = coreai.constant dense<-2.3561945> : tensor + // CHECK: %[[Y_IS_ZERO:.*]] = coreai.decomposable.broadcasting_equal %[[Y]], %[[ZERO]] + // CHECK: %[[X_IS_ZERO:.*]] = coreai.decomposable.broadcasting_equal %[[X]], %[[ZERO]] + // CHECK: %[[Y_NEG_STRICT:.*]] = coreai.decomposable.broadcasting_greater %[[ZERO]], %[[Y]] + // CHECK: %[[RECIP_Y:.*]] = coreai.decomposable.broadcasting_divide %[[ONE]], %[[Y]] + // CHECK: 
%[[RECIP_Y_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[ZERO]], %[[RECIP_Y]] + // CHECK: %[[Y_ZERO_NEG:.*]] = coreai.decomposable.broadcasting_and %[[Y_IS_ZERO]], %[[RECIP_Y_NEG]] + // CHECK: %[[Y_NEG:.*]] = coreai.decomposable.broadcasting_or %[[Y_NEG_STRICT]], %[[Y_ZERO_NEG]] + // CHECK: %[[X_NEG_STRICT:.*]] = coreai.decomposable.broadcasting_greater %[[ZERO]], %[[X]] + // CHECK: %[[RECIP_X:.*]] = coreai.decomposable.broadcasting_divide %[[ONE]], %[[X]] + // CHECK: %[[RECIP_X_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[ZERO]], %[[RECIP_X]] + // CHECK: %[[X_ZERO_NEG:.*]] = coreai.decomposable.broadcasting_and %[[X_IS_ZERO]], %[[RECIP_X_NEG]] + // CHECK: %[[X_NEG:.*]] = coreai.decomposable.broadcasting_or %[[X_NEG_STRICT]], %[[X_ZERO_NEG]] + // CHECK: %[[X_IS_POS_INF:.*]] = coreai.decomposable.broadcasting_equal %[[X]], %[[POS_INF]] + // CHECK: %[[X_IS_NEG_INF:.*]] = coreai.decomposable.broadcasting_equal %[[X]], %[[NEG_INF]] + // CHECK: %[[X_IS_INF:.*]] = coreai.decomposable.broadcasting_or %[[X_IS_POS_INF]], %[[X_IS_NEG_INF]] + // CHECK: %[[Y_IS_POS_INF:.*]] = coreai.decomposable.broadcasting_equal %[[Y]], %[[POS_INF]] + // CHECK: %[[Y_IS_NEG_INF:.*]] = coreai.decomposable.broadcasting_equal %[[Y]], %[[NEG_INF]] + // CHECK: %[[Y_IS_INF:.*]] = coreai.decomposable.broadcasting_or %[[Y_IS_POS_INF]], %[[Y_IS_NEG_INF]] + // CHECK: %[[BOTH_INF:.*]] = coreai.decomposable.broadcasting_and %[[X_IS_INF]], %[[Y_IS_INF]] + // CHECK: %[[BASE:.*]] = coreai.atan + // CHECK: %[[RESULT:.*]] = coreai.decomposable.broadcasting_where %[[BOTH_INF]], // CHECK-NEXT: coreai.output %[[RESULT]] : tensor // CHECK-NEXT: } // CHECK-NEXT: } @@ -1222,26 +1250,14 @@ def forward(self, y: Tensor, x: Tensor) -> Tensor: ir, check_file=""" // CHECK-LABEL: module { - // CHECK-NEXT: coreai.graph @main(%[[ARG0:.*]]: tensor<4xf32> {coreai.name = "y"}, %[[ARG1:.*]]: tensor<4xf32> {coreai.name = "x"}) -> (tensor<4xf32> {coreai.name = "{{.*}}"}) attributes {__coreai_pure__} { - // 
CHECK-NEXT: %[[C1:.*]] = coreai.constant dense<1.000000e+00> : tensor - // CHECK-NEXT: %[[C0:.*]] = coreai.constant dense<0.000000e+00> : tensor - // CHECK-NEXT: %[[PI:.*]] = coreai.constant dense<3.14159274> : tensor - // CHECK-NEXT: %[[HPI:.*]] = coreai.constant dense<1.57079637> : tensor - // CHECK-NEXT: %[[NHPI:.*]] = coreai.constant dense<-1.57079637> : tensor - // CHECK-NEXT: %[[X_IS_ZERO:.*]] = coreai.decomposable.broadcasting_equal %[[ARG1]], %[[C0]] : (tensor<4xf32>, tensor) -> tensor<4xi1> - // CHECK-NEXT: %[[X_SAFE:.*]] = coreai.decomposable.broadcasting_where %[[X_IS_ZERO]], %[[C1]], %[[ARG1]] : (tensor<4xi1>, tensor, tensor<4xf32>) -> tensor<4xf32> - // CHECK-NEXT: %[[RATIO:.*]] = coreai.decomposable.broadcasting_divide %[[ARG0]], %[[X_SAFE]] : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> - // CHECK-NEXT: %[[BASE:.*]] = coreai.atan %[[RATIO]] : tensor<4xf32> -> tensor<4xf32> - // CHECK-NEXT: %[[X_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[C0]], %[[ARG1]] : (tensor, tensor<4xf32>) -> tensor<4xi1> - // CHECK-NEXT: %[[Y_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[C0]], %[[ARG0]] : (tensor, tensor<4xf32>) -> tensor<4xi1> - // CHECK-NEXT: %[[Y_POS:.*]] = coreai.decomposable.broadcasting_greater %[[ARG0]], %[[C0]] : (tensor<4xf32>, tensor) -> tensor<4xi1> - // CHECK-NEXT: %[[BASE_MINUS_PI:.*]] = coreai.decomposable.broadcasting_sub %[[BASE]], %[[PI]] : (tensor<4xf32>, tensor) -> tensor<4xf32> - // CHECK-NEXT: %[[BASE_PLUS_PI:.*]] = coreai.decomposable.broadcasting_add %[[BASE]], %[[PI]] : (tensor<4xf32>, tensor) -> tensor<4xf32> - // CHECK-NEXT: %[[CORRECTION:.*]] = coreai.decomposable.broadcasting_where %[[Y_NEG]], %[[BASE_MINUS_PI]], %[[BASE_PLUS_PI]] : (tensor<4xi1>, tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> - // CHECK-NEXT: %[[NONZERO:.*]] = coreai.decomposable.broadcasting_where %[[X_NEG]], %[[CORRECTION]], %[[BASE]] : (tensor<4xi1>, tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> - // CHECK-NEXT: %[[ZERO_NEG:.*]] = 
coreai.decomposable.broadcasting_where %[[Y_NEG]], %[[NHPI]], %[[C0]] : (tensor<4xi1>, tensor, tensor) -> tensor<4xf32> - // CHECK-NEXT: %[[ZERO_RES:.*]] = coreai.decomposable.broadcasting_where %[[Y_POS]], %[[HPI]], %[[ZERO_NEG]] : (tensor<4xi1>, tensor, tensor<4xf32>) -> tensor<4xf32> - // CHECK-NEXT: %[[RESULT:.*]] = coreai.decomposable.broadcasting_where %[[X_IS_ZERO]], %[[ZERO_RES]], %[[NONZERO]] : (tensor<4xi1>, tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> + // CHECK-NEXT: coreai.graph @main(%[[Y:.*]]: tensor<4xf32> {coreai.name = "y"}, %[[X:.*]]: tensor<4xf32> {coreai.name = "x"}) -> (tensor<4xf32> {coreai.name = "{{.*}}"}) attributes {__coreai_pure__} { + // CHECK: %[[ZERO:.*]] = coreai.constant dense<0.000000e+00> : tensor + // CHECK: %[[ONE:.*]] = coreai.constant dense<1.000000e+00> : tensor + // CHECK: %[[Y_NEG:.*]] = coreai.decomposable.broadcasting_or + // CHECK: %[[X_NEG:.*]] = coreai.decomposable.broadcasting_or + // CHECK: %[[BOTH_INF:.*]] = coreai.decomposable.broadcasting_and + // CHECK: %[[BASE:.*]] = coreai.atan + // CHECK: %[[RESULT:.*]] = coreai.decomposable.broadcasting_where %[[BOTH_INF]], // CHECK-NEXT: coreai.output %[[RESULT]] : tensor<4xf32> // CHECK-NEXT: } // CHECK-NEXT: }