diff --git a/coreai_torch/_aten_to_core.py b/coreai_torch/_aten_to_core.py
index 433e27a..93b042c 100644
--- a/coreai_torch/_aten_to_core.py
+++ b/coreai_torch/_aten_to_core.py
@@ -1522,6 +1522,114 @@ def replace_argmax(values_map: dict[str, Value], node: fx.Node, loc: Location) -
     return result if keepdim else coreai.shrink_dims(result, [dim])
 
 
+def replace_atan2(values_map: dict[str, Value], node: fx.Node, loc: Location) -> Value:
+    """Lower atan2(y, x) using atan(y/x) with quadrant correction.
+
+    CoreAI has no native atan2, so it is decomposed as:
+      - x != 0: atan(y/x), adjusted by ±π (sign of y) when x is negative.
+      - x == ±0, y != 0: ±π/2 (sign of y), whichever zero x is.
+      - x == -0, y == ±0: ±π (sign of y) per IEEE-754.
+      - x == +0, y == ±0: y itself, i.e. ±0.
+      - both infinite: ±π/4 or ±3π/4 per IEEE-754.
+      - x == ±0, y NaN: y itself, so the NaN is propagated.
+
+    Signed-zero handling: IEEE-754 treats -0.0 as distinct from +0.0 for atan2
+    (e.g. atan2(-0, -1) = -π, not +π). The 1/v trick — 1/-0.0 = -inf — is used
+    to detect the sign bit of zero inputs so that y_neg and x_neg are correct
+    for -0.0 inputs without misclassifying ±inf (which use the strict > path).
+
+    When x=0, x is replaced with 1 before the divide solely to avoid NaN/inf; that
+    intermediate result is discarded by the final where-select.
+    """
+    y, x = _get_operands(values_map, node, [0, 1])
+    ele_type = x.type.element_type
+
+    zero = coreai.constant(0.0, dtype=ele_type)
+    one = coreai.constant(1.0, dtype=ele_type)
+    pi = coreai.constant(np.pi, dtype=ele_type)
+    neg_pi = coreai.constant(-np.pi, dtype=ele_type)
+    half_pi = coreai.constant(np.pi / 2.0, dtype=ele_type)
+    neg_half_pi = coreai.constant(-np.pi / 2.0, dtype=ele_type)
+    quarter_pi = coreai.constant(np.pi / 4.0, dtype=ele_type)
+    neg_quarter_pi = coreai.constant(-np.pi / 4.0, dtype=ele_type)
+    three_quarter_pi = coreai.constant(3.0 * np.pi / 4.0, dtype=ele_type)
+    neg_three_quarter_pi = coreai.constant(-3.0 * np.pi / 4.0, dtype=ele_type)
+
+    # ── signed-zero-aware sign predicates ─────────────────────────────────────
+    # 1 / -0.0 = -inf (IEEE-754), so (0 > 1/v) is True iff v = -0.0. Combine with
+    # the strict > predicate (handles ±inf and non-zero finites) via OR.
+    y_is_zero = coreai.broadcasting_equal(y, zero)
+    x_is_zero = coreai.broadcasting_equal(x, zero)
+    y_neg = coreai.broadcasting_or(
+        coreai.broadcasting_greater(zero, y),
+        coreai.broadcasting_and(
+            y_is_zero,
+            coreai.broadcasting_greater(zero, coreai.broadcasting_divide(one, y)),
+        ),
+    )
+    x_neg = coreai.broadcasting_or(
+        coreai.broadcasting_greater(zero, x),
+        coreai.broadcasting_and(
+            x_is_zero,
+            coreai.broadcasting_greater(zero, coreai.broadcasting_divide(one, x)),
+        ),
+    )
+    x_is_neg_zero = coreai.broadcasting_and(
+        x_is_zero,
+        coreai.broadcasting_greater(zero, coreai.broadcasting_divide(one, x)),
+    )
+
+    # ── both-infinite branch ──────────────────────────────────────────────────
+    # atan(inf/inf) = atan(NaN) = NaN; handle before the divide.
+    pos_inf = coreai.constant(float("inf"), dtype=ele_type)
+    neg_inf = coreai.constant(float("-inf"), dtype=ele_type)
+    x_is_inf = coreai.broadcasting_or(
+        coreai.broadcasting_equal(x, pos_inf), coreai.broadcasting_equal(x, neg_inf)
+    )
+    y_is_inf = coreai.broadcasting_or(
+        coreai.broadcasting_equal(y, pos_inf), coreai.broadcasting_equal(y, neg_inf)
+    )
+    both_inf = coreai.broadcasting_and(x_is_inf, y_is_inf)
+    inf_result = coreai.broadcasting_where(
+        y_neg,
+        coreai.broadcasting_where(x_neg, neg_three_quarter_pi, neg_quarter_pi),
+        coreai.broadcasting_where(x_neg, three_quarter_pi, quarter_pi),
+    )
+
+    # ── x = 0 branch ──────────────────────────────────────────────────────────
+    # y != 0: ±π/2 by the sign of y, for either sign of the zero x.
+    # y = ±0: ±π when x = -0 (y_neg sees -0.0 via the 1/y trick above);
+    #         otherwise y itself, which keeps the signed zero for x = +0.
+    # y = NaN: fails every comparison (assuming IEEE semantics), so y passes through.
+    y_pos_strict = coreai.broadcasting_greater(y, zero)
+    y_neg_strict = coreai.broadcasting_greater(zero, y)
+    y_zero_x_neg_zero = coreai.broadcasting_and(y_is_zero, x_is_neg_zero)
+    y_zero_or_nan_result = coreai.broadcasting_where(
+        y_zero_x_neg_zero, coreai.broadcasting_where(y_neg, neg_pi, pi), y
+    )
+    zero_result = coreai.broadcasting_where(
+        y_pos_strict,
+        half_pi,
+        coreai.broadcasting_where(y_neg_strict, neg_half_pi, y_zero_or_nan_result),
+    )
+
+    # ── finite nonzero x branch ────────────────────────────────────────────────
+    # Avoid division by zero: substitute x = 1 when x = 0; result discarded by
+    # the outer where-select.
+    x_safe = coreai.broadcasting_where(x_is_zero, one, x)
+    base = coreai.atan(coreai.broadcasting_divide(y, x_safe))
+    correction = coreai.broadcasting_where(
+        y_neg,
+        coreai.broadcasting_sub(base, pi),
+        coreai.broadcasting_add(base, pi),
+    )
+    nonzero_result = coreai.broadcasting_where(x_neg, correction, base)
+
+    # ── combine ────────────────────────────────────────────────────────────────
+    result = coreai.broadcasting_where(x_is_zero, zero_result, nonzero_result)
+    return coreai.broadcasting_where(both_inf, inf_result, result)
+
+
 def replace_gather(values_map: dict[str, Value], node: fx.Node, loc: Location) -> Value:
     """Converts aten.gather to coreai.gather_along_axis."""
     x, index = _get_operands(values_map, node, [0, 2])
@@ -3440,6 +3548,7 @@ def sdpa_maskless(q: Value, k: Value, v: Value) -> Value:
     "asin.default": replace_unary_ops,
     "asinh.default": replace_unary_ops,
     "atan.default": replace_unary_ops,
+    "atan2.default": replace_atan2,
     "atanh.default": replace_unary_ops,
     "_adaptive_avg_pool2d.default": replace_adaptive_avg_pool2d,
     "_unsafe_view.default": replace_view,
diff --git a/docs/coreai-core/tutorials/construct-a-graph.ipynb b/docs/coreai-core/tutorials/construct-a-graph.ipynb
index a10b05d..86f0acc 100644
--- a/docs/coreai-core/tutorials/construct-a-graph.ipynb
+++ b/docs/coreai-core/tutorials/construct-a-graph.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "markdown",
-   "id": "intro",
+   "id": "0",
    "metadata": {},
    "source": [
     "# Constructing a CoreAI Graph\n",
@@ -25,7 +25,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "warning",
+   "id": "1",
    "metadata": {},
    "source": [
     ":::{warning}\n",
@@ -39,7 +39,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "setup-md",
+   "id": "2",
    "metadata": {},
    "source": [
     "## Setup\n",
@@ -50,25 +50,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
-   "id": "imports",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2026-06-04T23:13:09.232797Z",
-     "iopub.status.busy": "2026-06-04T23:13:09.232330Z",
-
"iopub.status.idle": "2026-06-04T23:13:12.445622Z", - "shell.execute_reply": "2026-06-04T23:13:12.444678Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Dev installation detected. Using local Core AI Framework.\n" - ] - } - ], + "execution_count": null, + "id": "3", + "metadata": {}, + "outputs": [], "source": [ "import shutil\n", "from pathlib import Path\n", @@ -84,7 +69,7 @@ }, { "cell_type": "markdown", - "id": "specs-md", + "id": "4", "metadata": {}, "source": [ "## Describe the inputs and outputs\n", @@ -99,26 +84,10 @@ }, { "cell_type": "code", - "execution_count": 2, - "id": "specs", - "metadata": { - "execution": { - "iopub.execute_input": "2026-06-04T23:13:12.447443Z", - "iopub.status.busy": "2026-06-04T23:13:12.447294Z", - "iopub.status.idle": "2026-06-04T23:13:12.451834Z", - "shell.execute_reply": "2026-06-04T23:13:12.451081Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "input: tensor<2x3xf32>\n", - "output: tensor<2x3xf32>\n" - ] - } - ], + "execution_count": null, + "id": "5", + "metadata": {}, + "outputs": [], "source": [ "input_spec = TensorSpec(shape=[2, 3], dtype=np.float32)\n", "output_spec = TensorSpec(shape=[2, 3], dtype=np.float32, name=\"y\")\n", @@ -129,7 +98,7 @@ }, { "cell_type": "markdown", - "id": "build-md", + "id": "6", "metadata": {}, "source": [ "## Build the graph\n", @@ -145,25 +114,10 @@ }, { "cell_type": "code", - "execution_count": 3, - "id": "build", - "metadata": { - "execution": { - "iopub.execute_input": "2026-06-04T23:13:12.453228Z", - "iopub.status.busy": "2026-06-04T23:13:12.453111Z", - "iopub.status.idle": "2026-06-04T23:13:12.470623Z", - "shell.execute_reply": "2026-06-04T23:13:12.470201Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Module verified.\n" - ] - } - ], + "execution_count": null, + "id": "7", + "metadata": {}, + "outputs": [], "source": [ "module = Module.create()\n", "with 
module:\n", @@ -181,7 +135,7 @@ }, { "cell_type": "markdown", - "id": "program-md", + "id": "8", "metadata": {}, "source": [ "## Wrap in an `AIProgram`\n", @@ -192,16 +146,9 @@ }, { "cell_type": "code", - "execution_count": 4, - "id": "program", - "metadata": { - "execution": { - "iopub.execute_input": "2026-06-04T23:13:12.472009Z", - "iopub.status.busy": "2026-06-04T23:13:12.471916Z", - "iopub.status.idle": "2026-06-04T23:13:12.474216Z", - "shell.execute_reply": "2026-06-04T23:13:12.473635Z" - } - }, + "execution_count": null, + "id": "9", + "metadata": {}, "outputs": [], "source": [ "program = AIProgram(module)" @@ -209,7 +156,7 @@ }, { "cell_type": "markdown", - "id": "save-md", + "id": "10", "metadata": {}, "source": [ "## Save as an `.aimodel`\n", @@ -222,25 +169,10 @@ }, { "cell_type": "code", - "execution_count": 5, - "id": "save", - "metadata": { - "execution": { - "iopub.execute_input": "2026-06-04T23:13:12.475505Z", - "iopub.status.busy": "2026-06-04T23:13:12.475388Z", - "iopub.status.idle": "2026-06-04T23:13:12.479489Z", - "shell.execute_reply": "2026-06-04T23:13:12.479034Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "saved to: hello-graph.aimodel\n" - ] - } - ], + "execution_count": null, + "id": "11", + "metadata": {}, + "outputs": [], "source": [ "asset_path = Path(\"./hello-graph.aimodel\")\n", "if asset_path.exists():\n", @@ -252,7 +184,7 @@ }, { "cell_type": "markdown", - "id": "inspect-md", + "id": "12", "metadata": {}, "source": [ "## Inspect the saved asset\n", @@ -263,26 +195,10 @@ }, { "cell_type": "code", - "execution_count": 6, - "id": "inspect", - "metadata": { - "execution": { - "iopub.execute_input": "2026-06-04T23:13:12.480788Z", - "iopub.status.busy": "2026-06-04T23:13:12.480705Z", - "iopub.status.idle": "2026-06-04T23:13:12.483398Z", - "shell.execute_reply": "2026-06-04T23:13:12.483007Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "contents: 
['main.hash', 'main.mlirb', 'metadata.json']\n", - "total size: 446 bytes\n" - ] - } - ], + "execution_count": null, + "id": "13", + "metadata": {}, + "outputs": [], "source": [ "files = sorted(p.name for p in asset_path.iterdir())\n", "total_bytes = sum(p.stat().st_size for p in asset_path.rglob(\"*\") if p.is_file())\n", @@ -293,7 +209,7 @@ }, { "cell_type": "markdown", - "id": "validate-md", + "id": "14", "metadata": {}, "source": [ "## Validate that the asset is loadable\n", @@ -305,25 +221,10 @@ }, { "cell_type": "code", - "execution_count": 7, - "id": "validate", - "metadata": { - "execution": { - "iopub.execute_input": "2026-06-04T23:13:12.484574Z", - "iopub.status.busy": "2026-06-04T23:13:12.484499Z", - "iopub.status.idle": "2026-06-04T23:13:12.498350Z", - "shell.execute_reply": "2026-06-04T23:13:12.497839Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "OK\n" - ] - } - ], + "execution_count": null, + "id": "15", + "metadata": {}, + "outputs": [], "source": [ "reloaded = AIModelAsset.load(asset_path)\n", "async with reloaded.executable() as model:\n", diff --git a/docs/coreai-core/tutorials/run-an-aimodel.ipynb b/docs/coreai-core/tutorials/run-an-aimodel.ipynb index 4d878f9..7e23884 100644 --- a/docs/coreai-core/tutorials/run-an-aimodel.ipynb +++ b/docs/coreai-core/tutorials/run-an-aimodel.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "intro", + "id": "0", "metadata": {}, "source": [ "# Running an `.aimodel` with `coreai.runtime`\n", @@ -29,7 +29,7 @@ }, { "cell_type": "markdown", - "id": "setup-md", + "id": "1", "metadata": {}, "source": [ "## Setup\n", @@ -41,25 +41,10 @@ }, { "cell_type": "code", - "execution_count": 1, - "id": "imports", - "metadata": { - "execution": { - "iopub.execute_input": "2026-06-04T23:13:09.233244Z", - "iopub.status.busy": "2026-06-04T23:13:09.232818Z", - "iopub.status.idle": "2026-06-04T23:13:12.445952Z", - "shell.execute_reply": "2026-06-04T23:13:12.444672Z" - } 
- }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Dev installation detected. Using local Core AI Framework.\n" - ] - } - ], + "execution_count": null, + "id": "2", + "metadata": {}, + "outputs": [], "source": [ "from pathlib import Path\n", "\n", @@ -72,7 +57,7 @@ }, { "cell_type": "markdown", - "id": "ensure-md", + "id": "3", "metadata": {}, "source": [ "### Ensure `hello.aimodel` exists\n", @@ -84,25 +69,10 @@ }, { "cell_type": "code", - "execution_count": 2, - "id": "ensure", - "metadata": { - "execution": { - "iopub.execute_input": "2026-06-04T23:13:12.447569Z", - "iopub.status.busy": "2026-06-04T23:13:12.447413Z", - "iopub.status.idle": "2026-06-04T23:13:12.473679Z", - "shell.execute_reply": "2026-06-04T23:13:12.473128Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "created hello-run.aimodel\n" - ] - } - ], + "execution_count": null, + "id": "4", + "metadata": {}, + "outputs": [], "source": [ "from shutil import rmtree\n", "from typing import Annotated\n", @@ -132,7 +102,7 @@ }, { "cell_type": "markdown", - "id": "open-asset-md", + "id": "5", "metadata": {}, "source": [ "## Open the asset\n", @@ -155,16 +125,9 @@ }, { "cell_type": "code", - "execution_count": 3, - "id": "open-asset", - "metadata": { - "execution": { - "iopub.execute_input": "2026-06-04T23:13:12.475086Z", - "iopub.status.busy": "2026-06-04T23:13:12.474967Z", - "iopub.status.idle": "2026-06-04T23:13:12.477296Z", - "shell.execute_reply": "2026-06-04T23:13:12.476906Z" - } - }, + "execution_count": null, + "id": "6", + "metadata": {}, "outputs": [], "source": [ "asset = AIModelAsset.load(asset_path)" @@ -172,7 +135,7 @@ }, { "cell_type": "markdown", - "id": "run-md", + "id": "7", "metadata": {}, "source": [ "## Open the model and run inference\n", @@ -195,32 +158,10 @@ }, { "cell_type": "code", - "execution_count": 4, - "id": "run", - "metadata": { - "execution": { - "iopub.execute_input": "2026-06-04T23:13:12.478555Z", 
- "iopub.status.busy": "2026-06-04T23:13:12.478462Z", - "iopub.status.idle": "2026-06-04T23:13:12.505717Z", - "shell.execute_reply": "2026-06-04T23:13:12.505274Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "functions: ['main']\n", - "name: main\n", - "inputs: ['x']\n", - "outputs: ['y']\n", - "input x:\n", - "[[1.5 1.5 1.5]\n", - " [1.5 1.5 1.5]]\n", - "output keys: ['y']\n" - ] - } - ], + "execution_count": null, + "id": "8", + "metadata": {}, + "outputs": [], "source": [ "async with asset.executable() as model:\n", " print(f\"functions: {model.function_names}\")\n", @@ -245,7 +186,7 @@ }, { "cell_type": "markdown", - "id": "inspect-md", + "id": "9", "metadata": {}, "source": [ "## Inspect the output\n", @@ -258,30 +199,10 @@ }, { "cell_type": "code", - "execution_count": 5, - "id": "inspect", - "metadata": { - "execution": { - "iopub.execute_input": "2026-06-04T23:13:12.506908Z", - "iopub.status.busy": "2026-06-04T23:13:12.506825Z", - "iopub.status.idle": "2026-06-04T23:13:12.509068Z", - "shell.execute_reply": "2026-06-04T23:13:12.508740Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "shape: (2, 3)\n", - "dtype: float32\n", - "value:\n", - "[[3. 3. 3.]\n", - " [3. 3. 
3.]]\n",
-      "OK — inference produced expected output shape and dtype\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "id": "10",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "print(f\"shape: {result.shape}\")\n",
     "print(f\"dtype: {result.dtype}\")\n",
@@ -294,7 +215,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "advanced-md",
+   "id": "11",
    "metadata": {},
    "source": [
     "## What's next\n",
diff --git a/tests/ops/test_ops.py b/tests/ops/test_ops.py
index 6e85b7b..b8f5dfb 100644
--- a/tests/ops/test_ops.py
+++ b/tests/ops/test_ops.py
@@ -595,6 +595,87 @@ def forward(self, x: Tensor) -> Tensor:
         await validate_numerical_output(model=model, x=x, dynamic_shapes=dynamic_shapes)
 
 
+class TestAtan2:
+    """Tests for torch.atan2(y, x) — angle from the positive x-axis to the point (x, y)."""
+
+    class Atan2Model(nn.Module):
+        def forward(self, y: Tensor, x: Tensor) -> Tensor:
+            return torch.atan2(y, x)
+
+    @pytest.mark.parametrize("dynamic", [False, True])
+    @pytest.mark.parametrize(
+        "shape",
+        [
+            (4,),
+            (3, 4),
+            (2, 3, 4),
+        ],
+    )
+    @pytest.mark.parametrize("dtype", [torch.float32, torch.float16])
+    async def test_basic(
+        self, shape: tuple[int, ...], dtype: torch.dtype, dynamic: bool
+    ) -> None:
+        model = self.Atan2Model().eval()
+        y = torch.randn(shape, dtype=dtype)
+        x = torch.randn(shape, dtype=dtype)
+        dynamic_shapes = (
+            {"y": _all_dims_dynamic(y), "x": _all_dims_dynamic(x)} if dynamic else None
+        )
+        await validate_numerical_output(
+            model=model, y=y, x=x, dynamic_shapes=dynamic_shapes
+        )
+
+    async def test_x_zero(self) -> None:
+        """x = 0 should yield ±π/2 depending on sign of y; (0, 0) → 0 by convention."""
+        model = self.Atan2Model().eval()
+        y = torch.tensor([1.0, -1.0, 2.0, -2.0, 0.0])
+        x = torch.zeros(5)
+        await validate_numerical_output(model=model, y=y, x=x)
+
+    async def test_y_zero(self) -> None:
+        """y = 0 with x > 0 → 0, x < 0 → π."""
+        model = self.Atan2Model().eval()
+        y = torch.zeros(4)
+        x = torch.tensor([1.0, -1.0, 2.0, -2.0])
+        await validate_numerical_output(model=model, y=y, x=x)
+
+    async def test_all_quadrants(self) -> None:
+        """Cover all four quadrants and axes."""
+        model = self.Atan2Model().eval()
+        y = torch.tensor([1.0, 1.0, -1.0, -1.0, 0.0, 0.0, 1.0, -1.0])
+        x = torch.tensor([1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 0.0, 0.0])
+        await validate_numerical_output(model=model, y=y, x=x)
+
+    async def test_broadcast_shapes(self) -> None:
+        model = self.Atan2Model().eval()
+        y = torch.randn(3, 4)
+        x = torch.randn(4)
+        await validate_numerical_output(model=model, y=y, x=x)
+
+    async def test_signed_zeros(self) -> None:
+        """IEEE-754 signed-zero cases: atan2(-0, x) and atan2(y, -0)."""
+        model = self.Atan2Model().eval()
+        # y = -0.0 with various x signs
+        y = torch.tensor([-0.0, -0.0, -0.0, 0.0])
+        x = torch.tensor([-1.0, 1.0, -0.0, -0.0])
+        await validate_numerical_output(model=model, y=y, x=x)
+
+    async def test_infinities(self) -> None:
+        """IEEE-754 both-infinite cases: atan2(±inf, ±inf) → ±π/4 or ±3π/4."""
+        model = self.Atan2Model().eval()
+        inf = float("inf")
+        y = torch.tensor([inf, inf, -inf, -inf])
+        x = torch.tensor([inf, -inf, inf, -inf])
+        await validate_numerical_output(model=model, y=y, x=x)
+
+    async def test_neg_zero_x_nonzero_y(self) -> None:
+        """IEEE-754: atan2(y, -0) = ±π/2 for y != 0, the same as atan2(y, +0)."""
+        model = self.Atan2Model().eval()
+        y = torch.tensor([1.0, -1.0, 2.5, -2.5])
+        x = torch.tensor([-0.0, -0.0, -0.0, -0.0])
+        await validate_numerical_output(model=model, y=y, x=x)
+
+
 @pytest.mark.parametrize(
     "x",
     [
diff --git a/tests/ops/test_ops_ir.py b/tests/ops/test_ops_ir.py
index 50c0786..ce6c7f9 100644
--- a/tests/ops/test_ops_ir.py
+++ b/tests/ops/test_ops_ir.py
@@ -1132,6 +1132,139 @@ def forward(self, x: Tensor) -> Tensor:
         )
 
 
+class TestAtan2IR:
+    def test_static(self) -> None:
+        class Atan2Model(nn.Module):
+            def forward(self, y: Tensor, x: Tensor) -> Tensor:
+                return torch.atan2(y, x)
+
+        ir = get_ir(Atan2Model().eval(), y=torch.rand(2, 3), x=torch.rand(2, 3))
+        filecheck_pattern(
+            ir,
+            check_file="""
+            // CHECK-LABEL: module {
+            // CHECK-NEXT: coreai.graph @main(%[[Y:.*]]: tensor<2x3xf32> {coreai.name = "y"}, %[[X:.*]]: tensor<2x3xf32> {coreai.name = "x"}) -> (tensor<2x3xf32> {coreai.name = "{{.*}}"}) attributes {__coreai_pure__} {
+            // CHECK: %[[NEG_INF:.*]] = coreai.constant dense<0xFF800000> : tensor
+            // CHECK: %[[POS_INF:.*]] = coreai.constant dense<0x7F800000> : tensor
+            // CHECK: %[[ZERO:.*]] = coreai.constant dense<0.000000e+00> : tensor
+            // CHECK: %[[ONE:.*]] = coreai.constant dense<1.000000e+00> : tensor
+            // CHECK: %[[PI:.*]] = coreai.constant dense<3.14159274> : tensor
+            // CHECK: %[[NEG_PI:.*]] = coreai.constant dense<-3.14159274> : tensor
+            // CHECK: %[[HPI:.*]] = coreai.constant dense<1.57079637> : tensor
+            // CHECK: %[[NHPI:.*]] = coreai.constant dense<-1.57079637> : tensor
+            // CHECK: %[[QPI:.*]] = coreai.constant dense<0.785398185> : tensor
+            // CHECK: %[[NQPI:.*]] = coreai.constant dense<-0.785398185> : tensor
+            // CHECK: %[[THREEQPI:.*]] = coreai.constant dense<2.3561945> : tensor
+            // CHECK: %[[NTHREEQPI:.*]] = coreai.constant dense<-2.3561945> : tensor
+            // CHECK: %[[Y_IS_ZERO:.*]] = coreai.decomposable.broadcasting_equal %[[Y]], %[[ZERO]]
+            // CHECK: %[[X_IS_ZERO:.*]] = coreai.decomposable.broadcasting_equal %[[X]], %[[ZERO]]
+            // CHECK: %[[Y_NEG_STRICT:.*]] = coreai.decomposable.broadcasting_greater %[[ZERO]], %[[Y]]
+            // CHECK: %[[RECIP_Y:.*]] = coreai.decomposable.broadcasting_divide %[[ONE]], %[[Y]]
+            // CHECK: %[[RECIP_Y_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[ZERO]], %[[RECIP_Y]]
+            // CHECK: %[[Y_ZERO_NEG:.*]] = coreai.decomposable.broadcasting_and %[[Y_IS_ZERO]], %[[RECIP_Y_NEG]]
+            // CHECK: %[[Y_NEG:.*]] = coreai.decomposable.broadcasting_or %[[Y_NEG_STRICT]], %[[Y_ZERO_NEG]]
+            // CHECK: %[[X_NEG_STRICT:.*]] = coreai.decomposable.broadcasting_greater %[[ZERO]], %[[X]]
+            // CHECK: %[[RECIP_X:.*]] = coreai.decomposable.broadcasting_divide %[[ONE]], %[[X]]
+            // CHECK: %[[RECIP_X_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[ZERO]], %[[RECIP_X]]
+            // CHECK: %[[X_ZERO_NEG:.*]] = coreai.decomposable.broadcasting_and %[[X_IS_ZERO]], %[[RECIP_X_NEG]]
+            // CHECK: %[[X_NEG:.*]] = coreai.decomposable.broadcasting_or %[[X_NEG_STRICT]], %[[X_ZERO_NEG]]
+            // CHECK: %[[X_IS_POS_INF:.*]] = coreai.decomposable.broadcasting_equal %[[X]], %[[POS_INF]]
+            // CHECK: %[[X_IS_NEG_INF:.*]] = coreai.decomposable.broadcasting_equal %[[X]], %[[NEG_INF]]
+            // CHECK: %[[X_IS_INF:.*]] = coreai.decomposable.broadcasting_or %[[X_IS_POS_INF]], %[[X_IS_NEG_INF]]
+            // CHECK: %[[Y_IS_POS_INF:.*]] = coreai.decomposable.broadcasting_equal %[[Y]], %[[POS_INF]]
+            // CHECK: %[[Y_IS_NEG_INF:.*]] = coreai.decomposable.broadcasting_equal %[[Y]], %[[NEG_INF]]
+            // CHECK: %[[Y_IS_INF:.*]] = coreai.decomposable.broadcasting_or %[[Y_IS_POS_INF]], %[[Y_IS_NEG_INF]]
+            // CHECK: %[[BOTH_INF:.*]] = coreai.decomposable.broadcasting_and %[[X_IS_INF]], %[[Y_IS_INF]]
+            // CHECK: %[[BASE:.*]] = coreai.atan
+            // CHECK: %[[RESULT:.*]] = coreai.decomposable.broadcasting_where %[[BOTH_INF]],
+            // CHECK-NEXT: coreai.output %[[RESULT]] : tensor<2x3xf32>
+            // CHECK-NEXT: }
+            // CHECK-NEXT: }
+            """,
+        )
+
+    def test_dynamic(self) -> None:
+        class Atan2Model(nn.Module):
+            def forward(self, y: Tensor, x: Tensor) -> Tensor:
+                return torch.atan2(y, x)
+
+        y = torch.rand(2, 3)
+        x = torch.rand(2, 3)
+        ir = get_ir(
+            Atan2Model().eval(),
+            y=y,
+            x=x,
+            dynamic_shapes={"y": _all_dims_dynamic(y), "x": _all_dims_dynamic(x)},
+        )
+        filecheck_pattern(
+            ir,
+            check_file="""
+            // CHECK-LABEL: module {
+            // CHECK-NEXT: coreai.graph @main(%[[Y:.*]]: tensor {coreai.name = "y"}, %[[X:.*]]: tensor {coreai.name = "x"}) -> (tensor {coreai.name = "{{.*}}"}) attributes {__coreai_pure__} {
+            // CHECK: %[[NEG_INF:.*]] = coreai.constant dense<0xFF800000> : tensor
+            // CHECK: %[[POS_INF:.*]] = coreai.constant dense<0x7F800000> : tensor
+            // CHECK: %[[ZERO:.*]] = coreai.constant dense<0.000000e+00> : tensor
+            // CHECK: %[[ONE:.*]] = coreai.constant dense<1.000000e+00> : tensor
+            // CHECK: %[[PI:.*]] = coreai.constant dense<3.14159274> : tensor
+            // CHECK: %[[NEG_PI:.*]] = coreai.constant dense<-3.14159274> : tensor
+            // CHECK: %[[HPI:.*]] = coreai.constant dense<1.57079637> : tensor
+            // CHECK: %[[NHPI:.*]] = coreai.constant dense<-1.57079637> : tensor
+            // CHECK: %[[QPI:.*]] = coreai.constant dense<0.785398185> : tensor
+            // CHECK: %[[NQPI:.*]] = coreai.constant dense<-0.785398185> : tensor
+            // CHECK: %[[THREEQPI:.*]] = coreai.constant dense<2.3561945> : tensor
+            // CHECK: %[[NTHREEQPI:.*]] = coreai.constant dense<-2.3561945> : tensor
+            // CHECK: %[[Y_IS_ZERO:.*]] = coreai.decomposable.broadcasting_equal %[[Y]], %[[ZERO]]
+            // CHECK: %[[X_IS_ZERO:.*]] = coreai.decomposable.broadcasting_equal %[[X]], %[[ZERO]]
+            // CHECK: %[[Y_NEG_STRICT:.*]] = coreai.decomposable.broadcasting_greater %[[ZERO]], %[[Y]]
+            // CHECK: %[[RECIP_Y:.*]] = coreai.decomposable.broadcasting_divide %[[ONE]], %[[Y]]
+            // CHECK: %[[RECIP_Y_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[ZERO]], %[[RECIP_Y]]
+            // CHECK: %[[Y_ZERO_NEG:.*]] = coreai.decomposable.broadcasting_and %[[Y_IS_ZERO]], %[[RECIP_Y_NEG]]
+            // CHECK: %[[Y_NEG:.*]] = coreai.decomposable.broadcasting_or %[[Y_NEG_STRICT]], %[[Y_ZERO_NEG]]
+            // CHECK: %[[X_NEG_STRICT:.*]] = coreai.decomposable.broadcasting_greater %[[ZERO]], %[[X]]
+            // CHECK: %[[RECIP_X:.*]] = coreai.decomposable.broadcasting_divide %[[ONE]], %[[X]]
+            // CHECK: %[[RECIP_X_NEG:.*]] = coreai.decomposable.broadcasting_greater %[[ZERO]], %[[RECIP_X]]
+            // CHECK: %[[X_ZERO_NEG:.*]] = coreai.decomposable.broadcasting_and %[[X_IS_ZERO]], %[[RECIP_X_NEG]]
+            // CHECK: %[[X_NEG:.*]] = coreai.decomposable.broadcasting_or %[[X_NEG_STRICT]], %[[X_ZERO_NEG]]
+            // CHECK: %[[X_IS_POS_INF:.*]] = coreai.decomposable.broadcasting_equal %[[X]], %[[POS_INF]]
+            // CHECK: %[[X_IS_NEG_INF:.*]] = coreai.decomposable.broadcasting_equal %[[X]], %[[NEG_INF]]
+            // CHECK: %[[X_IS_INF:.*]] = coreai.decomposable.broadcasting_or %[[X_IS_POS_INF]], %[[X_IS_NEG_INF]]
+            // CHECK: %[[Y_IS_POS_INF:.*]] = coreai.decomposable.broadcasting_equal %[[Y]], %[[POS_INF]]
+            // CHECK: %[[Y_IS_NEG_INF:.*]] = coreai.decomposable.broadcasting_equal %[[Y]], %[[NEG_INF]]
+            // CHECK: %[[Y_IS_INF:.*]] = coreai.decomposable.broadcasting_or %[[Y_IS_POS_INF]], %[[Y_IS_NEG_INF]]
+            // CHECK: %[[BOTH_INF:.*]] = coreai.decomposable.broadcasting_and %[[X_IS_INF]], %[[Y_IS_INF]]
+            // CHECK: %[[BASE:.*]] = coreai.atan
+            // CHECK: %[[RESULT:.*]] = coreai.decomposable.broadcasting_where %[[BOTH_INF]],
+            // CHECK-NEXT: coreai.output %[[RESULT]] : tensor
+            // CHECK-NEXT: }
+            // CHECK-NEXT: }
+            """,
+        )
+
+    def test_1d(self) -> None:
+        class Atan2Model(nn.Module):
+            def forward(self, y: Tensor, x: Tensor) -> Tensor:
+                return torch.atan2(y, x)
+
+        ir = get_ir(Atan2Model().eval(), y=torch.rand(4), x=torch.rand(4))
+        filecheck_pattern(
+            ir,
+            check_file="""
+            // CHECK-LABEL: module {
+            // CHECK-NEXT: coreai.graph @main(%[[Y:.*]]: tensor<4xf32> {coreai.name = "y"}, %[[X:.*]]: tensor<4xf32> {coreai.name = "x"}) -> (tensor<4xf32> {coreai.name = "{{.*}}"}) attributes {__coreai_pure__} {
+            // CHECK: %[[ZERO:.*]] = coreai.constant dense<0.000000e+00> : tensor
+            // CHECK: %[[ONE:.*]] = coreai.constant dense<1.000000e+00> : tensor
+            // CHECK: %[[Y_NEG:.*]] = coreai.decomposable.broadcasting_or
+            // CHECK: %[[X_NEG:.*]] = coreai.decomposable.broadcasting_or
+            // CHECK: %[[BOTH_INF:.*]] = coreai.decomposable.broadcasting_and
+            // CHECK: %[[BASE:.*]] = coreai.atan
+            // CHECK: %[[RESULT:.*]] = coreai.decomposable.broadcasting_where %[[BOTH_INF]],
+            // CHECK-NEXT: coreai.output %[[RESULT]] : tensor<4xf32>
+            // CHECK-NEXT: }
+            // CHECK-NEXT: }
+            """,
+        )
+
+
 class TestAvgPool2dIR:
     def test_static(self) -> None:
         class AvgPool2dModel(nn.Module):