From ac0e0deb00fedd9c678ce7f0ea5e115544b886d1 Mon Sep 17 00:00:00 2001
From: gokulkrishna98 <gokulkrishna98@users.noreply.github.com>
Date: Thu, 11 Jun 2026 08:42:15 -0700
Subject: [PATCH 1/2] fix(externalize): skip fully-specialised dims in
 submodule re-export
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When externalising an SDPA submodule, ``_dynamic_shapes_from_node``
requested a Dim for every SymInt in the upstream FakeTensor's shape
— including SymInts whose ``.node.expr`` had been specialised to a
literal int by the parent program. Those re-exported as an unbounded
``Dim(min=1)`` and ``torch.export`` rejected the submodule with
``L['key'].size()[2] <= IntInfinity()`` whenever a model used a static
query length and a dynamic KV-context length (the prefill / decode
shape used by hybrid linear-attention models).

Skip SymInts whose expr is already a number — they are static dims and
should not appear in ``dynamic_shapes`` at all.

Signed-off-by: gokulkrishna98 <gokulkrishna98@users.noreply.github.com>
---
 coreai_torch/_utils.py                    |   2 +-
 tests/cross_component/__init__.py         |   0
 tests/cross_component/test_externalize.py | 226 ++++++++++++++++++++++
 tests/test_dynamic_shapes.py              |  58 ++++++
 4 files changed, 285 insertions(+), 1 deletion(-)
 create mode 100644 tests/cross_component/__init__.py
 create mode 100644 tests/cross_component/test_externalize.py
 create mode 100644 tests/test_dynamic_shapes.py

diff --git a/coreai_torch/_utils.py b/coreai_torch/_utils.py
index 813e36e..624ce9d 100644
--- a/coreai_torch/_utils.py
+++ b/coreai_torch/_utils.py
@@ -1552,7 +1552,7 @@ def _dynamic_shapes_from_node(node: fx.Node) -> tuple[dict[int, Dim] | None, ...
         dims = {
             j: _dim_for_sym(s, cache)
             for j, s in enumerate(val.shape)
-            if isinstance(s, torch.SymInt)
+            if isinstance(s, torch.SymInt) and not s.node.expr.is_number
         }
         result.append(dims or None)
     return tuple(result)
diff --git a/tests/cross_component/__init__.py b/tests/cross_component/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/cross_component/test_externalize.py b/tests/cross_component/test_externalize.py
new file mode 100644
index 0000000..4c322ad
--- /dev/null
+++ b/tests/cross_component/test_externalize.py
@@ -0,0 +1,226 @@
+# Copyright 2026 Apple Inc.
+#
+# Use of this source code is governed by a BSD-3-clause license that can
+# be found in the LICENSE file or at https://opensource.org/licenses/BSD-3-Clause
+
+"""End-to-end externalize regression: static query, dynamic KV context.
+
+When a model is exported with a *static* query length but a *dynamic*
+KV-context length (the prefill / decode shape used by hybrid linear-
+attention models), the externalize pipeline re-exports the SDPA
+submodule standalone. Pre-fix, the key sequence dim came back as
+``[query_len, +inf)`` and ``torch.export`` rejected the submodule with
+``L['key'].size()[2] <= IntInfinity()``.
+
+Targets the torch 2.9 failure path. Coverage on torch >= 2.10 is in
+progress.
+"""
+
+from __future__ import annotations
+
+import pytest
+import torch
+
+from coreai_torch import ExternalizeSpec, TorchConverter, get_decomp_table
+from coreai_torch.composite_ops import RMSNorm, RoPE, SDPA
+
+
+@torch.library.custom_op(
+    "coreai_torch_test::mutable_slice_update_regression", mutates_args=["x"]
+)
+def _mutable_slice_update(
+    x: torch.Tensor,
+    update: torch.Tensor,
+    begin: torch.Tensor,
+    end: torch.Tensor,
+) -> torch.Tensor:
+    begin_t = torch.split(begin, 1, dim=0)
+    end_t = torch.split(end, 1, dim=0)
+    slices = tuple(slice(b.item(), e.item()) for b, e in zip(begin_t, end_t))
+    x[slices] = update
+    return x.clone()
+
+
+@_mutable_slice_update.register_fake
+def _mutable_slice_update_meta(
+    x: torch.Tensor,
+    update: torch.Tensor,
+    begin: torch.Tensor,
+    end: torch.Tensor,
+) -> torch.Tensor:
+    return torch.empty_like(x)  # same shape/dtype/device as the real op's x.clone()
+
+
+class _KVCache:
+    """Minimal KV cache exposing ``update_and_fetch``.
+
+    Mirrors the shape / op pattern used by upstream hybrid linear-
+    attention models (``mutable_slice_update`` to write back, ``narrow``
+    to fetch the active prefix); only what's needed to surface the
+    externalize Dim bug is kept.
+    """
+
+    def __init__(self, k_cache: torch.Tensor, v_cache: torch.Tensor) -> None:
+        self._k_cache = k_cache
+        self._v_cache = v_cache
+
+    def update_and_fetch(  # noqa: PLR0913
+        self,
+        layer_idx: int,
+        offset: int,
+        k: torch.Tensor,
+        v: torch.Tensor,
+        seq_len: int,
+        query_len: int,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        torch._check_is_size(query_len)  # type: ignore[no-untyped-call]
+        torch._check_is_size(offset)  # type: ignore[no-untyped-call]
+        torch._check_is_size(seq_len)  # type: ignore[no-untyped-call]
+        torch._check_is_size(layer_idx)  # type: ignore[no-untyped-call]
+
+        device = self._k_cache.device
+        layer_index = torch.tensor((layer_idx,), dtype=torch.int32, device=device)
+        layer_index_end = torch.tensor(
+            (layer_idx + 1,), dtype=torch.int32, device=device
+        )
+        zero = torch.tensor((0,), dtype=torch.int32, device=device)
+
+        for buf, src in ((self._k_cache, k), (self._v_cache, v)):
+            _mutable_slice_update(
+                buf,
+                src.unsqueeze(0),
+                torch.cat(
+                    [
+                        layer_index,
+                        zero,
+                        zero,
+                        torch.tensor((offset,), dtype=torch.int32, device=device),
+                        zero,
+                    ]
+                ),
+                torch.cat(
+                    [
+                        layer_index_end,
+                        torch.tensor(
+                            (buf.size(1),), dtype=torch.int32, device=device
+                        ),
+                        torch.tensor(
+                            (buf.size(2),), dtype=torch.int32, device=device
+                        ),
+                        torch.tensor(
+                            (offset + src.size(2),),
+                            dtype=torch.int32,
+                            device=device,
+                        ),
+                        torch.tensor(
+                            (buf.size(4),), dtype=torch.int32, device=device
+                        ),
+                    ]
+                ),
+            )
+
+        k_out = self._k_cache.narrow(0, layer_idx, 1).narrow(-2, 0, seq_len).squeeze(0)
+        v_out = self._v_cache.narrow(0, layer_idx, 1).narrow(-2, 0, seq_len).squeeze(0)
+        return k_out, v_out
+
+
+@pytest.mark.skipif(
+    tuple(int(p) for p in torch.__version__.split(".")[:2]) >= (2, 10),
+    reason="torch >= 2.10 coverage in progress.",
+)
+def test_attention_layer_static_query_dynamic_kv_externalize() -> None:
+    HID, NH, NKV, HD = 256, 8, 2, 64
+
+    def repeat_kv(x: torch.Tensor, n: int) -> torch.Tensor:
+        b, h, s, d = x.shape
+        return x[:, :, None, :, :].expand(b, h, n, s, d).reshape(b, h * n, s, d)
+
+    class AttnLayer(torch.nn.Module):
+        def __init__(self) -> None:
+            super().__init__()
+            self.q_proj = torch.nn.Linear(HID, NH * HD, bias=False)
+            self.k_proj = torch.nn.Linear(HID, NKV * HD, bias=False)
+            self.v_proj = torch.nn.Linear(HID, NKV * HD, bias=False)
+            self.o_proj = torch.nn.Linear(NH * HD, HID, bias=False)
+            self.q_norm = RMSNorm(HD, eps=1e-6)
+            self.k_norm = RMSNorm(HD, eps=1e-6)
+            self.rope = RoPE(base=1e5, dims=HD)
+            self.sdpa = SDPA(is_causal=True)
+
+        def forward(
+            self,
+            x: torch.Tensor,
+            position_ids: torch.Tensor,
+            k_cache: torch.Tensor,
+            v_cache: torch.Tensor,
+        ) -> torch.Tensor:
+            b, query_len, _ = x.shape
+            cache = _KVCache(k_cache, v_cache)
+            sequence_length = position_ids.shape[-1]
+            torch._check_is_size(sequence_length)  # type: ignore[no-untyped-call]
+            offset = sequence_length - query_len
+            torch._check_is_size(offset)  # type: ignore[no-untyped-call]
+            q = self.q_proj(x).reshape(b, query_len, NH, HD).permute(0, 2, 1, 3)
+            k = self.k_proj(x).reshape(b, query_len, NKV, HD).permute(0, 2, 1, 3)
+            v = self.v_proj(x).reshape(b, query_len, NKV, HD).permute(0, 2, 1, 3)
+            q = self.q_norm(q)
+            k = self.k_norm(k)
+            rp = position_ids.narrow(-1, offset, query_len)
+            q = self.rope(q, position_ids=rp)
+            k = self.rope(k, position_ids=rp)
+            k, v = cache.update_and_fetch(
+                0, offset, k, v, seq_len=sequence_length, query_len=query_len
+            )
+            k = repeat_kv(k, NH // NKV)
+            v = repeat_kv(v, NH // NKV)
+            out = (
+                self.sdpa(q, k, v)
+                .permute(0, 2, 1, 3)
+                .reshape(b, query_len, NH * HD)
+            )
+            return self.o_proj(out)
+
+    torch.manual_seed(0)
+    s, ctx, cap = 12, 20, 64  # static query=12, dynamic ctx (trace 20), cap 64
+    model = AttnLayer().to(torch.float16).eval()
+    x = torch.randn(1, s, HID, dtype=torch.float16)
+    pos = torch.arange(ctx, dtype=torch.int32).unsqueeze(0)
+    kc = torch.zeros(1, 1, NKV, cap, HD, dtype=torch.float16)
+    vc = torch.zeros(1, 1, NKV, cap, HD, dtype=torch.float16)
+    ds = {
+        "x": None,
+        "position_ids": {1: torch.export.Dim("ctx", min=s + 1, max=cap)},
+        "k_cache": {3: torch.export.Dim("kseq", min=s + 1, max=cap)},
+        "v_cache": {3: torch.export.Dim("vseq", min=s + 1, max=cap)},
+    }
+    spec = ExternalizeSpec(
+        target_class=SDPA,
+        composite_op_name="scaled_dot_product_attention",
+        composite_attrs=["scale", "is_causal", "window_size"],
+    )
+
+    # Pre-fix this raised:
+    #   RuntimeError: Internal error: failed to export submodule 'sdpa_*':
+    #   Constraints violated (d_20)! ...
+    #   12 <= L['key'].size()[2] and L['key'].size()[2] <= IntInfinity()
+    # The fix lives in the externalize pipeline (run inside ``to_coreai``),
+    # so we drive the converter past that step and tolerate any *downstream*
+    # MLIR-lowering failure — the regression marker is the absence of the
+    # constraint-violation message above.
+    converter = TorchConverter().add_pytorch_module(
+        model,
+        export_fn=lambda m: torch.export.export(
+            m, args=(x, pos, kc, vc), dynamic_shapes=ds
+        ).run_decompositions(get_decomp_table()),
+        externalize_modules=[spec],
+    )
+    try:
+        converter.to_coreai()
+    except Exception as e:  # noqa: BLE001
+        if "Constraints violated" in str(e) and "IntInfinity" in str(e):
+            raise AssertionError(
+                "Externalize SDPA submodule re-export regressed "
+                f"(issue #1): {e}"
+            ) from e
+        # Any other downstream failure (e.g. MLIR lowering on an
+        # incomplete dev install) is unrelated to this bug.
diff --git a/tests/test_dynamic_shapes.py b/tests/test_dynamic_shapes.py
new file mode 100644
index 0000000..137514a
--- /dev/null
+++ b/tests/test_dynamic_shapes.py
@@ -0,0 +1,58 @@
+# Copyright 2026 Apple Inc.
+#
+# Use of this source code is governed by a BSD-3-clause license that can
+# be found in the LICENSE file or at https://opensource.org/licenses/BSD-3-Clause
+
+"""Tests for dynamic-shape reconstruction in the externalize pipeline."""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+from unittest.mock import MagicMock
+
+import sympy
+import torch
+
+from coreai_torch._utils import _dynamic_shapes_from_node
+
+
+class TestDynamicShapesFromNode:
+    """Cover ``_dynamic_shapes_from_node``'s reconstruction of a positional
+    ``dynamic_shapes`` tuple from a custom op node's FakeTensors.
+    """
+
+    @staticmethod
+    def test_skips_specialised_symints() -> None:
+        """Specialised SymInts (``expr.is_number``) must not produce a Dim.
+
+        When a model is externalised, the standalone re-export reconstructs
+        a ``dynamic_shapes`` spec from the FakeTensors flowing into the
+        custom op node. If a dim's symbol has been fully specialised to a
+        literal int by the parent program (e.g. the key sequence length
+        emerging from ``KVCache.update_and_fetch`` after the prefill chunk
+        size is resolved), the SymInt's ``.node.expr`` is a sympy
+        ``Integer``. Asking for a ``Dim`` for it would yield an unbounded
+        ``Dim(min=1)`` and ``torch.export`` would reject the submodule
+        with ``L['key'].size()[2] <= IntInfinity()``. The filter treats
+        such dims as static (no Dim entry).
+        """
+        specialised = MagicMock(spec=torch.SymInt)
+        specialised.node = SimpleNamespace(expr=sympy.Integer(20))
+
+        class _FakeTensor(torch.Tensor):
+            @staticmethod
+            def __new__(cls, shape: tuple[object, ...]) -> "_FakeTensor":
+                t = torch.Tensor._make_subclass(cls, torch.empty(0))
+                t._shape = shape
+                return t
+
+            @property  # type: ignore[override]
+            def shape(self) -> tuple[object, ...]:  # noqa: D401
+                return self._shape
+
+        val = _FakeTensor((1, 8, specialised, 64))
+        arg = SimpleNamespace(meta={"val": val})
+        node = SimpleNamespace(args=(arg,), target="custom_op")
+
+        # All non-symbolic / specialised dims → no Dim entries → None for the arg.
+        assert _dynamic_shapes_from_node(node) == (None,)

From 4774f02ccf83ae2a592d27e814f75f3c027aefb1 Mon Sep 17 00:00:00 2001
From: gokulkrishna98 <gokulkrishna98@users.noreply.github.com>
Date: Mon, 22 Jun 2026 12:46:22 -0700
Subject: [PATCH 2/2] Merge main: normalize notebook cell IDs and clear
 execution outputs

---
 .../tutorials/construct-a-graph.ipynb         | 171 ++++--------------
 .../tutorials/run-an-aimodel.ipynb            | 131 +++-----------
 2 files changed, 62 insertions(+), 240 deletions(-)

diff --git a/docs/coreai-core/tutorials/construct-a-graph.ipynb b/docs/coreai-core/tutorials/construct-a-graph.ipynb
index a10b05d..86f0acc 100644
--- a/docs/coreai-core/tutorials/construct-a-graph.ipynb
+++ b/docs/coreai-core/tutorials/construct-a-graph.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "markdown",
-   "id": "intro",
+   "id": "0",
    "metadata": {},
    "source": [
     "# Constructing a CoreAI Graph\n",
@@ -25,7 +25,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "warning",
+   "id": "1",
    "metadata": {},
    "source": [
     ":::{warning}\n",
@@ -39,7 +39,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "setup-md",
+   "id": "2",
    "metadata": {},
    "source": [
     "## Setup\n",
@@ -50,25 +50,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
-   "id": "imports",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2026-06-04T23:13:09.232797Z",
-     "iopub.status.busy": "2026-06-04T23:13:09.232330Z",
-     "iopub.status.idle": "2026-06-04T23:13:12.445622Z",
-     "shell.execute_reply": "2026-06-04T23:13:12.444678Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Dev installation detected. Using local Core AI Framework.\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "id": "3",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "import shutil\n",
     "from pathlib import Path\n",
@@ -84,7 +69,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "specs-md",
+   "id": "4",
    "metadata": {},
    "source": [
     "## Describe the inputs and outputs\n",
@@ -99,26 +84,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
-   "id": "specs",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2026-06-04T23:13:12.447443Z",
-     "iopub.status.busy": "2026-06-04T23:13:12.447294Z",
-     "iopub.status.idle": "2026-06-04T23:13:12.451834Z",
-     "shell.execute_reply": "2026-06-04T23:13:12.451081Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "input:  tensor<2x3xf32>\n",
-      "output: tensor<2x3xf32>\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "id": "5",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "input_spec = TensorSpec(shape=[2, 3], dtype=np.float32)\n",
     "output_spec = TensorSpec(shape=[2, 3], dtype=np.float32, name=\"y\")\n",
@@ -129,7 +98,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "build-md",
+   "id": "6",
    "metadata": {},
    "source": [
     "## Build the graph\n",
@@ -145,25 +114,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
-   "id": "build",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2026-06-04T23:13:12.453228Z",
-     "iopub.status.busy": "2026-06-04T23:13:12.453111Z",
-     "iopub.status.idle": "2026-06-04T23:13:12.470623Z",
-     "shell.execute_reply": "2026-06-04T23:13:12.470201Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Module verified.\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "id": "7",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "module = Module.create()\n",
     "with module:\n",
@@ -181,7 +135,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "program-md",
+   "id": "8",
    "metadata": {},
    "source": [
     "## Wrap in an `AIProgram`\n",
@@ -192,16 +146,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
-   "id": "program",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2026-06-04T23:13:12.472009Z",
-     "iopub.status.busy": "2026-06-04T23:13:12.471916Z",
-     "iopub.status.idle": "2026-06-04T23:13:12.474216Z",
-     "shell.execute_reply": "2026-06-04T23:13:12.473635Z"
-    }
-   },
+   "execution_count": null,
+   "id": "9",
+   "metadata": {},
    "outputs": [],
    "source": [
     "program = AIProgram(module)"
@@ -209,7 +156,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "save-md",
+   "id": "10",
    "metadata": {},
    "source": [
     "## Save as an `.aimodel`\n",
@@ -222,25 +169,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
-   "id": "save",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2026-06-04T23:13:12.475505Z",
-     "iopub.status.busy": "2026-06-04T23:13:12.475388Z",
-     "iopub.status.idle": "2026-06-04T23:13:12.479489Z",
-     "shell.execute_reply": "2026-06-04T23:13:12.479034Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved to: hello-graph.aimodel\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "id": "11",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "asset_path = Path(\"./hello-graph.aimodel\")\n",
     "if asset_path.exists():\n",
@@ -252,7 +184,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "inspect-md",
+   "id": "12",
    "metadata": {},
    "source": [
     "## Inspect the saved asset\n",
@@ -263,26 +195,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
-   "id": "inspect",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2026-06-04T23:13:12.480788Z",
-     "iopub.status.busy": "2026-06-04T23:13:12.480705Z",
-     "iopub.status.idle": "2026-06-04T23:13:12.483398Z",
-     "shell.execute_reply": "2026-06-04T23:13:12.483007Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "contents:   ['main.hash', 'main.mlirb', 'metadata.json']\n",
-      "total size: 446 bytes\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "id": "13",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "files = sorted(p.name for p in asset_path.iterdir())\n",
     "total_bytes = sum(p.stat().st_size for p in asset_path.rglob(\"*\") if p.is_file())\n",
@@ -293,7 +209,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "validate-md",
+   "id": "14",
    "metadata": {},
    "source": [
     "## Validate that the asset is loadable\n",
@@ -305,25 +221,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
-   "id": "validate",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2026-06-04T23:13:12.484574Z",
-     "iopub.status.busy": "2026-06-04T23:13:12.484499Z",
-     "iopub.status.idle": "2026-06-04T23:13:12.498350Z",
-     "shell.execute_reply": "2026-06-04T23:13:12.497839Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "OK\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "id": "15",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "reloaded = AIModelAsset.load(asset_path)\n",
     "async with reloaded.executable() as model:\n",
diff --git a/docs/coreai-core/tutorials/run-an-aimodel.ipynb b/docs/coreai-core/tutorials/run-an-aimodel.ipynb
index 4d878f9..7e23884 100644
--- a/docs/coreai-core/tutorials/run-an-aimodel.ipynb
+++ b/docs/coreai-core/tutorials/run-an-aimodel.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "markdown",
-   "id": "intro",
+   "id": "0",
    "metadata": {},
    "source": [
     "# Running an `.aimodel` with `coreai.runtime`\n",
@@ -29,7 +29,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "setup-md",
+   "id": "1",
    "metadata": {},
    "source": [
     "## Setup\n",
@@ -41,25 +41,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
-   "id": "imports",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2026-06-04T23:13:09.233244Z",
-     "iopub.status.busy": "2026-06-04T23:13:09.232818Z",
-     "iopub.status.idle": "2026-06-04T23:13:12.445952Z",
-     "shell.execute_reply": "2026-06-04T23:13:12.444672Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Dev installation detected. Using local Core AI Framework.\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "id": "2",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "from pathlib import Path\n",
     "\n",
@@ -72,7 +57,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "ensure-md",
+   "id": "3",
    "metadata": {},
    "source": [
     "### Ensure `hello.aimodel` exists\n",
@@ -84,25 +69,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
-   "id": "ensure",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2026-06-04T23:13:12.447569Z",
-     "iopub.status.busy": "2026-06-04T23:13:12.447413Z",
-     "iopub.status.idle": "2026-06-04T23:13:12.473679Z",
-     "shell.execute_reply": "2026-06-04T23:13:12.473128Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "created hello-run.aimodel\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "id": "4",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "from shutil import rmtree\n",
     "from typing import Annotated\n",
@@ -132,7 +102,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "open-asset-md",
+   "id": "5",
    "metadata": {},
    "source": [
     "## Open the asset\n",
@@ -155,16 +125,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
-   "id": "open-asset",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2026-06-04T23:13:12.475086Z",
-     "iopub.status.busy": "2026-06-04T23:13:12.474967Z",
-     "iopub.status.idle": "2026-06-04T23:13:12.477296Z",
-     "shell.execute_reply": "2026-06-04T23:13:12.476906Z"
-    }
-   },
+   "execution_count": null,
+   "id": "6",
+   "metadata": {},
    "outputs": [],
    "source": [
     "asset = AIModelAsset.load(asset_path)"
@@ -172,7 +135,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "run-md",
+   "id": "7",
    "metadata": {},
    "source": [
     "## Open the model and run inference\n",
@@ -195,32 +158,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
-   "id": "run",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2026-06-04T23:13:12.478555Z",
-     "iopub.status.busy": "2026-06-04T23:13:12.478462Z",
-     "iopub.status.idle": "2026-06-04T23:13:12.505717Z",
-     "shell.execute_reply": "2026-06-04T23:13:12.505274Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "functions: ['main']\n",
-      "name:    main\n",
-      "inputs:  ['x']\n",
-      "outputs: ['y']\n",
-      "input x:\n",
-      "[[1.5 1.5 1.5]\n",
-      " [1.5 1.5 1.5]]\n",
-      "output keys: ['y']\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "id": "8",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "async with asset.executable() as model:\n",
     "    print(f\"functions: {model.function_names}\")\n",
@@ -245,7 +186,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "inspect-md",
+   "id": "9",
    "metadata": {},
    "source": [
     "## Inspect the output\n",
@@ -258,30 +199,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
-   "id": "inspect",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2026-06-04T23:13:12.506908Z",
-     "iopub.status.busy": "2026-06-04T23:13:12.506825Z",
-     "iopub.status.idle": "2026-06-04T23:13:12.509068Z",
-     "shell.execute_reply": "2026-06-04T23:13:12.508740Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "shape: (2, 3)\n",
-      "dtype: float32\n",
-      "value:\n",
-      "[[3. 3. 3.]\n",
-      " [3. 3. 3.]]\n",
-      "OK — inference produced expected output shape and dtype\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "id": "10",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "print(f\"shape: {result.shape}\")\n",
     "print(f\"dtype: {result.dtype}\")\n",
@@ -294,7 +215,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "advanced-md",
+   "id": "11",
    "metadata": {},
    "source": [
     "## What's next\n",