diff --git a/pyproject.toml b/pyproject.toml
index 7ba6dd06c3..4f21a6d8d8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,7 +51,7 @@ classifiers = [
 ]
 dynamic = [ "version" ]
 dependencies = [
-    "anndata>=0.10.8",
+    "anndata>=0.11",
     "certifi",
     "fast-array-utils[accel,sparse]>=1.4",
     "h5py>=3.11",
@@ -98,6 +98,7 @@ scanorama = [ "scanorama" ]
 scrublet = [ "scikit-image>=0.25" ]
 # highly_variable_genes method 'seurat_v3'
 skmisc = [ "scikit-misc>=0.5.1" ]
+illico = [ "illico>=0.5.1" ]
 scanpy2 = [ "igraph>=0.10.8", "scikit-misc>=0.5.1" ]
 
 [dependency-groups]
@@ -108,6 +109,7 @@ dev = [
 test = [
     "scanpy[dask-ml]",
     "scanpy[dask]",
+    "scanpy[illico]",
     "scanpy[leiden]",
     "scanpy[plotting]",
     "scanpy[scrublet]",
diff --git a/src/scanpy/_settings/presets.py b/src/scanpy/_settings/presets.py
index e59f83d183..fa95ede75c 100644
--- a/src/scanpy/_settings/presets.py
+++ b/src/scanpy/_settings/presets.py
@@ -33,7 +33,9 @@
 ]
 
 
-type DETest = Literal["logreg", "t-test", "wilcoxon", "t-test_overestim_var"]
+type DETest = Literal[
+    "logreg", "t-test", "wilcoxon", "wilcoxon_illico", "t-test_overestim_var"
+]
 type HVGFlavor = Literal["seurat", "cell_ranger", "seurat_v3", "seurat_v3_paper"]
 type LeidenFlavor = Literal["leidenalg", "igraph"]
 
diff --git a/src/scanpy/tools/_rank_genes_groups.py b/src/scanpy/tools/_rank_genes_groups.py
index eb32fb4bdb..e555c6c602 100644
--- a/src/scanpy/tools/_rank_genes_groups.py
+++ b/src/scanpy/tools/_rank_genes_groups.py
@@ -7,6 +7,7 @@
 import numba
 import numpy as np
 import pandas as pd
+from anndata import AnnData
 from fast_array_utils.numba import njit
 from fast_array_utils.stats import mean_var
 from scipy import sparse
@@ -28,7 +29,6 @@
     from collections.abc import Generator, Iterable
     from typing import Literal
 
-    from anndata import AnnData
     from numpy.typing import NDArray
 
 
@@ -47,6 +47,33 @@ def _select_top_n(scores: NDArray, n_top: int):
     return global_indices
 
 
+def _illico_results_to_iter(
+    illico_df: pd.DataFrame,
+    groups_order: NDArray,
+    ireference: int | None,
+):
+    """Yield per-group ``(index, z, p)`` tuples from illico's long-form output.
+
+    illico returns a DataFrame with a 2-level MultiIndex ``(pert, feature)``
+    and columns including ``z_score`` and ``p_value``. We stream one group
+    at a time via `pandas.Series.loc`, trusting illico_df groups are ordered
+    by ``var_name``.
+    """
+    ref_label = None if ireference is None else groups_order[ireference]
+    z_series = illico_df["z_score"]
+    p_series = illico_df["p_value"]
+    illico_groups = set(illico_df.index.unique(level="pert"))
+    return (
+        (
+            group_index,
+            z_series.loc[group_name].to_numpy(),
+            p_series.loc[group_name].to_numpy(),
+        )
+        for group_index, group_name in enumerate(groups_order)
+        if group_name != ref_label and group_name in illico_groups
+    )
+
+
 @njit
 def rankdata(data: NDArray[np.number]) -> NDArray[np.float64]:
     """Parallelized version of scipy.stats.rankdata."""
@@ -141,6 +168,7 @@ def __init__(
             self.expm1_func = lambda x: np.expm1(x * np.log(base))
         else:
             self.expm1_func = np.expm1
+        self.group_col = adata.obs[groupby].array
 
         self.groups_order, self.groups_masks_obs = _utils.select_groups(
             adata, groups, groupby
@@ -441,8 +469,40 @@ def compute_statistics(  # noqa: PLR0912
             if not mean_in_log_space:
                 # If we are not exponentiating after the mean aggregation, we need to recalculate the stats.
                 self._basic_stats(exponentiate_values=True)
-        elif method == "wilcoxon":
-            generate_test_results = self.wilcoxon(tie_correct=tie_correct)
+        elif "wilcoxon" in method:
+            if "illico" in method:
+                from illico import asymptotic_wilcoxon
+
+                illico_df = asymptotic_wilcoxon(
+                    AnnData(
+                        X=self.X,
+                        var=pd.DataFrame(index=self.var_names),
+                        obs=pd.DataFrame(
+                            index=pd.RangeIndex(self.X.shape[0]).astype("str"),
+                            # This self.group_col means illico will run tests against *all* data
+                            # instead of what's in self.groups_order as controlled by the `groups` arg.
+                            # TODO: Only run the subset once illico supports a `groups` argument
+                            data={"group": self.group_col},
+                        ),
+                    ),
+                    reference=self.groups_order[self.ireference]
+                    if self.ireference is not None
+                    else None,
+                    group_keys="group",
+                    return_as_scanpy=False,
+                    is_log1p=True,
+                    tie_correct=tie_correct,
+                    use_continuity=False,
+                    alternative="two-sided",
+                    use_rust=False,
+                )
+                generate_test_results = _illico_results_to_iter(
+                    illico_df,
+                    self.groups_order,
+                    self.ireference,
+                )
+            else:
+                generate_test_results = self.wilcoxon(tie_correct=tie_correct)
             # If we're not exponentiating after the mean aggregation, then do it now.
             self._basic_stats(exponentiate_values=not mean_in_log_space)
         elif method == "logreg":
@@ -451,7 +511,6 @@ def compute_statistics(  # noqa: PLR0912
         self.stats = None
 
         n_genes = self.X.shape[1]
-
         for group_index, scores, pvals in generate_test_results:
             group_name = str(self.groups_order[group_index])
 
diff --git a/src/testing/scanpy/_pytest/marks.py b/src/testing/scanpy/_pytest/marks.py
index 1e83404614..654340f604 100644
--- a/src/testing/scanpy/_pytest/marks.py
+++ b/src/testing/scanpy/_pytest/marks.py
@@ -41,6 +41,7 @@ def _generate_next_value_(
     skimage = "scikit-image"
     skmisc = "scikit-misc"
     zarr = auto()
+    illico = auto()
     # external
     bbknn = auto()
     harmony = "harmonyTS"
diff --git a/tests/test_rank_genes_groups.py b/tests/test_rank_genes_groups.py
index d7424353e1..6c32d45dbc 100644
--- a/tests/test_rank_genes_groups.py
+++ b/tests/test_rank_genes_groups.py
@@ -17,13 +17,14 @@
 from scanpy._utils.random import _LegacyRng
 from scanpy.get import rank_genes_groups_df
 from scanpy.tools import rank_genes_groups
-from scanpy.tools._rank_genes_groups import _RankGenes
+from scanpy.tools._rank_genes_groups import _illico_results_to_iter, _RankGenes
 from testing.scanpy._helpers import random_mask
 from testing.scanpy._helpers.data import pbmc68k_reduced
+from testing.scanpy._pytest.marks import needs
 from testing.scanpy._pytest.params import ARRAY_TYPES, ARRAY_TYPES_MEM
 
 if TYPE_CHECKING:
-    from collections.abc import Callable
+    from collections.abc import Callable, Sequence
     from typing import Any, Literal
 
     from numpy.lib.npyio import NpzFile
@@ -78,6 +79,25 @@ def get_true_scores(method: Literal["t-test", "wilcoxon"]) -> Expected:
     return Expected(names=expected["names"].astype("T"), scores=expected["scores"])
 
 
+def get_illico_results_df(
+    n_groups: int, n_genes: int, *, seed: int = 0
+) -> pd.DataFrame:
+    """Synthetic illico-shaped output used by the _illico_results_to_iter tests.
+
+    Features are in deliberately non-ascending to test ``var_name`` ordering.
+    """
+    rng = np.random.default_rng(seed)
+    groups = [f"g{i}" for i in range(n_groups)]
+    features = [f"f{n_genes - 1 - j}" for j in range(n_genes)]  # reversed -> not sorted
+    return pd.DataFrame(
+        {
+            "z_score": rng.normal(size=n_groups * n_genes),
+            "p_value": rng.uniform(size=n_groups * n_genes),
+        },
+        index=pd.MultiIndex.from_product([groups, features], names=["pert", "feature"]),
+    )
+
+
 # TODO: Make dask compatible
 @pytest.mark.parametrize("method", ["t-test", "wilcoxon"])
 @pytest.mark.parametrize("array_type", ARRAY_TYPES_MEM)
@@ -352,6 +372,100 @@ def test_mask_not_equal():
     assert not np.array_equal(no_mask, with_mask)
 
 
+@pytest.mark.parametrize(
+    ("groups_order", "ireference", "expected_indices"),
+    [
+        (["g0", "g1", "g2"], None, [0, 1, 2]),
+        (["g0", "g1", "g2"], 1, [0, 2]),
+    ],
+    ids=["vs_rest", "vs_reference"],
+)
+def test_illico_iter(groups_order, ireference, expected_indices):
+    df = get_illico_results_df(n_groups=3, n_genes=4)
+    feature_order = df.index.unique(level="feature")
+    out = list(_illico_results_to_iter(df, np.array(groups_order), ireference))
+    assert sorted(t[0] for t in out) == sorted(expected_indices)
+    for gi, z, p in out:
+        sub = df.xs(groups_order[gi], level=0).reindex(feature_order)
+        np.testing.assert_array_equal(z, sub["z_score"].to_numpy())
+        np.testing.assert_array_equal(p, sub["p_value"].to_numpy())
+
+
+@pytest.mark.parametrize("corr_method", ["benjamini-hochberg", "bonferroni"])
+@pytest.mark.parametrize("test", ["ovo", "ovr"])
+@pytest.mark.parametrize(
+    "mean_in_log_space", [True, False], ids=["log_space_mean", "linear_space_mean"]
+)
+@pytest.mark.parametrize(
+    "tie_correct", [True, False], ids=["tie_correct", "no_tie_correct"]
+)
+@pytest.mark.parametrize("groups", [["CD14+ Monocyte", "Dendritic"], "all"])
+@pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning")
+@needs.illico
+def test_illico(
+    test: Literal["ovo", "ovr"],
+    corr_method: Literal["benjamini-hochberg", "bonferroni"],
+    subtests: pytest.Subtests,
+    groups: Literal["all"] | Sequence[str],
+    *,
+    mean_in_log_space: bool,
+    tie_correct: bool,
+):
+
+    pbmc = pbmc68k_reduced()
+    pbmc.raw.X.sum_duplicates()
+    pbmc.raw.X.sort_indices()
+    pbmc_illico = pbmc.copy()
+
+    reference = pbmc.obs["bulk_labels"].iloc[0] if test == "ovo" else "rest"
+    sc.tl.rank_genes_groups(
+        pbmc_illico,
+        groupby="bulk_labels",
+        method="wilcoxon_illico",
+        reference=reference if test == "ovo" else "rest",
+        n_genes=pbmc.n_vars,
+        tie_correct=tie_correct,
+        corr_method=corr_method,
+        mean_in_log_space=mean_in_log_space,
+        groups=groups,
+    )
+
+    sc.tl.rank_genes_groups(
+        pbmc,
+        groupby="bulk_labels",
+        method="wilcoxon",
+        reference=reference if test == "ovo" else "rest",
+        n_genes=pbmc.n_vars,
+        tie_correct=tie_correct,
+        corr_method=corr_method,
+        mean_in_log_space=mean_in_log_space,
+        groups=groups,
+    )
+    scanpy_results = pbmc.uns["rank_genes_groups"]
+    illico_results = pbmc_illico.uns["rank_genes_groups"]
+    assert set(illico_results.keys()) == set(scanpy_results.keys()), (
+        "Output keys do not match Scanpy's output format."
+    )
+
+    for k, ref in scanpy_results.items():
+        with subtests.test(k):
+            if k in ["params", "names"]:
+                # We can skip names ordering check as if incorrect, other values will mismatch
+                continue
+            res = np.array(illico_results[k].tolist())
+            ref_arr = np.array(ref.tolist())
+            mask = np.isfinite(ref_arr) * np.isfinite(
+                res
+            )  # Mask to ignore inf values in the comparison
+            np.testing.assert_allclose(
+                ref_arr[mask],
+                res[mask],
+                rtol=0,
+                atol=1e-6,
+                err_msg=f"Mismatch in '{k}' values between asymptotic_wilcoxon and Scanpy outputs.",
+            )
+
+
 @pytest.mark.parametrize(
     ("mean_in_log_space", "expected_logfc"),
     [
@@ -363,10 +477,18 @@ def test_mask_not_equal():
         (False, -1.0),
     ],
 )
-@pytest.mark.parametrize("method", ["wilcoxon", "t-test", "t-test_overestim_var"])
+@pytest.mark.parametrize(
+    "method",
+    [
+        "wilcoxon",
+        "t-test",
+        "t-test_overestim_var",
+        pytest.param("wilcoxon_illico", marks=needs.illico),
+    ],
+)
 def test_mean_in_log_space(
     expected_logfc: float,
-    method: Literal["wilcoxon", "t-test", "t-test_overestim_var"],
+    method: Literal["wilcoxon", "wilcoxon_illico", "t-test", "t-test_overestim_var"],
     *,
     mean_in_log_space: bool,
 ):