diff --git a/.gitignore b/.gitignore index 3ad5c7c6..18f922fe 100644 --- a/.gitignore +++ b/.gitignore @@ -55,6 +55,11 @@ !/benchmarks/lp/mps_to_nl.py !/benchmarks/lpopt/build_subset.py !/benchmarks/lpopt/mps_to_nl.py +# The cblib suite is the conic (exp/power cone) tier: CBLIB .cbf instances +# solved through the pounce_cblib binary. Track its runner (README via the +# per-suite rule above); the per-run pounce.json stays ignored like every +# other suite. +!/benchmarks/cblib/run_cblib.py # Vanderbei reference status (derived once from cute_table.pdf): which # problems have a documented feasible optimum vs. are hard/infeasible/ # unbounded/untabulated. Tracked so we never have to re-derive it. @@ -87,6 +92,9 @@ python/dist/ python/examples/*.png python/*.egg-info/ +# Local virtualenvs (e.g. for building the extension + running notebooks) +.venv/ + # Claude Code local state .claude/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 428530dd..4e8e6bdd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,91 @@ changes. ## [0.4.0] — 2026-06-05 +### Added — Convex / conic solver (`pounce-convex`; `solve_qp` / `solve_socp`) + +POUNCE is no longer NLP-only: a new pure-Rust convex interior-point solver +(`pounce-convex`) handles **LP, convex QP, SOCP, and PSD / exp / power cones**, +solving each to a **global** optimum (a convex problem has no other kind). It +uses a homogeneous self-dual embedding (HSDE) — symmetric for the self-dual +cones and a non-symmetric driver for the exponential/power cones — over a +`Cone` abstraction (`nonneg`, `soc`, `psd`, `exp`, `power`, plus composite and +chordal decompositions for sparse SDPs). Convex solvers extract the constant +`P`, `A`, `c`, `b` data once at setup rather than re-evaluating per iteration, +and share the `pounce-linsol` / `pounce-linalg` factorization substrate with the +NLP path. Python entry points are typed (not SciPy-shaped, by necessity — a cone +program is *data*, not a callable): `solve_qp(P, c, A, b, G, h, lb, ub, …)`, +`solve_socp(…, cones=…)`, plus `solve_qp_batch` / `solve_qp_multi_rhs` for +batched factor reuse, and a reduced-Hessian sensitivity API. The CLI reads conic +instances from CBLIB / `.cbf` (including PSDCON / HCOORD / DCOORD SDP blocks). + +### Fixed — Convex LP/QP reported objective dropped tree-folded constant + +The convex LP/QP path (`solver_selection=lp-ipm` / `qp-ipm`) reported an +objective off by the objective's constant term whenever AMPL/Pyomo folded that +constant into the **nonlinear objective tree** (the `+9` of `(x-3)²`) rather +than the `.nl` linear-section constant. The quadratic-form extractor +(`analyze_quadratic_full`) discarded the degree-0 term — correct for the +*minimizer*, wrong for the *reported value* — so e.g. `HS21` reported `0.04` +instead of `−99.96` and `HS35` `−8.889` instead of `0.111`. The extractor now +returns that constant and the convex driver adds it to the reported objective +alongside `obj_constant`; the optimal point was always correct. Caught by a +head-to-head NLP-vs-convex run over the Maros-Mészáros QP and NETLIB LP suites +(`benchmarks/nl_compare_nlp_vs_convex.md`). + +### Fixed — Convex LP/QP IPM stalled on badly-scaled NETLIB LPs + +The static KKT regularization `δ` (added on the reduced KKT diagonal so the +LDLᵀ has a stable inertia) was `1e-8`, large enough to **floor the achievable +primal residual** at `δ·‖dy‖`: with a full Newton step `A·dx = −r_p + δ·dy`, so +on instances with large equality multipliers the primal infeasibility cannot +fall below `δ·‖dy‖`. On NETLIB `adlittle` (`‖dy‖ ≈ 4e8`) this froze `inf_pr` +near 4 and the LP IPM ran to its iteration cap, returning a wrong objective +(`439665` vs the published `225494.96`). Lowering the default `δ` to `1e-10` — +still strictly positive, so the system stays quasi-definite — clears the floor: +`adlittle` now converges in ~57 iterations to the optimum, `stocfor1` speeds up +(139 → 71 iters), and the rest of the LP/QP suites are unchanged (the QP suite +is bit-identical). The whole `1e-9‥1e-11` band converges the benchmark suites; +`1e-10` is centered in it. + +Also: the convex IPM's opt-in iteration trace now records a **terminal record at +the converged iterate** (the NLP path's N+1 convention), so the trace always +ends at the optimum instead of at the last pre-step state — previously a solve +that converged in a single step left only the cold-start record in the trace. + +### Added — SOS polynomial global optimization (`sos_minimize`) + +`sos_minimize(objective, *, inequalities, equalities, …)` computes **certified +global** lower bounds for polynomial optimization via a sum-of-squares / +Lasserre relaxation (Putinar localizing multipliers for constraints), built on +the new PSD cone. When the relaxation is exact it extracts the global +minimizer(s) with an exactness certificate (multi-atom extraction without a +non-symmetric eig, plus facial reduction for degenerate solves). + +### Added — Spatial branch-and-bound global optimizer (`pounce-global`; +`minimize_global` / `--solver global`) + +A new `pounce-global` crate solves **factorable nonconvex NLPs to a certified +global optimum** by spatial branch-and-bound: αBB convex underestimators, +polyhedral envelopes for univariate atoms, level-1 RLT cuts, multi-grouping +trilinear relaxations, optimization-based bound tightening (OBBT), and +cutting-plane bound refinement, with local NLP upper bounds. Branching is +reliability-based (pseudocost + strong branching); the node pool and OBBT run in +parallel (deterministic, ~2.3–2.6× wall-clock). Exposed as +`minimize_global(objective, *, constraints, lo, hi, …)` in Python (a symbolic +`Expr` + box) and `pounce --solver global` on `.nl` models, with frontier +memory estimation and a pre-solve warning. + +### Added — Multi-backend interactive debugger (convex IPM + B&B tree) + +The interactive debugger was generalized over a `DebugState` trait so one REPL +drives all three solvers. New backends: a **convex/conic** debugger +(`pounce_cblib --debug`, wired through the symmetric and non-symmetric HSDE +drivers) and an **interactive branch-and-bound tree debugger** that can `step` +through nodes and `into` a node's relaxation — handing off to the interior-point +REPL via a shared command queue (tree ↔ interior-point). This composes with the +0.4.0 debugger features below (quote-aware tokenization, `ask` provider presets, +`--debug-json` protocol, Ctrl-C escape hatch). + ### Added — `pounce.curve_fit` (Python) A `scipy.optimize.curve_fit`-style nonlinear fitter on top of the diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..ed71bc8a --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,43 @@ +# pounce — release / publishing facts + +pounce ships to **three** registries on each release. Two are automated by +GitHub Actions (tag-triggered); the **crates.io one is manual** and is the +easiest to forget — it is NOT triggered by pushing a tag or by creating a +GitHub Release. + +## Surfaces (all must reach the same X.Y.Z) + +1. **PyPI `pounce-solver`** — `.github/workflows/release-pounce.yml`, triggered + by pushing a `python-vX.Y.Z` tag. Builds wheels (incl. Windows) + sdist, + publishes to PyPI. +2. **PyPI `pyomo-pounce`** — `.github/workflows/release-pyomo-pounce.yml`, + triggered by a `pyomo-pounce-vX.Y.Z` tag. +3. **crates.io — 16 workspace crates** — **MANUAL**, via + `scripts/publish-crates.sh` (run locally). NO workflow does `cargo publish`. + Full procedure in `dev-notes/cargo-release.md`. The script publishes in + topological (dependency) order; resume a mid-batch failure with + `--start-from `. New-crate rate limits apply on first publish only. + Crates with `publish = false` (pounce-py, pounce-studio-*, iter-diff) are + intentionally excluded. + + The CLI binary is also bundled inside the PyPI wheels, so an end user + `pip install pounce-solver` does not require the crates.io publish — but the + crates.io publish is still part of a complete release. + +## GitHub Release + +Created **by hand** (`gh release create vX.Y.Z --notes-file `); no workflow +makes it. Body has historically been the matching `## [X.Y.Z]` section of +CHANGELOG.md. A git tag alone does NOT create a Release, and creating a Release +does NOT trigger any workflow (nothing has an `on: release` trigger). + +## Checking what's published (don't get this wrong) + +crates.io API needs a User-Agent or it silently looks unpublished: + + curl -s -H "User-Agent: pounce-release-check (jkitchin@andrew.cmu.edu)" \ + https://crates.io/api/v1/crates/ | python3 -c \ + "import sys,json; c=json.load(sys.stdin).get('crate'); print(c['max_version'] if c else 'NOT PUBLISHED')" + +Sanity-check against `serde` first; if serde reads NOT PUBLISHED your request is +being rejected, not the crate missing. diff --git a/Cargo.lock b/Cargo.lock index 6711021a..24123c9b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -306,8 +306,7 @@ dependencies = [ [[package]] name = "feral" version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44c7bcc7e829a9454749e80f3aa3fe88c95f8483ec9b7a365ff88c0612ad51a3" +source = "git+https://github.com/jkitchin/feral.git?rev=11fb4b98d7caac0383c53b6a969e27632efcef77#11fb4b98d7caac0383c53b6a969e27632efcef77" dependencies = [ "feral-amd", "feral-amf", @@ -324,8 +323,7 @@ dependencies = [ [[package]] name = "feral-amd" version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "363d1f8038ad30f115b56330770fa075444251d49259de7b9f60852f18a0a3f5" +source = "git+https://github.com/jkitchin/feral.git?rev=11fb4b98d7caac0383c53b6a969e27632efcef77#11fb4b98d7caac0383c53b6a969e27632efcef77" dependencies = [ "feral-ordering-core", ] @@ -333,8 +331,7 @@ dependencies = [ [[package]] name = "feral-amf" version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0414fac45abb5acec0d7ea9e82d125dba35ac0919491cbe5343ee6176eee8394" +source = "git+https://github.com/jkitchin/feral.git?rev=11fb4b98d7caac0383c53b6a969e27632efcef77#11fb4b98d7caac0383c53b6a969e27632efcef77" dependencies = [ "feral-ordering-core", ] @@ -342,8 +339,7 @@ dependencies = [ [[package]] name = "feral-kahip" version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36709fbe0273267511c0b164f3b93d16e62451ae7872bd0390cf073ced1f81d9" +source = "git+https://github.com/jkitchin/feral.git?rev=11fb4b98d7caac0383c53b6a969e27632efcef77#11fb4b98d7caac0383c53b6a969e27632efcef77" dependencies = [ "feral-amd", "feral-metis", @@ -353,8 +349,7 @@ dependencies = [ [[package]] name = "feral-metis" version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a90e908946db5c7f03e5fd9fb51f7af18e917deb245be583157ba41635792139" +source = "git+https://github.com/jkitchin/feral.git?rev=11fb4b98d7caac0383c53b6a969e27632efcef77#11fb4b98d7caac0383c53b6a969e27632efcef77" dependencies = [ "feral-amd", "feral-ordering-core", @@ -363,14 +358,12 @@ dependencies = [ [[package]] name = "feral-ordering-core" version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c22774549d1d1209ae367ad4ce1c094151c43c9981b2009ae4c0b55a03387dbb" +source = "git+https://github.com/jkitchin/feral.git?rev=11fb4b98d7caac0383c53b6a969e27632efcef77#11fb4b98d7caac0383c53b6a969e27632efcef77" [[package]] name = "feral-scotch" version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80c7b693523b6ef86bde0258983b6d3b12ef25b2979024927af5b47d2a7c7c61" +source = "git+https://github.com/jkitchin/feral.git?rev=11fb4b98d7caac0383c53b6a969e27632efcef77#11fb4b98d7caac0383c53b6a969e27632efcef77" dependencies = [ "feral-amd", "feral-metis", @@ -911,6 +904,7 @@ dependencies = [ "nix", "pounce-algorithm", "pounce-common", + "pounce-convex", "pounce-feral", "pounce-hsl", "pounce-linalg", @@ -940,6 +934,17 @@ dependencies = [ "anstyle-query", ] +[[package]] +name = "pounce-convex" +version = "0.4.0" +dependencies = [ + "pounce-common", + "pounce-feral", + "pounce-linalg", + "pounce-linsol", + "rayon", +] + [[package]] name = "pounce-feral" version = "0.4.0" @@ -1032,6 +1037,7 @@ dependencies = [ "numpy", "pounce-algorithm", "pounce-common", + "pounce-convex", "pounce-feral", "pounce-linsol", "pounce-nl", diff --git a/Cargo.toml b/Cargo.toml index a1e55776..bd5c1bd8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,7 @@ members = [ "crates/pounce-presolve", "crates/pounce-l1penalty", "crates/pounce-qp", + "crates/pounce-convex", "crates/pounce-sensitivity", "crates/pounce-solve-report", "crates/pounce-observability", @@ -41,6 +42,7 @@ default-members = [ "crates/pounce-presolve", "crates/pounce-l1penalty", "crates/pounce-qp", + "crates/pounce-convex", "crates/pounce-sensitivity", "crates/pounce-solve-report", "crates/pounce-observability", @@ -74,11 +76,17 @@ pounce-restoration = { path = "crates/pounce-restoration", version = "0.4.0" } pounce-presolve = { path = "crates/pounce-presolve", version = "0.4.0" } pounce-l1penalty = { path = "crates/pounce-l1penalty", version = "0.4.0" } pounce-qp = { path = "crates/pounce-qp", version = "0.4.0" } +pounce-convex = { path = "crates/pounce-convex", version = "0.4.0" } pounce-sensitivity = { path = "crates/pounce-sensitivity", version = "0.4.0" } pounce-solve-report = { path = "crates/pounce-solve-report", version = "0.4.0" } pounce-studio-core = { path = "crates/pounce-studio-core", version = "0.4.0" } pounce-observability = { path = "crates/pounce-observability", version = "0.4.0" } -feral = "0.10.0" +# feral HEAD past the 0.10.0 release checkpoint: issue #80 — MC64/scaling +# perf work (Hungarian-heap reuse across columns, localized dense-column +# cost, ldlt_compress profiling). Not yet on crates.io; pinned by rev for +# reproducibility. NOTE: this git pin blocks the crates.io publish of the +# pounce crates until feral cuts a release carrying these commits. +feral = { git = "https://github.com/jkitchin/feral.git", rev = "11fb4b98d7caac0383c53b6a969e27632efcef77" } # Dense linear algebra for the debugger's numerical rank diagnosis # (SVD of the active-constraint Jacobian). Pure-Rust, MIT; we pull only # the dense `std` core — no rayon/sparse/rand/npy. diff --git a/Makefile b/Makefile index f162d84d..b426bfab 100644 --- a/Makefile +++ b/Makefile @@ -70,6 +70,7 @@ endif .PHONY: all build debug test check clippy fmt fmt-check doc book install uninstall clean help \ install-mcp uninstall-mcp install-skill uninstall-skill \ + python-ext python-test \ benchmark benchmark-rerun benchmark-report benchmark-gams all: build @@ -118,6 +119,20 @@ clean: help: @sed -n 's/^# \{0,1\}//p' Makefile | sed -n '1,45p' +# ---- Python extension + tests ------------------------------------------- +# Rebuild the native extension in place, then run the Python test suite. +# This is the safe way to run pytest: a stale in-place `_pounce*.so` (left +# by an earlier `maturin develop`) silently shadows the current binding and +# makes the suite fail with confusing errors. `python-ext` rebuilds it, and +# `python/tests/conftest.py` additionally guards against running pytest +# against a stale artifact. Requires `maturin` and the test extras in the +# active environment (`pip install -e 'python[dev]'`). +python-ext: + cd python && maturin develop + +python-test: python-ext + cd python && python -m pytest tests -q + # ---- Benchmarks ---------------------------------------------------------- # Single source of truth: benchmarks/Makefile. These shims forward # everything so users can drive runs from the repo root. diff --git a/README.md b/README.md index a406f6f2..36b046bb 100644 --- a/README.md +++ b/README.md @@ -11,20 +11,29 @@ ![POUNCE](logos/pounce_A_pounce.png) -POUNCE is a pure-Rust port of the [Ipopt](https://github.com/coin-or/Ipopt) -interior-point nonlinear programming solver. It solves problems of the -form - -``` -min f(x) -s.t. g_L <= g(x) <= g_U - x_L <= x <= x_U -``` - -where `f` and `g` are twice-continuously-differentiable. The algorithm, -console output, and option semantics follow upstream Ipopt closely enough -that anyone used to reading `ipopt` logs can drop in `pounce` without -relearning where the numbers live. +POUNCE is a pure-Rust interior-point optimization solver. Its +nonlinear-programming core began as a faithful port of +[Ipopt](https://github.com/coin-or/Ipopt) — the same filter line-search +algorithm, console output, and option semantics, so anyone used to reading +`ipopt` logs can drop in `pounce` without relearning where the numbers +live — and it has since grown into a *family* of solvers sharing one +numerical backbone: + +- **Nonlinear programming** — the filter line-search interior-point method + (the Ipopt port), plus an active-set SQP path, for general smooth problems + `min f(x) s.t. g_L ≤ g(x) ≤ g_U, x_L ≤ x ≤ x_U`. +- **Conic & quadratic** — dedicated interior-point solvers for LP, convex QP, + second-order (SOCP), positive-semidefinite (SDP), and the non-symmetric + exponential and power cones — each solved to the global optimum, with + infeasibility certificates, warm starts, and post-optimal sensitivity. +- **Global optimization** — certified global optima for nonconvex problems: + SOS / Lasserre relaxations for polynomials, and a deterministic spatial + branch-and-bound solver (`pounce-global`) for general factorable NLPs. + +Convex and conic problems are solved to global optimality; nonconvex problems +are solved locally by default, or to a certified global optimum via the SOS +and branch-and-bound paths. See **[Choosing a Solver](https://jkitchin.github.io/pounce/choosing-a-solver.html)** +for the full map of which solver fits which problem. The default build is pure Rust — no Fortran, no HSL, no system BLAS required. The [FERAL](crates/pounce-feral) backend provides a sparse symmetric LDLᵀ @@ -47,6 +56,22 @@ port) and reduced-Hessian computation are wired end-to-end; the bound-tightening) and the active-set SQP path (`pounce-qp`-backed) are available behind option keys. +Beyond the NLP core, the solver family is wired end-to-end and validated +against external suites: + +- **Convex & conic** (`pounce-convex`) — LP / convex-QP, SOCP, the + exponential and power cones (geometric programming, entropy, logistic, + `p`-norms), and small dense SDPs, with a Conic Benchmark Format (`.cbf`) + reader cross-checked against the CBLIB tier. The CLI's `auto` routing + classifies an `.nl` and sends LP / convex-QP problems here automatically. +- **Global** — SOS / Lasserre polynomial optimization (`sos_minimize`) and + deterministic spatial branch-and-bound (`pounce-global`, `--solver global`) + with McCormick relaxations, OBBT/FBBT bound tightening, and a certified + optimality gap. + +All of it — NLP, conic, and global — is reachable from the CLI, the Python +package, and the JSON solve report. + See `benchmarks/` for the comparison harness against upstream Ipopt. ## Documentation @@ -77,6 +102,8 @@ make book # builds docs/book/ (requires `cargo install mdbook`) | [`pounce-l1penalty`](crates/pounce-l1penalty) | Thierry-Biegler ℓ₁-exact penalty-barrier wrapper for degenerate / MPCC problems. | | [`pounce-sensitivity`](crates/pounce-sensitivity) | Post-optimal sensitivity + reduced-Hessian (port of upstream sIPOPT). | | [`pounce-qp`](crates/pounce-qp) | Sparse parametric active-set QP subproblem solver — drives the SQP path and the sensitivity corrector. | +| [`pounce-convex`](crates/pounce-convex) | Convex/conic interior-point solver — LP, QP, SOCP, exponential/power cones, small SDP, and SOS polynomial optimization. | +| [`pounce-global`](crates/pounce-global) | Deterministic spatial branch-and-bound for nonconvex factorable NLPs (McCormick relaxations, OBBT/FBBT, certified gap). | | [`pounce-solve-report`](crates/pounce-solve-report) | `pounce.solve-report/v1` JSON writer (shared by `pounce-cli --json-output` and `IpoptWriteSolveReport`). | | [`pounce-observability`](crates/pounce-observability) | `tracing` subscriber install + per-iteration collector layer that feeds the iteration stream into the solve report. | | [`pounce-cinterface`](crates/pounce-cinterface) | C ABI shim — `CreateIpoptProblem` / `IpoptSolve` / `FreeIpoptProblem` / `IpoptWriteSolveReport`. | @@ -349,11 +376,11 @@ the full list and per-suite details. ## Acknowledgments -POUNCE is a Rust port of [Ipopt](https://github.com/coin-or/Ipopt), -the interior-point nonlinear programming solver by Andreas Wächter, -Lorenz T. Biegler, and the COIN-OR community. Its algorithm, console -output, and option semantics are modeled directly on that codebase, -which is released under the EPL-2.0. +POUNCE's nonlinear-programming core is a Rust port of +[Ipopt](https://github.com/coin-or/Ipopt), the interior-point nonlinear +programming solver by Andreas Wächter, Lorenz T. Biegler, and the COIN-OR +community. Its algorithm, console output, and option semantics are modeled +directly on that codebase, which is released under the EPL-2.0. It is a sibling of [ripopt](https://github.com/jkitchin/ripopt), an earlier memory-safe interior-point NLP optimizer in Rust by the same diff --git a/benchmarks/Makefile b/benchmarks/Makefile index e043dd0b..ca5328f3 100644 --- a/benchmarks/Makefile +++ b/benchmarks/Makefile @@ -53,6 +53,8 @@ export IPOPT_LINEAR_SOLVER qp-run qp-rerun qp-generate \ lp-run lp-rerun lp-generate \ lpopt-run lpopt-rerun lpopt-generate \ + globallib-run globallib-rerun globallib-translate \ + globallib-micro globallib-fast .PHONY-build-simplex-obbt \ ipopt-reference ipopt-reference-provenance \ $(addprefix ipopt-ref-,$(REF_SUITES)) \ gams-bench gams-rerun \ @@ -89,6 +91,10 @@ help: @echo " lpopt-run / -rerun HARD Mittelmann lpopt LP subset (stress tier; use BENCH_TIMELIMIT=1800)" @echo " lpopt-generate (Re)generate the lpopt .nl files only (downloads MPS from plato lptestset)" @echo " gams-bench / gams-rerun GAMS solver-link smoke check (10 problems; not in the report)" + @echo " globallib-run / -rerun GLOBALLib proven-optimum global suite (solver_selection=global; vs known optima)" + @echo " globallib-micro fast dev loop (~3s): IPM-vs-simplex OBBT cross-check on tiers/micro.txt" + @echo " globallib-fast broader fast cross-check (~30s) on tiers/fast.txt" + @echo " globallib-translate (Re)generate the globallib .nl from AMPL .mod (needs ampl; see benchmarks/globallib)" @echo "" @echo " clean-bench Wipe pounce results/logs across every suite (keeps the committed ipopt reference)" @echo " clean-bench-large-scale Wipe large_scale pounce.json" @@ -143,6 +149,7 @@ nldir_mittelmann := $(POUNCE_BENCH_DATA)/mittelmann/nl nldir_qp := $(POUNCE_BENCH_DATA)/qp/nl nldir_lp := $(POUNCE_BENCH_DATA)/lp/nl nldir_lpopt := $(POUNCE_BENCH_DATA)/lpopt/nl +nldir_globallib := $(POUNCE_BENCH_DATA)/globallib/nl # --- pounce release runs (incremental, pounce-only) --- # Each /pounce.json rebuilds when the pounce binary or the suite's @@ -306,6 +313,54 @@ mittelmann-rerun: rm -f $(MITT_DIR)/pounce.json $(MAKE) mittelmann-run +# --- GLOBALLib global-optimization benchmark (pounce-global) --- +# The only tier that drives `solver_selection=global`: the GLOBALLib subset +# with a *proven* global optimum (MINLPLib `=opt=`). Translation clones the +# AMPL .mod sources and runs AMPL `write` into the bench-data tree; the run +# checks each certified objective against the known optimum in optima.txt. +# Needs `ampl` on PATH (set $AMPL to override). GLOBALLIB_TIMEOUT caps each +# problem (default 30s). This is a hard external (subprocess) kill; the global +# solver also accepts `global_max_cpu_time` (and `global_max_nodes` / gap / cut +# tunables — see `register_global_options` in pounce-cli) for a graceful +# node-boundary stop that still reports the best incumbent, but the external +# timeout stays as the hard backstop (it catches a single pathological slow node). +GLOBALLIB_DIR := $(REPO_ROOT)/benchmarks/globallib +GLOBALLIB_TIMEOUT ?= 30 + +globallib-translate: + $(GLOBALLIB_DIR)/translate.sh $(nldir_globallib) + +$(GLOBALLIB_DIR)/pounce.json: $(POUNCE_BIN) $(wildcard $(nldir_globallib)/*.nl) + python3 $(GLOBALLIB_DIR)/run_globallib.py --bin $(POUNCE_BIN) \ + --nl-dir $(nldir_globallib) --timeout $(GLOBALLIB_TIMEOUT) --out $@ + +globallib-run: $(GLOBALLIB_DIR)/pounce.json + +globallib-rerun: + rm -f $(GLOBALLIB_DIR)/pounce.json + $(MAKE) globallib-run + +# --- fast dev tiers: small/fast subsets for wiring the per-node pieces (OBBT, +# simplex/IPM warm-starts, branching) with a seconds-long edit->run loop. +# Both run the IPM and simplex OBBT engines and assert they certify identical +# optima (the soundness gate), so the simplex binary needs the feature on. +GLOBALLIB_DEV_BIN := $(REPO_ROOT)/target/release/pounce +.PHONY-build-simplex-obbt: + $(CARGO) build --release --manifest-path $(CARGO_MANIFEST) \ + -p pounce-cli --features simplex-obbt + +# Inner loop (~2-3s): `make globallib-micro` +globallib-micro: .PHONY-build-simplex-obbt + python3 $(GLOBALLIB_DIR)/compare_obbt_engines.py \ + --bin $(GLOBALLIB_DEV_BIN) --nl-dir $(nldir_globallib) \ + --stems-file $(GLOBALLIB_DIR)/tiers/micro.txt --timeout 10 + +# Broader fast regression (~25-30s both engines): `make globallib-fast` +globallib-fast: .PHONY-build-simplex-obbt + python3 $(GLOBALLIB_DIR)/compare_obbt_engines.py \ + --bin $(GLOBALLIB_DEV_BIN) --nl-dir $(nldir_globallib) \ + --stems-file $(GLOBALLIB_DIR)/tiers/fast.txt --timeout 10 + # --- ipopt-ma57 reference (run rarely; committed) --- # `make ipopt-reference` runs ipopt-ma57 across every suite and writes the # committed benchmarks//ipopt_ma57.json plus a provenance stamp. diff --git a/benchmarks/cblib/README.md b/benchmarks/cblib/README.md new file mode 100644 index 00000000..2da8ecd4 --- /dev/null +++ b/benchmarks/cblib/README.md @@ -0,0 +1,88 @@ +# CBLIB suite — conic (exponential / power cone) tier + +The **conic** benchmark tier: instances from the Conic Benchmark Library +(CBLIB, ) in Conic Benchmark Format (`.cbf`). Unlike +every other suite here — which is `.nl`-driven through the main `pounce` +NLP binary — these are *conic programs* (geometric programs and power-cone +models) solved through POUNCE's convex conic driver (`pounce-convex`'s +non-symmetric HSDE path) via the dedicated `pounce_cblib` binary. + +Each instance is recorded in the same schema as the other suites — +`{solver, name, n, m, status, objective, iterations, solve_time}` — in +`cblib/pounce.json`, so it merges into the composite `BENCHMARK_REPORT.md`. + +## What runs + +By default the runner solves the small instances **vendored with the +repo** (under `crates/pounce-cli/tests/data/cblib/`), so it works offline: + +| Instance | Class | Cones | +|---|---|---| +| `demb761`, `beck751`, `fang88` | geometric programs (Demberg / Beck / Fang) | exponential | +| `pow3_synthetic` | hand-authored power-cone problem | power (`POWCONES`) | + +These are also the cross-check tests in +`crates/pounce-cli/tests/cblib_vs_nlp.rs`, where each conic solve is +validated against an **independent** smooth-NLP solve (the two agree on the +objective to ~1e-8). Published CBLIB reference objectives are unavailable +(the solution files 404), so that conic-vs-NLP cross-check *is* the +correctness reference. + +## Running + +```sh +python3 benchmarks/cblib/run_cblib.py # vendored instances +python3 benchmarks/cblib/run_cblib.py --detail full # + per-iteration trace +python3 benchmarks/cblib/run_cblib.py --dir /path/to/cblib # more instances +``` + +`--dir` points at a folder of additional `.cbf` files — e.g. a local CBLIB +checkout. The reader supports the cone kinds `F`/`L=`/`L+`/`L-`/`EXP`/`Q` +and the 3-D power cone (`POWCONES` / `@k:POW`); instances using PSD +(`DCOORD`), rotated SOC (`QR`), or dual power cones are skipped with a +clear error. The large power-cone instances (`2013_fir*`, ~120 MB) are not +vendored; fetch them into a `--dir` to include them. + +The underlying `pounce_cblib --json-output ` emits a full +`pounce.solve-report/v1` JSON (the same schema the `.nl` path writes, with +an input descriptor of kind `cbf-file`); the runner projects each into the +suite record schema. + +## Full corpus + conic-robustness regression watch + +Beyond the 5 vendored instances, a 132-instance corpus (exp-cone GPs, +power-cone, SOC families from ) lives in the bench-data +tree at `pounce-bench-data/cblib/cbf/` (307 M). Run it with: + +```sh +python3 benchmarks/cblib/run_cblib.py \ + --dir "$HOME/Dropbox/projects/pounce-bench-data/cblib/cbf" +``` + +A stress sweep (60 s/instance, 2026-06-07) over that corpus originally +classified **71 pass · 34 `NumericalFailure` · 10 timeout · 17 +unsupported-cone**. The 34 failures seeded a tracked **conic-robustness +regression set** (`MANIFEST.tsv` in the bench-data dir). 27 of them fail *with a +usable objective already in hand* +— several provably match a sibling formulation that passed (`flay02m`==`flay02h`, +`slay04h`==`slay04m`, `clay020{3,4,5}h`==`..m`). + +This is **not** an ill-conditioned-input problem: both HSDE drivers can discard +a converged-enough iterate when the KKT factorization degrades near the cone +boundary (`s∘z → 0` ⇒ NT scaling blows up) a hair short of `tol` (1e-8). The +**non-symmetric** driver (`hsde_nonsym.rs`, exp/power cones) already carried an +Ipopt-style "acceptable level" tier — accept the iterate when the +*unregularized* KKT residual is already `< 1e3·tol`. The **symmetric** driver +(`hsde.rs`, SOC/orthant/PSD) did **not**, so it discarded iterates the +non-symmetric one would have kept. Porting that same tier into the symmetric +driver (the principled fix — **not** porting the orthant path's Ruiz +equilibration fallback) recovers **12 of the 34** (all SOC/orthant, with +byte-identical objectives), taking the corpus to **83 pass · 22 +`NumericalFailure`**. The remaining 22 are genuine: 9 exp-cone gap-laggards +(would need a composite pres/dres/mu criterion), `slay06h`/`slay06m` (true +divergence), and the `expdesign_D_*` 0-iteration structural failures. Re-run the +corpus after any conic-driver change to track the count. + +> Note: the raw solve report renders `QpStatus::NumericalFailure` as +> `InternalError` (`pounce_cblib.rs:33`); classify on the stderr banner, not +> the JSON `status` field. diff --git a/benchmarks/cblib/run_cblib.py b/benchmarks/cblib/run_cblib.py new file mode 100644 index 00000000..2acba9fb --- /dev/null +++ b/benchmarks/cblib/run_cblib.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python3 +"""Run the CBLIB exponential/power-cone conic tier through POUNCE. + +Unlike the other suites (which are `.nl`-driven through the main `pounce` +binary), CBLIB ships *conic* programs in Conic Benchmark Format (`.cbf`), +solved through POUNCE's convex conic driver via the `pounce_cblib` binary. +Each instance is solved and recorded in the same schema the composite +report consumes: + + {solver, name, n, m, status, objective, iterations, solve_time} + +Out: benchmarks/cblib/pounce.json + +By default this runs the small instances vendored with the repo (the +exp-cone GPs demb761/beck751/fang88 and a synthetic power-cone problem, +under crates/pounce-cli/tests/data/cblib). Point `--dir` at a folder of +additional `.cbf` files (e.g. a local CBLIB checkout) to run more. + +Run: python3 benchmarks/cblib/run_cblib.py [--dir PATH] [--detail full] +""" + +import argparse +import json +import os +import re +import subprocess +import sys +import tempfile + +HERE = os.path.dirname(os.path.abspath(__file__)) +ROOT = os.path.dirname(os.path.dirname(HERE)) +BIN = os.path.join(ROOT, "target", "release", "pounce_cblib") +VENDORED = os.path.join( + ROOT, "crates", "pounce-cli", "tests", "data", "cblib" +) + + +def status_underscored(s: str) -> str: + """`SolveSucceeded` -> `Solve_Succeeded` (the composite-report form).""" + return re.sub(r"(? None: + print("Building pounce_cblib (release)…", file=sys.stderr) + subprocess.run( + ["cargo", "build", "--release", "--bin", "pounce_cblib"], + cwd=ROOT, + check=True, + ) + + +def instances(extra_dir): + """Yield (name, path) for every .cbf to run, vendored first.""" + seen = set() + for d in [VENDORED] + ([extra_dir] if extra_dir else []): + if not d or not os.path.isdir(d): + continue + for fn in sorted(os.listdir(d)): + if fn.endswith(".cbf") and fn not in seen: + seen.add(fn) + yield fn[:-4], os.path.join(d, fn) + + +def run_one(name, path, detail): + """Solve one instance; return the standard-schema record (or None).""" + with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as tf: + out = tf.name + try: + proc = subprocess.run( + [BIN, path, "--json-output", out, "--json-detail", detail], + cwd=ROOT, + capture_output=True, + text=True, + ) + if not os.path.exists(out) or os.path.getsize(out) == 0: + print(f" {name}: no report ({proc.stderr.strip()})", file=sys.stderr) + return None + with open(out) as f: + r = json.load(f) + return { + "solver": "pounce", + "name": name, + "n": r["problem"]["n_variables"], + "m": r["problem"]["n_constraints"], + "status": status_underscored(r["solution"]["status"]), + "objective": r["solution"]["objective"], + "iterations": r["statistics"]["iteration_count"], + "solve_time": r["statistics"]["total_wallclock_time_secs"], + } + finally: + if os.path.exists(out): + os.remove(out) + + +def main(): + ap = argparse.ArgumentParser(description=__doc__) + ap.add_argument("--dir", help="extra directory of .cbf instances") + ap.add_argument("--detail", default="summary", choices=["summary", "full"]) + ap.add_argument("--no-build", action="store_true", help="skip cargo build") + args = ap.parse_args() + + if not args.no_build: + build_binary() + if not os.path.exists(BIN): + sys.exit(f"binary not found: {BIN} (drop --no-build to build it)") + + records = [] + for name, path in instances(args.dir): + rec = run_one(name, path, args.detail) + if rec is not None: + records.append(rec) + print( + f" {rec['name']:<20} {rec['status']:<20} " + f"obj={rec['objective']:.6g} iters={rec['iterations']} " + f"{rec['solve_time']:.3f}s" + ) + + out_path = os.path.join(HERE, "pounce.json") + with open(out_path, "w") as f: + json.dump(records, f, indent=2) + print(f"\nWrote {len(records)} records to {out_path}") + + +if __name__ == "__main__": + main() diff --git a/benchmarks/global/README.md b/benchmarks/global/README.md new file mode 100644 index 00000000..729bd601 --- /dev/null +++ b/benchmarks/global/README.md @@ -0,0 +1,110 @@ +# Global-optimization benchmark (`pounce-global`) + +A graduated suite of **verifiable** nonconvex problems for the spatial +branch-and-bound global solver — from quick 2-D classics to instances that +branch into the thousands. Every instance has a known global optimum, so the +harness checks the *certified* value against ground truth (not just that it +returned something). + +Unlike the other tiers (which drive the CLI on AMPL `.nl` files), the global +solver is Rust-native and needs finite variable bounds, so the harness is a +self-contained Rust example — no Pyomo / `.nl` generation: + +```sh +cargo run --release -p pounce-global --example benchmark +``` + +It prints the Markdown table below. The source is +`crates/pounce-global/examples/benchmark.rs`. + +## What the instances exercise + +| instance | what it stresses | +|---|---| +| **six-hump camel** | the classic 2-D nonconvex case (two global minima); envelopes + OBBT + most-violation branching | +| **himmelblau** | quartic with four global minima; the relaxation prunes it almost immediately | +| **bukin-6** | `\|·\|` + `√` (non-smooth, the Hessian sweep declines) — forces branching | +| **allpairs bilinear** `Σ_{i`. Before investing in a sharded / +work-stealing frontier, we measured where the parallel speedup actually leaks, +by instrumenting the pool to split each worker's non-compute time into +**lock-wait** (contention on the shared heap — what sharding fixes) and +**idle-wait** (frontier starvation / ramp-up — what it doesn't). Double camel, +M4 Pro (14 cores): + +| threads | wall (s) | speedup | lock-wait % | idle-wait % | +|--:|--:|--:|--:|--:| +| 1 | 211.6 | 1.0× | — | — | +| 2 | 109.0 | 1.9× | 0.0 | 0.2 | +| 4 | 76.4 | 2.8× | 0.0 | 0.6 | +| 8 | 47.5 | 4.5× | 0.0 | 2.0 | +| 14 | 35.7 | 5.9× | 0.0 | 3.6 | + +**Lock-wait is 0.0 % at every thread count** — and stayed 0.0 % even in a +cheap-node regime (OBBT/NLP/sandwich/αBB off, ~5500 tiny FBBT+LP nodes), where +heap traffic per second is far higher. The shared frontier mutex is simply not +contended at these core counts, so **a sharded frontier (issue #7) would buy +almost nothing here.** + +The sub-linear speedup instead comes from **per-node work dilation**: the total +useful CPU summed across workers grows from ~212 s (serial) to ~481 s at 14 +threads for the *same* ~1685 nodes. Concurrent sparse LDLᵀ factorizations +contend for memory bandwidth, and the high-thread runs spill onto the slower +efficiency cores — neither of which a frontier rework addresses. The lever for +better scaling is reducing per-node allocation/bandwidth (a per-worker reusable +backend and scratch buffers), not the queue. So #7 is **parked with evidence**, +not on the roadmap. + diff --git a/benchmarks/globallib/README.md b/benchmarks/globallib/README.md new file mode 100644 index 00000000..dfdf4423 --- /dev/null +++ b/benchmarks/globallib/README.md @@ -0,0 +1,218 @@ +# GLOBALLib — proven-optimum global benchmark (`pounce-global`) + +An **external, `.nl`-driven** benchmark for the spatial branch-and-bound global +solver, complementing the self-contained synthetic suite in +[`../global/`](../global/README.md). Where that suite hand-builds classic +functions in Rust, this one drives real AMPL `.nl` files through the same CLI a +user hits — `pounce .nl solver_selection=global` — and checks the +**certified** objective against a *proven* global optimum. + +## What it is + +- **Problems:** the [GLOBALLib][globallib] collection (Floudas/GAMS nonconvex + NLP & QP test set, 2–9 variables, finite box bounds — the natural shape for + spatial B&B), as redistributed in AMPL `.mod` form by + [`ampl/global-optimization`][ampl-go]. +- **Subset:** only the models that have a **proven** global optimum + (`=opt=`) in MINLPLib's [`minlplib.solu`][solu] — so every check is against + ground truth, not a best-known heuristic value. That is **104** models + (1 GLOBALLib model, `nemhaus`, has no AMPL `.mod` and is excluded). +- **Ground truth:** [`optima.txt`](optima.txt) — one ` ` per + line, copied verbatim from the `=opt=` entries of `minlplib.solu`. + +## How the `.nl` are produced (reproducible) + +The `.nl` files live in the bench-data tree (Dropbox), next to every other +supplied tier (`lp/nl`, `qp/nl`, `vanderbei/nl`, …), at +`$POUNCE_BENCH_DATA/globallib/nl/`. They are *generated*, not committed: + +```sh +# clones ampl/global-optimization, runs AMPL `write` on each proven-optimum +# model, drops .nl into the bench-data globallib/nl dir +benchmarks/globallib/translate.sh # needs `ampl` on PATH +# or via the suite Makefile: +make -C benchmarks globallib-translate +``` + +`.mod → .nl` is done by AMPL itself (`model x.mod; option auxfiles rc; write gx;`), +the same translation the `mittelmann` tier uses. + +## Running + +```sh +make -C benchmarks globallib-run # 30s/problem cap (default) +make -C benchmarks globallib-run GLOBALLIB_TIMEOUT=120 +# or directly: +python3 benchmarks/globallib/run_globallib.py --timeout 30 --out report.json +python3 benchmarks/globallib/run_globallib.py ex2_1_1 ex8_1_1 # a few by name +python3 benchmarks/globallib/run_globallib.py --max-vars 4 # small only +``` + +The harness runs each model, parses the solver's certificate line +(`obj=… gap=… nodes=…`), and classifies the run: + +| verdict | meaning | +|---|---| +| **OK** | `Global optimum found` **and** certified obj matches the known optimum (abs-tol `1e-6` **or** rel-tol `1e-4`) | +| **WRONG** | solver certified optimality at a value that disagrees with the proven optimum — a **correctness bug** (none observed) | +| **TIMEOUT** | hit the per-problem wall-clock cap before closing the gap | +| **other** | node-limit / infeasible / crash | + +The OK check is **combined absolute+relative** (`--atol 1e-6`, `--tol 1e-4`): a +proven optimum of *exactly* 0 (common here — `ex14_1_*`, `ex9_2_3`) makes a pure +*relative* metric explode for a certified `~1e-7` that is in fact correct to +~1e-6 absolute. Accepting on either bound stops those near-zero optima from being +mis-flagged as `WRONG`. + +The distinction that matters: a `WRONG` would mean the solver *claimed* a +certified global optimum that is provably not one — the only true failure. A +`TIMEOUT`/node-limit means "didn't finish in budget," a performance limit, not +a soundness bug. The global solver has no node/time CLI flag yet, so the budget +is enforced by an external process timeout. + +## Notes on coverage + +- The global CLI path **caps unbounded variables to ±1e6** and warns; GLOBALLib + models are bounded, so this rarely triggers here. +- Expect honest performance limits at this stage: **concave** quadratics + (e.g. `ex2_1_*`, negative-definite Hessian) and **high-degree univariate + polynomials** (e.g. `ex4_1_2`, degree 16) are the hardest cases for the + McCormick/αBB relaxations and tend to time out — exactly the regime a + benchmark should expose. The headline correctness claim is that **no run + certifies a wrong optimum**. + +## Results + + +Latest run — Apple M-series, `--release`, **30 s/problem** cap, abs-tol `1e-6` / +rel-tol `1e-4`, 104 proven-optimum models: + +| outcome | count | meaning | +|---|--:|---| +| **certified correct global optimum** | **59** | matched the known optimum | +| **wrong certified value** | **0** | no soundness failure of this kind | +| **false "infeasible"** | **0** | no feasible problem certified infeasible | +| timed out (30 s) | 45 | performance limit, not a correctness failure | + +**Headline (good):** every run that returned a *value* certified the correct +optimum — **0 wrong objectives, 0 false-infeasible**. The remaining 45 are pure +performance timeouts, not soundness failures. + +### Fix: the `.nl` infinity-sentinel false-infeasible (4 problems recovered) + +An earlier run flagged **4** problems (`dispatch`, `ex2_1_10`, `ex3_1_1`, +`ex7_2_1`) as certified *infeasible* despite each having a proven finite optimum. +Root cause: AMPL writes a *missing* constraint bound as the sentinel `±1e19` +(not an IEEE infinity), and the global CLI was passing that sentinel straight +through as a **finite** bound. `pounce-global` treats a finite bound as an +*active* side, so a genuinely one-sided constraint (`g ≤ ub`) became spuriously +two-sided (`1e19 ≥ g ≤ ub`); at GLOBALLib scale the bilinear relaxation terms +(~1e7) against a 1e19 wall make the relaxed region read as empty. Fix: +`nl_constraint_bound()` in `crates/pounce-cli/src/dispatch.rs` maps `±1e19 → ±∞` +before the constraints reach the relaxation (unit-tested). All four now certify +their proven optima (`ex3_1_1 → 7049.249`, `dispatch → 3155.288`, +`ex2_1_10 → 49318.018`, `ex7_2_1 → 1227.226`) when given enough budget. Within +the 30 s screen three of them now close (`dispatch` 1.9 s, `ex2_1_10` 11.0 s, +`ex7_2_1` 13.5 s); only `ex3_1_1` still exceeds it (closes in ~113 s) and shows +as TIMEOUT above, with the certified value correct once it finishes. + +### Fixed: the `chance` false-infeasible (near-singular envelope tangent) + +`chance` (proven optimum `29.894`, solved by both the NLP filter-IPM and BARON) +used to be certified *infeasible* at the root node. The first hypothesis was an +FBBT reverse-propagation bug, but instrumenting the run cleared FBBT: at the root +box it correctly tightens to `[0,1]⁴` and never prunes a box containing the +optimum. The real fault was one level down, in the **relaxation LP**. + +The `√(Σ aᵢxᵢ²)` constraint relaxes through the `sqrt` envelope, whose concave +**over**-cuts are tangent lines `df = 0.5/√t`. At the singular endpoint `t = 0` +that slope is ≈`5e149` — a *valid* but astronomical cut. Feeding it into the +relaxation LP's constraint matrix wrecks the conditioning, and the HSDE conic IPM +responds by emitting a spurious Farkas infeasibility certificate (the tell: it +reported `obj ≈ 29.636`, right next to the true optimum, before declaring the LP +empty). So a perfectly feasible relaxation read as infeasible and the root node +was pruned. + +Fix (in `crates/pounce-global/src/relax.rs`): a `cut_is_finite` guard with +`MAX_CUT_MAGNITUDE = 1e8` drops any cut whose slope or intercept exceeds that +bound, in both `emit_univariate` and `sandwich_cuts`. Dropping a cut only +*loosens* the relaxation, so it is always sound — spatial branching re-tightens +the bound on later, better-conditioned subboxes. `chance` now certifies +`29.894378` in **3 nodes / 0.11 s**, and the better conditioning also flipped +`ex14_1_2` from TIMEOUT to OK. Regression-tested end-to-end +(`chance_constraint_is_not_falsely_infeasible`, `drops_astronomical_sqrt_tangent` +in `relax.rs`); the full GLOBALLib sweep shows zero OK→worse regressions. + +### Fixed: ill-conditioned relaxation LPs discarded their bound (+11 net) + +A cluster of division+log models — the **Wilson VLE consistency** set +`ex14_2_*` — timed out despite the relaxation *reaching* the correct objective +(`~1e-8`, the proven optimum `0`) at the root. The cause was one level down again, +in the conic IPM that solves the relaxation. These relaxations are severely +ill-scaled: the McCormick **division** columns `w = a/c` with a denominator box +bottoming out near `0` carry bounds up to `~1.2e6`, and the `ln` envelope tangents +at `x ≈ 1e-6` have slope `1/x ≈ 1e6`, so the inequality matrix spans +`|G| ∈ [1.8e-7, 1e6]` (condition number `~1e12`). On such data the HSDE driver's +embedded KKT factorization breaks down and returns `NumericalFailure` — and +`process_node` then has no choice but to fall back to the *inherited* parent +bound (`-∞` at the root). With no finite lower bound the node can never be pruned, +so the search runs to the wall-clock cap even though it sat on the optimum the +whole time. + +The HSDE driver deliberately skips Ruiz pre-scaling (it conditions itself through +per-cone NT scaling, like Clarabel/ECOS, and Ruiz composes badly with presolve on +the well-scaled NETLIB LPs). The fix keeps that happy path intact and adds a +**fallback**: when an HSDE solve returns `NumericalFailure` *and* equilibration is +enabled (the default), `solve_qp_ipm` retries the solve **once** with Ruiz +equilibration and accepts the result if it converges +(`crates/pounce-convex/src/ipm.rs`). This is sound by construction — the retry +only runs after the un-equilibrated solve has already failed, so there is no +well-conditioned case left to regress; equilibration either recovers a usable +bound or fails the same way and the original result stands. + +Net effect on the 30 s screen: **48 → 59 OK** (`+11`). Twelve models flipped +TIMEOUT→OK — the eight solvable `ex14_2_*` (each now **1–3 nodes**, e.g. +`ex14_2_1` in 0.29 s), plus `ex14_1_5`, `ex2_1_7`, `ex5_4_2`, and `ex7_3_2`. One +model, `ex9_2_6`, crossed the screen the other way (OK→TIMEOUT) — *not* a +correctness change: it still certifies its proven optimum `-1.0` (gap 0), but the +recovered bound reorders the best-first frontier and it now closes in ~41 s +instead of under 30 s (79 → 209 nodes — the familiar "a different valid bound +grows a different tree" anomaly of spatial B&B). `ex14_2_4` is the one `ex14_2_*` +that still times out: its equilibrated retry also fails to certify, a harder +conditioning case left for future work. Zero models certified a wrong value. + +### Timing context vs BARON (true global solver peer) + +To put pounce's solve times in context we cross-check against **BARON**, the +canonical spatial-branch-and-bound global solver, via AMPL's bundled build. +That build is **demo-limited** (≤10 variables / constraints for nonlinear +models), so it covers a 33-problem subset — but on that subset it is the gold +standard, and **every BARON optimum matches the proven value** (independent +confirmation of our ground truth). The BARON sweep is committed as +[`baron_sweep.tsv`](baron_sweep.tsv); reproduce the table with +`python3 compare_baron.py` (defaults to the committed `pounce.json` + +`baron_sweep.tsv`): + +| | BARON (demo) | pounce-global | +|---|---|---| +| certify proven optimum (33-subset) | 33/33 | 27/33 within 30 s | +| median wall | **0.061 s** | 0.434 s | +| max wall | 1.91 s | 21.06 s | + +So where both close the gap they agree to ~7 digits; pounce is currently **~1–2 +orders of magnitude slower** and times out on the harder ~1/5 of the subset. +BARON is a mature commercial solver — the gap is expected and the useful read is +the *shape*: pounce is competitive on the small/well-conditioned cases and loses +ground exactly where its relaxations are loosest. + +**Performance (the 45 timeouts):** the global solver has no node/time CLI flag, +so 30 s is a deliberately tight screen. The dominant slow cases are concave +quadratics (`ex2_1_*`, negative-definite Hessian → loose secant relaxation) and +high-degree polynomials (`ex4_1_2`, degree 16). A longer cap recovers more (e.g. +`ex3_1_1` closes in ~113 s), but the McCormick/αBB relaxation blow-up on these +shapes is the real lever, not wall clock. Re-run with `GLOBALLIB_TIMEOUT=120` to +measure the budget sensitivity. + +[globallib]: https://www.minlplib.org/ +[ampl-go]: https://github.com/ampl/global-optimization +[solu]: https://www.minlplib.org/minlplib.solu diff --git a/benchmarks/globallib/baron_sweep.tsv b/benchmarks/globallib/baron_sweep.tsv new file mode 100644 index 00000000..d95783b7 --- /dev/null +++ b/benchmarks/globallib/baron_sweep.tsv @@ -0,0 +1,105 @@ +stem proven baron_result baron_obj baron_time +camcns 0.0000000000 TIMEOUT +chakra -179.1335579000 TIMEOUT +chance 29.8943781600 solved 29.89437816 0.049439 +chem -47.7065148300 TIMEOUT +chenery -1058.9198560000 TIMEOUT +demo7 -1589042.3859999999 TIMEOUT +dispatch 3155.2879270000 solved 3155.287927 0.056289 +ex14_1_1 -0.0000000000 solved -1.272933924e-09 0.05816 +ex14_1_2 0.0000000000 TIMEOUT +ex14_1_3 -0.0000000000 solved 0 0.049412 +ex14_1_4 -0.0000000000 TIMEOUT +ex14_1_5 -0.0000000000 TIMEOUT +ex14_1_6 0.0000000000 TIMEOUT +ex14_1_7 0.0000000000 TIMEOUT +ex14_1_8 0.0000000000 TIMEOUT +ex14_1_9 -0.0000000000 solved 0 0.053259 +ex14_2_1 0.0000000000 TIMEOUT +ex14_2_2 0.0000000000 TIMEOUT +ex14_2_3 0.0000000000 TIMEOUT +ex14_2_4 0.0000000000 TIMEOUT +ex14_2_5 0.0000000000 TIMEOUT +ex14_2_6 0.0000000000 TIMEOUT +ex14_2_7 0.0000000000 TIMEOUT +ex14_2_8 0.0000000000 TIMEOUT +ex14_2_9 0.0000000000 TIMEOUT +ex2_1_1 -17.0000000000 solved -17 0.067605 +ex2_1_10 49318.0179600000 TIMEOUT +ex2_1_2 -213.0000000000 solved -213 0.046968 +ex2_1_3 -15.0000000000 TIMEOUT +ex2_1_4 -11.0000000000 failure 5.06119 +ex2_1_5 -268.0146315000 TIMEOUT +ex2_1_6 -39.0000000000 solved -39.00000005 0.146476 +ex2_1_7 -4150.4101340000 TIMEOUT +ex2_1_8 15639.0000000000 TIMEOUT +ex2_1_9 -0.3750000000 solved -0.3750000002 1.69586 +ex3_1_1 7049.2480210000 solved 7049.248019 0.413828 +ex3_1_2 -30665.5386700000 TIMEOUT +ex3_1_3 -310.0000000000 solved -310.0000001 0.057403 +ex3_1_4 -4.0000000000 solved -4.000000001 0.068156 +ex4_1_1 -7.4873123650 solved -7.487312365 0.064131 +ex4_1_2 -663.5000966000 TIMEOUT +ex4_1_3 -443.6717047000 solved -443.6717047 0.06548 +ex4_1_4 0.0000000000 solved 0 1.91013 +ex4_1_5 0.0000000000 solved 0 0.052907 +ex4_1_6 7.0000000000 solved 7 1.03424 +ex4_1_7 -7.5000000000 solved -7.5 0.056992 +ex4_1_8 -16.7388931800 solved -16.73889319 0.056283 +ex4_1_9 -5.5080132710 solved -5.508013274 0.062555 +ex5_2_2_case1 -400.0000000000 solved -400 0.060653 +ex5_2_2_case2 -600.0000000000 solved -600.0000001 0.066527 +ex5_2_2_case3 -750.0000000000 solved -750.0000001 0.062217 +ex5_2_4 -450.0000000000 solved -450.0000001 0.052786 +ex5_2_5 -3500.0000000000 TIMEOUT +ex5_3_2 1.8641594590 TIMEOUT +ex5_4_2 7512.2301450000 solved 7512.230145 0.072638 +ex5_4_3 4845.4620050000 TIMEOUT +ex5_4_4 10077.7754000000 TIMEOUT +ex6_1_1 -0.0201983117 TIMEOUT +ex6_1_3 -0.3524978012 TIMEOUT +ex6_1_4 -0.2945412877 limit 30.086 +ex6_2_14 -0.6953579346 TIMEOUT +ex7_2_1 1227.2260750000 TIMEOUT +ex7_2_2 -0.3888114343 solved -0.3888114343 0.131789 +ex7_2_4 3.9180102260 TIMEOUT +ex7_3_1 0.3417395531 TIMEOUT +ex7_3_2 1.0898639710 solved 1.089863971 0.053966 +ex7_3_3 0.8175290489 solved 0.8175290486 0.062952 +ex7_3_4 6.2746343370 TIMEOUT +ex8_1_1 -2.0218067830 TIMEOUT +ex8_1_6 -10.0860015000 TIMEOUT +ex8_1_7 0.0293108307 TIMEOUT +ex8_4_1 0.6185727593 TIMEOUT +ex9_1_1 -13.0000000000 TIMEOUT +ex9_1_2 -16.0000000000 TIMEOUT +ex9_1_4 -37.0000000000 TIMEOUT +ex9_1_5 -1.0000000000 TIMEOUT +ex9_1_8 -3.2500000000 TIMEOUT +ex9_2_2 99.9999693900 TIMEOUT +ex9_2_3 -0.0000000000 TIMEOUT +ex9_2_4 0.5000000000 solved 0.5 0.047792 +ex9_2_5 5.0000001460 solved 5 0.062366 +ex9_2_6 -1.0000000000 TIMEOUT +ex9_2_7 17.0000000000 TIMEOUT +ex9_2_8 1.5000000000 solved 1.5 0.045341 +gancns 0.0000000000 TIMEOUT +haverly -400.0000000000 TIMEOUT +himmel11 -30665.5386700000 solved -30665.53868 0.054877 +himmel16 -0.8660254038 TIMEOUT +house -4500.0000000000 TIMEOUT +hydro 4366944.1600000001 TIMEOUT +korcns 0.0000000000 TIMEOUT +launch 2257.7975580000 TIMEOUT +least 0.0000000000 TIMEOUT +minlphi 582.2361420000 TIMEOUT +otpop 0.0000000000 TIMEOUT +process -1161.3366020000 TIMEOUT +prolog -0.0000000000 TIMEOUT +qp3 0.0008093151 TIMEOUT +ramsey -2.4874686390 TIMEOUT +rbrock 0.0000000000 solved 0 0.048076 +torsion100 -0.4182392133 TIMEOUT +torsion25 -0.4175107296 TIMEOUT +torsion50 -0.4180876320 TIMEOUT +torsion75 -0.4181994007 TIMEOUT diff --git a/benchmarks/globallib/compare_baron.py b/benchmarks/globallib/compare_baron.py new file mode 100644 index 00000000..4f411394 --- /dev/null +++ b/benchmarks/globallib/compare_baron.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python3 +"""Timing/correctness cross-check of pounce-global against BARON on GLOBALLib. + +BARON is a true spatial-branch-and-bound global solver — the canonical +reference for this Floudas/GAMS test set. Unlike HiGHS (an LP/convex-QP solver +whose AMPL driver only *piecewise-linearly approximates* nonconvex terms), +BARON certifies global optima, so it is both a correctness peer *and* a timing +yardstick. The BARON used here is AMPL's bundled **demo** build, capped at 10 +variables / 10 constraints for nonlinear models, so it can only solve the small +subset — for those it is the gold standard. + +Inputs: + * optima.txt — proven optima (MINLPLib ``=opt=``, ground truth) + * pounce.json (--pounce) — the pounce-global harness report (obj, wall, nodes) + * baron_sweep.tsv (--baron) — `stem proven result obj time` + +Reports, over the subset BARON's demo could solve, a side-by-side of the +certified objective (vs ground truth) and the wall-clock time, so the headline +is "where both certify, do they agree, and how do the solve times compare." +""" +import argparse +import json +from pathlib import Path + + +def load_optima(path): + opt = {} + for line in Path(path).read_text().splitlines(): + line = line.strip() + if not line or line.startswith("#"): + continue + stem, val = line.split() + opt[stem] = float(val) + return opt + + +def load_baron(path): + rows = {} + for i, line in enumerate(Path(path).read_text().splitlines()): + if i == 0: + continue + parts = (line.split("\t") + ["", "", "", "", ""])[:5] + stem, _proven, result, obj, tim = parts + rows[stem] = { + "result": result, + "obj": _f(obj), + "time": _f(tim), + } + return rows + + +def load_pounce(path): + data = json.loads(Path(path).read_text()) + records = data if isinstance(data, list) else data.get("results", data) + rows = {} + for r in records: + stem = r.get("stem") or r.get("problem") + rows[stem] = { + "verdict": r.get("verdict") or r.get("status"), + "obj": r.get("obj"), + "wall": r.get("wall"), + "nodes": r.get("nodes"), + } + return rows + + +def _f(s): + try: + return float(s) + except (ValueError, TypeError): + return None + + +def rel_ok(a, b, tol): + if a is None or b is None: + return False + return abs(a - b) <= tol * max(1.0, abs(b)) + + +def main(): + ap = argparse.ArgumentParser() + here = Path(__file__).parent + ap.add_argument("--optima", default=str(here / "optima.txt")) + ap.add_argument("--pounce", default=str(here / "pounce.json")) + ap.add_argument("--baron", default=str(here / "baron_sweep.tsv")) + ap.add_argument("--tol", type=float, default=1e-4) + args = ap.parse_args() + + opt = load_optima(args.optima) + baron = load_baron(args.baron) + pounce = load_pounce(args.pounce) if Path(args.pounce).exists() else {} + + # The interesting set: problems BARON's demo actually solved. + solved = sorted(s for s in opt if baron.get(s, {}).get("result") == "solved") + + print(f"BARON solved {len(solved)}/{len(opt)} (demo: ≤10 vars/cons nonlinear)\n") + hdr = f"{'problem':<14}{'proven':>14}{'baron_obj':>14}{'baron_s':>9}" \ + f"{'pounce_obj':>14}{'pounce_s':>10} {'verdict'}" + print(hdr) + print("-" * len(hdr)) + + n_both_agree = 0 + baron_t = [] + pounce_t = [] + for stem in solved: + proven = opt[stem] + b = baron[stem] + p = pounce.get(stem, {}) + pobj, pwall, pv = p.get("obj"), p.get("wall"), p.get("verdict") + b_ok = rel_ok(b["obj"], proven, args.tol) + p_ok = rel_ok(pobj, proven, args.tol) if pobj is not None else False + if b_ok and p_ok: + n_both_agree += 1 + if b["time"] is not None: + baron_t.append(b["time"]) + if p_ok and pwall is not None: + pounce_t.append(pwall) + verdict = "both✓" if (b_ok and p_ok) else ( + f"pounce={pv}" if not p_ok else "baron-off") + ps = f"{pobj:.5g}" if isinstance(pobj, (int, float)) else "-" + pw = f"{pwall:.2f}" if isinstance(pwall, (int, float)) else "-" + print(f"{stem:<14}{proven:>14.5g}{b['obj']:>14.5g}{b['time']:>9.3f}" + f"{ps:>14}{pw:>10} {verdict}") + + print(f"\n{'='*70}") + print(f"on BARON's {len(solved)}-problem demo subset:") + print(f" both certify the proven optimum : {n_both_agree}/{len(solved)}") + if baron_t: + print(f" BARON wall: median {median(baron_t):.3f}s max {max(baron_t):.3f}s") + if pounce_t: + print(f" pounce wall: median {median(pounce_t):.3f}s max {max(pounce_t):.3f}s" + f" (n={len(pounce_t)} it also solved)") + + +def median(xs): + xs = sorted(xs) + n = len(xs) + return xs[n // 2] if n % 2 else (xs[n // 2 - 1] + xs[n // 2]) / 2 + + +if __name__ == "__main__": + main() diff --git a/benchmarks/globallib/compare_obbt_engines.py b/benchmarks/globallib/compare_obbt_engines.py new file mode 100755 index 00000000..7cd0eb22 --- /dev/null +++ b/benchmarks/globallib/compare_obbt_engines.py @@ -0,0 +1,185 @@ +#!/usr/bin/env python3 +"""Cross-check the two OBBT LP engines on GLOBALLib. + +`pounce-global`'s spatial branch-and-bound tightens variable bounds with +optimality-based bound tightening (OBBT), and the LP solves inside OBBT can be +driven by either engine: + + * the default conic interior-point solver (`global_obbt_lp=ipm`), or + * the bounded-variable revised simplex (`global_obbt_lp=simplex`, gated behind + the off-by-default `simplex-obbt` cargo feature). + +OBBT only narrows boxes; it must never cut off the global optimum. A bug in +either LP engine can produce a too-tight (wrong) bound that prunes the true +minimizer, so the branch-and-bound then *certifies the wrong optimum*. This is a +silent soundness failure: the run reports "Global optimum found" with a bogus +value. + +This harness runs the GLOBALLib proven-optimum subset twice — once per engine — +and asserts the two engines certify the **same** optimum on every model either +of them solves. Concretely it fails (nonzero exit) when: + + 1. either engine returns a WRONG certified value (disagrees with the MINLPLib + proven optimum beyond tolerance), or + 2. both engines certify "Global optimum found" but disagree with **each + other** beyond tolerance. + +A model that one engine solves and the other times out is reported but is not a +failure (timeouts are a performance difference, not a soundness one). This is +the validation gate before graduating `simplex-obbt` to the default engine. + +Usage: + compare_obbt_engines.py [--timeout SECS] [--max-vars N] [--tol REL] + [--atol ABS] [--bin PATH] [--nl-dir DIR] + [--out-dir DIR] [stems...] + + # Or compare two already-generated run_globallib.py reports: + compare_obbt_engines.py --ipm-json ipm.json --simplex-json simplex.json + +Exit code 0 iff the two engines agree everywhere (soundness gate passes). +""" +import argparse +import json +import subprocess +import sys +from pathlib import Path + +HERE = Path(__file__).parent +RUNNER = HERE / "run_globallib.py" + + +def run_engine(args, engine_opts, out_path): + """Invoke run_globallib.py for one engine, returning its parsed rows.""" + cmd = [ + sys.executable, str(RUNNER), + "--bin", args.bin, + "--nl-dir", args.nl_dir, + "--timeout", str(args.timeout), + "--tol", str(args.tol), + "--atol", str(args.atol), + "--out", str(out_path), + ] + if args.max_vars is not None: + cmd += ["--max-vars", str(args.max_vars)] + if args.stems_file: + cmd += ["--stems-file", args.stems_file] + for o in engine_opts: + cmd += ["--opt", o] + cmd += args.stems + print(f"\n{'#'*72}\n# running: {' '.join(cmd)}\n{'#'*72}", flush=True) + subprocess.run(cmd, check=True) + return {r["stem"]: r for r in json.loads(Path(out_path).read_text())} + + +def agree(a, b, tol, atol): + """True if two certified objectives agree within abs OR rel tolerance.""" + if a is None or b is None: + return False + abs_err = abs(a - b) + rel = abs_err / max(abs(a), abs(b), 1e-6) + return abs_err <= atol or rel <= tol + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--bin", default="./target/release/pounce") + ap.add_argument("--nl-dir", + default=str(__import__("os").environ.get( + "POUNCE_BENCH_DATA", + str(Path.home() / "Dropbox/projects/pounce-bench-data")) + ) + "/globallib/nl") + ap.add_argument("--timeout", type=float, default=30.0) + ap.add_argument("--max-vars", type=int, default=None) + ap.add_argument("--tol", type=float, default=1e-4) + ap.add_argument("--atol", type=float, default=1e-6) + ap.add_argument("--out-dir", default="/tmp") + ap.add_argument("--ipm-json", default=None, + help="skip running; load this IPM report instead") + ap.add_argument("--simplex-json", default=None, + help="skip running; load this simplex report instead") + ap.add_argument("--stems-file", default=None, + help="newline-separated stem list (e.g. tiers/micro.txt)") + ap.add_argument("stems", nargs="*") + args = ap.parse_args() + + if args.ipm_json and args.simplex_json: + ipm = {r["stem"]: r for r in json.loads(Path(args.ipm_json).read_text())} + spx = {r["stem"]: r + for r in json.loads(Path(args.simplex_json).read_text())} + else: + out = Path(args.out_dir) + ipm = run_engine(args, ["global_obbt_lp=ipm"], out / "globallib_ipm.json") + spx = run_engine(args, ["global_obbt_lp=simplex"], + out / "globallib_simplex.json") + + stems = sorted(set(ipm) | set(spx)) + wrong = [] # engine certified a value disagreeing with proven optimum + disagree = [] # engines disagree with each other + both_ok = 0 + only_ipm = only_spx = neither = 0 + + print(f"\n{'='*94}") + print(f"{'problem':<14}{'known':>15}{'ipm':>16}{'simplex':>16} verdict") + print(f"{'='*94}") + for stem in stems: + ri, rs = ipm.get(stem), spx.get(stem) + known = (ri or rs).get("known") + oi = ri["obj"] if ri else None + os_ = rs["obj"] if rs else None + vi = ri["verdict"] if ri else "MISSING" + vs = rs["verdict"] if rs else "MISSING" + ok_i = vi == "OK" + ok_s = vs == "OK" + + notes = [] + if vi.startswith("WRONG"): + wrong.append((stem, "ipm", oi, known)) + notes.append(f"IPM {vi}") + if vs.startswith("WRONG"): + wrong.append((stem, "simplex", os_, known)) + notes.append(f"SIMPLEX {vs}") + if ok_i and ok_s: + both_ok += 1 + if not agree(oi, os_, args.tol, args.atol): + disagree.append((stem, oi, os_)) + notes.append("ENGINES DISAGREE") + elif ok_i and not ok_s: + only_ipm += 1 + notes.append(f"only IPM solved (spx={vs})") + elif ok_s and not ok_i: + only_spx += 1 + notes.append(f"only simplex solved (ipm={vi})") + else: + neither += 1 + + ci = f"{oi:.6e}" if oi is not None else "n/a" + cs = f"{os_:.6e}" if os_ is not None else "n/a" + kn = f"{known:.6e}" if known is not None else "n/a" + print(f"{stem:<14}{kn:>15}{ci:>16}{cs:>16} {'; '.join(notes)}") + + print(f"\n{'='*94}\nSUMMARY ({len(stems)} models, timeout={args.timeout}s)") + print(f" both engines certified correct optimum : {both_ok}") + print(f" only IPM solved (simplex timed out) : {only_ipm}") + print(f" only simplex solved (IPM timed out) : {only_spx}") + print(f" neither solved : {neither}") + print(f" WRONG certified values : {len(wrong)}") + print(f" engine-vs-engine disagreements : {len(disagree)}") + + if wrong: + print("\n *** WRONG (certified value disagrees with proven optimum) ***") + for stem, eng, got, known in wrong: + print(f" {stem} [{eng}]: certified {got} vs known {known}") + if disagree: + print("\n *** ENGINE DISAGREEMENT (ipm vs simplex) ***") + for stem, oi, os_ in disagree: + print(f" {stem}: ipm {oi} vs simplex {os_}") + + if wrong or disagree: + print("\nSOUNDNESS GATE: FAIL") + return 1 + print("\nSOUNDNESS GATE: PASS — both engines certify identical optima.") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/benchmarks/globallib/optima.txt b/benchmarks/globallib/optima.txt new file mode 100644 index 00000000..d367bf7a --- /dev/null +++ b/benchmarks/globallib/optima.txt @@ -0,0 +1,104 @@ +camcns 0.0000000000 +chakra -179.1335579000 +chance 29.8943781600 +chem -47.7065148300 +chenery -1058.9198560000 +demo7 -1589042.3859999999 +dispatch 3155.2879270000 +ex14_1_1 -0.0000000000 +ex14_1_2 0.0000000000 +ex14_1_3 -0.0000000000 +ex14_1_4 -0.0000000000 +ex14_1_5 -0.0000000000 +ex14_1_6 0.0000000000 +ex14_1_7 0.0000000000 +ex14_1_8 0.0000000000 +ex14_1_9 -0.0000000000 +ex14_2_1 0.0000000000 +ex14_2_2 0.0000000000 +ex14_2_3 0.0000000000 +ex14_2_4 0.0000000000 +ex14_2_5 0.0000000000 +ex14_2_6 0.0000000000 +ex14_2_7 0.0000000000 +ex14_2_8 0.0000000000 +ex14_2_9 0.0000000000 +ex2_1_1 -17.0000000000 +ex2_1_10 49318.0179600000 +ex2_1_2 -213.0000000000 +ex2_1_3 -15.0000000000 +ex2_1_4 -11.0000000000 +ex2_1_5 -268.0146315000 +ex2_1_6 -39.0000000000 +ex2_1_7 -4150.4101340000 +ex2_1_8 15639.0000000000 +ex2_1_9 -0.3750000000 +ex3_1_1 7049.2480210000 +ex3_1_2 -30665.5386700000 +ex3_1_3 -310.0000000000 +ex3_1_4 -4.0000000000 +ex4_1_1 -7.4873123650 +ex4_1_2 -663.5000966000 +ex4_1_3 -443.6717047000 +ex4_1_4 0.0000000000 +ex4_1_5 0.0000000000 +ex4_1_6 7.0000000000 +ex4_1_7 -7.5000000000 +ex4_1_8 -16.7388931800 +ex4_1_9 -5.5080132710 +ex5_2_2_case1 -400.0000000000 +ex5_2_2_case2 -600.0000000000 +ex5_2_2_case3 -750.0000000000 +ex5_2_4 -450.0000000000 +ex5_2_5 -3500.0000000000 +ex5_3_2 1.8641594590 +ex5_4_2 7512.2301450000 +ex5_4_3 4845.4620050000 +ex5_4_4 10077.7754000000 +ex6_1_1 -0.0201983117 +ex6_1_3 -0.3524978012 +ex6_1_4 -0.2945412877 +ex6_2_14 -0.6953579346 +ex7_2_1 1227.2260750000 +ex7_2_2 -0.3888114343 +ex7_2_4 3.9180102260 +ex7_3_1 0.3417395531 +ex7_3_2 1.0898639710 +ex7_3_3 0.8175290489 +ex7_3_4 6.2746343370 +ex8_1_1 -2.0218067830 +ex8_1_6 -10.0860015000 +ex8_1_7 0.0293108307 +ex8_4_1 0.6185727593 +ex9_1_1 -13.0000000000 +ex9_1_2 -16.0000000000 +ex9_1_4 -37.0000000000 +ex9_1_5 -1.0000000000 +ex9_1_8 -3.2500000000 +ex9_2_2 99.9999693900 +ex9_2_3 -0.0000000000 +ex9_2_4 0.5000000000 +ex9_2_5 5.0000001460 +ex9_2_6 -1.0000000000 +ex9_2_7 17.0000000000 +ex9_2_8 1.5000000000 +gancns 0.0000000000 +haverly -400.0000000000 +himmel11 -30665.5386700000 +himmel16 -0.8660254038 +house -4500.0000000000 +hydro 4366944.1600000001 +korcns 0.0000000000 +launch 2257.7975580000 +least 0.0000000000 +minlphi 582.2361420000 +otpop 0.0000000000 +process -1161.3366020000 +prolog -0.0000000000 +qp3 0.0008093151 +ramsey -2.4874686390 +rbrock 0.0000000000 +torsion100 -0.4182392133 +torsion25 -0.4175107296 +torsion50 -0.4180876320 +torsion75 -0.4181994007 diff --git a/benchmarks/globallib/pounce.json b/benchmarks/globallib/pounce.json new file mode 100644 index 00000000..c6253d33 --- /dev/null +++ b/benchmarks/globallib/pounce.json @@ -0,0 +1,1146 @@ +[ + { + "stem": "camcns", + "n": 210, + "known": 0.0, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "chakra", + "n": 60, + "known": -179.1335579, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "chance", + "n": 4, + "known": 29.89437816, + "status": "Global optimum found.", + "obj": 29.89437805, + "gap": 0.0, + "nodes": 3, + "wall": 0.11095190048217773, + "verdict": "OK" + }, + { + "stem": "chem", + "n": 11, + "known": -47.70651483, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "chenery", + "n": 43, + "known": -1058.919856, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "demo7", + "n": 70, + "known": -1589042.386, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "dispatch", + "n": 4, + "known": 3155.287927, + "status": "Global optimum found.", + "obj": 3155.28792028, + "gap": 0.0, + "nodes": 25, + "wall": 2.1082260608673096, + "verdict": "OK" + }, + { + "stem": "ex14_1_1", + "n": 3, + "known": -0.0, + "status": "Global optimum found.", + "obj": -9.9e-07, + "gap": 0.0, + "nodes": 121, + "wall": 14.632992029190063, + "verdict": "OK" + }, + { + "stem": "ex14_1_2", + "n": 6, + "known": 0.0, + "status": "Global optimum found.", + "obj": 0.0, + "gap": 0.0, + "nodes": 39, + "wall": 23.12484884262085, + "verdict": "OK" + }, + { + "stem": "ex14_1_3", + "n": 3, + "known": -0.0, + "status": "Global optimum found.", + "obj": -1e-08, + "gap": 0.0, + "nodes": 5, + "wall": 0.10895800590515137, + "verdict": "OK" + }, + { + "stem": "ex14_1_4", + "n": 3, + "known": -0.0, + "status": "Global optimum found.", + "obj": -4.6e-07, + "gap": 0.0, + "nodes": 119, + "wall": 9.618108034133911, + "verdict": "OK" + }, + { + "stem": "ex14_1_5", + "n": 6, + "known": -0.0, + "status": "Global optimum found.", + "obj": -5e-08, + "gap": 0.0, + "nodes": 11, + "wall": 1.0782511234283447, + "verdict": "OK" + }, + { + "stem": "ex14_1_6", + "n": 9, + "known": 0.0, + "status": "Global optimum found.", + "obj": 3e-08, + "gap": 0.0, + "nodes": 3, + "wall": 0.8392512798309326, + "verdict": "OK" + }, + { + "stem": "ex14_1_7", + "n": 10, + "known": 0.0, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "ex14_1_8", + "n": 3, + "known": 0.0, + "status": "Global optimum found.", + "obj": -0.0, + "gap": 0.0, + "nodes": 7, + "wall": 0.24362397193908691, + "verdict": "OK" + }, + { + "stem": "ex14_1_9", + "n": 2, + "known": -0.0, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "ex14_2_1", + "n": 5, + "known": 0.0, + "status": "Global optimum found.", + "obj": -1e-08, + "gap": 0.0, + "nodes": 1, + "wall": 0.2945139408111572, + "verdict": "OK" + }, + { + "stem": "ex14_2_2", + "n": 4, + "known": 0.0, + "status": "Global optimum found.", + "obj": -1e-08, + "gap": 0.0, + "nodes": 1, + "wall": 0.06694293022155762, + "verdict": "OK" + }, + { + "stem": "ex14_2_3", + "n": 6, + "known": 0.0, + "status": "Global optimum found.", + "obj": -1e-08, + "gap": 0.0, + "nodes": 1, + "wall": 0.6793646812438965, + "verdict": "OK" + }, + { + "stem": "ex14_2_4", + "n": 5, + "known": 0.0, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "ex14_2_5", + "n": 4, + "known": 0.0, + "status": "Global optimum found.", + "obj": -1e-08, + "gap": 0.0, + "nodes": 3, + "wall": 2.7303459644317627, + "verdict": "OK" + }, + { + "stem": "ex14_2_6", + "n": 5, + "known": 0.0, + "status": "Global optimum found.", + "obj": -1e-08, + "gap": 0.0, + "nodes": 1, + "wall": 1.121068000793457, + "verdict": "OK" + }, + { + "stem": "ex14_2_7", + "n": 6, + "known": 0.0, + "status": "Global optimum found.", + "obj": -1e-08, + "gap": 0.0, + "nodes": 1, + "wall": 2.9285218715667725, + "verdict": "OK" + }, + { + "stem": "ex14_2_8", + "n": 4, + "known": 0.0, + "status": "Global optimum found.", + "obj": -1e-08, + "gap": 0.0, + "nodes": 1, + "wall": 0.430117130279541, + "verdict": "OK" + }, + { + "stem": "ex14_2_9", + "n": 4, + "known": 0.0, + "status": "Global optimum found.", + "obj": -1e-08, + "gap": 0.0, + "nodes": 1, + "wall": 0.553987979888916, + "verdict": "OK" + }, + { + "stem": "ex2_1_1", + "n": 5, + "known": -17.0, + "status": "Global optimum found.", + "obj": -17.00000221, + "gap": 0.0, + "nodes": 7, + "wall": 0.4002680778503418, + "verdict": "OK" + }, + { + "stem": "ex2_1_10", + "n": 20, + "known": 49318.01796, + "status": "Global optimum found.", + "obj": 49318.01744933, + "gap": 0.0, + "nodes": 5, + "wall": 18.01752805709839, + "verdict": "OK" + }, + { + "stem": "ex2_1_2", + "n": 6, + "known": -213.0, + "status": "Global optimum found.", + "obj": -213.0000022, + "gap": 0.0, + "nodes": 1, + "wall": 0.04917001724243164, + "verdict": "OK" + }, + { + "stem": "ex2_1_3", + "n": 13, + "known": -15.0, + "status": "Global optimum found.", + "obj": -15.00000032, + "gap": 0.0, + "nodes": 1, + "wall": 0.06622481346130371, + "verdict": "OK" + }, + { + "stem": "ex2_1_4", + "n": 6, + "known": -11.0, + "status": "Global optimum found.", + "obj": -11.0000002, + "gap": 0.0, + "nodes": 1, + "wall": 0.052351951599121094, + "verdict": "OK" + }, + { + "stem": "ex2_1_5", + "n": 10, + "known": -268.0146315, + "status": "Global optimum found.", + "obj": -268.01463861, + "gap": 0.0, + "nodes": 3, + "wall": 8.325256824493408, + "verdict": "OK" + }, + { + "stem": "ex2_1_6", + "n": 10, + "known": -39.0, + "status": "Global optimum found.", + "obj": -39.00000511, + "gap": 0.0, + "nodes": 5, + "wall": 0.9705379009246826, + "verdict": "OK" + }, + { + "stem": "ex2_1_7", + "n": 20, + "known": -4150.410134, + "status": "Global optimum found.", + "obj": -4150.41025808, + "gap": 0.0, + "nodes": 21, + "wall": 28.774846076965332, + "verdict": "OK" + }, + { + "stem": "ex2_1_8", + "n": 24, + "known": 15639.0, + "status": "Global optimum found.", + "obj": 15638.99989105, + "gap": 0.0, + "nodes": 5, + "wall": 5.96922492980957, + "verdict": "OK" + }, + { + "stem": "ex2_1_9", + "n": 10, + "known": -0.375, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "ex3_1_1", + "n": 8, + "known": 7049.248021, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "ex3_1_2", + "n": 5, + "known": -30665.53867, + "status": "Global optimum found.", + "obj": -30665.5388632, + "gap": 0.0, + "nodes": 3, + "wall": 0.2592799663543701, + "verdict": "OK" + }, + { + "stem": "ex3_1_3", + "n": 6, + "known": -310.0, + "status": "Global optimum found.", + "obj": -310.00000953, + "gap": 0.0, + "nodes": 3, + "wall": 0.763909101486206, + "verdict": "OK" + }, + { + "stem": "ex3_1_4", + "n": 3, + "known": -4.0, + "status": "Global optimum found.", + "obj": -4.00000016, + "gap": 0.0, + "nodes": 21, + "wall": 0.6158857345581055, + "verdict": "OK" + }, + { + "stem": "ex4_1_1", + "n": 1, + "known": -7.487312365, + "status": "Global optimum found.", + "obj": -7.48731236, + "gap": 0.0, + "nodes": 19, + "wall": 0.14807510375976562, + "verdict": "OK" + }, + { + "stem": "ex4_1_2", + "n": 1, + "known": -663.5000966, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "ex4_1_3", + "n": 1, + "known": -443.6717047, + "status": "Global optimum found.", + "obj": -443.67170474, + "gap": 0.0, + "nodes": 29, + "wall": 0.20477700233459473, + "verdict": "OK" + }, + { + "stem": "ex4_1_4", + "n": 1, + "known": 0.0, + "status": "Global optimum found.", + "obj": 0.0, + "gap": 0.0, + "nodes": 65, + "wall": 0.37479305267333984, + "verdict": "OK" + }, + { + "stem": "ex4_1_5", + "n": 2, + "known": 0.0, + "status": "Global optimum found.", + "obj": 0.0, + "gap": 0.0, + "nodes": 117, + "wall": 2.876505136489868, + "verdict": "OK" + }, + { + "stem": "ex4_1_6", + "n": 1, + "known": 7.0, + "status": "Global optimum found.", + "obj": 7.0, + "gap": 0.0, + "nodes": 51, + "wall": 0.3832576274871826, + "verdict": "OK" + }, + { + "stem": "ex4_1_7", + "n": 1, + "known": -7.5, + "status": "Global optimum found.", + "obj": -7.5, + "gap": 0.0, + "nodes": 7, + "wall": 0.044551849365234375, + "verdict": "OK" + }, + { + "stem": "ex4_1_8", + "n": 2, + "known": -16.73889318, + "status": "Global optimum found.", + "obj": -16.73889318, + "gap": 0.0, + "nodes": 3, + "wall": 0.029237985610961914, + "verdict": "OK" + }, + { + "stem": "ex4_1_9", + "n": 2, + "known": -5.508013271, + "status": "Global optimum found.", + "obj": -5.50801353, + "gap": 0.0, + "nodes": 41, + "wall": 0.5273418426513672, + "verdict": "OK" + }, + { + "stem": "ex5_2_2_case1", + "n": 9, + "known": -400.0, + "status": "Global optimum found.", + "obj": -400.00000413, + "gap": 0.0, + "nodes": 9, + "wall": 0.7591931819915771, + "verdict": "OK" + }, + { + "stem": "ex5_2_2_case2", + "n": 9, + "known": -600.0, + "status": "Global optimum found.", + "obj": -600.00000622, + "gap": 0.0, + "nodes": 7, + "wall": 1.532594919204712, + "verdict": "OK" + }, + { + "stem": "ex5_2_2_case3", + "n": 9, + "known": -750.0, + "status": "Global optimum found.", + "obj": -750.00000754, + "gap": 0.0, + "nodes": 7, + "wall": 0.6596837043762207, + "verdict": "OK" + }, + { + "stem": "ex5_2_4", + "n": 7, + "known": -450.0, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "ex5_2_5", + "n": 32, + "known": -3500.0, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "ex5_3_2", + "n": 22, + "known": 1.864159459, + "status": "Global optimum found.", + "obj": 1.86415946, + "gap": 0.0, + "nodes": 9, + "wall": 4.477342844009399, + "verdict": "OK" + }, + { + "stem": "ex5_4_2", + "n": 8, + "known": 7512.230145, + "status": "Global optimum found.", + "obj": 7512.23028348, + "gap": 0.007121, + "nodes": 247, + "wall": 21.06121802330017, + "verdict": "OK" + }, + { + "stem": "ex5_4_3", + "n": 16, + "known": 4845.462005, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "ex5_4_4", + "n": 27, + "known": 10077.7754, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "ex6_1_1", + "n": 8, + "known": -0.0201983117, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "ex6_1_3", + "n": 12, + "known": -0.3524978012, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "ex6_1_4", + "n": 6, + "known": -0.2945412877, + "status": "Global optimum found.", + "obj": -0.29454349, + "gap": 0.0, + "nodes": 105, + "wall": 21.577629804611206, + "verdict": "OK" + }, + { + "stem": "ex6_2_14", + "n": 4, + "known": -0.6953579346, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "ex7_2_1", + "n": 7, + "known": 1227.226075, + "status": "Global optimum found.", + "obj": 1227.22568126, + "gap": 0.0, + "nodes": 11, + "wall": 13.426575899124146, + "verdict": "OK" + }, + { + "stem": "ex7_2_2", + "n": 6, + "known": -0.3888114343, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "ex7_2_4", + "n": 8, + "known": 3.918010226, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "ex7_3_1", + "n": 4, + "known": 0.3417395531, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "ex7_3_2", + "n": 4, + "known": 1.089863971, + "status": "Global optimum found.", + "obj": 1.08986392, + "gap": 0.0, + "nodes": 175, + "wall": 20.677563905715942, + "verdict": "OK" + }, + { + "stem": "ex7_3_3", + "n": 5, + "known": 0.8175290489, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "ex7_3_4", + "n": 12, + "known": 6.274634337, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "ex8_1_1", + "n": 2, + "known": -2.021806783, + "status": "Global optimum found.", + "obj": -2.0218068, + "gap": 0.0, + "nodes": 5, + "wall": 0.044663190841674805, + "verdict": "OK" + }, + { + "stem": "ex8_1_6", + "n": 2, + "known": -10.0860015, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "ex8_1_7", + "n": 5, + "known": 0.0293108307, + "status": "Global optimum found.", + "obj": 0.02931083, + "gap": 0.0, + "nodes": 29, + "wall": 2.4908838272094727, + "verdict": "OK" + }, + { + "stem": "ex8_4_1", + "n": 22, + "known": 0.6185727593, + "status": "Global optimum found.", + "obj": 0.61857249, + "gap": 0.0, + "nodes": 17, + "wall": 17.146764755249023, + "verdict": "OK" + }, + { + "stem": "ex9_1_1", + "n": 13, + "known": -13.0, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "ex9_1_2", + "n": 10, + "known": -16.0, + "status": "Global optimum found.", + "obj": -16.00000001, + "gap": 0.0, + "nodes": 3, + "wall": 0.7734110355377197, + "verdict": "OK" + }, + { + "stem": "ex9_1_4", + "n": 10, + "known": -37.0, + "status": "Global optimum found.", + "obj": -37.0, + "gap": 0.0, + "nodes": 3, + "wall": 0.4052093029022217, + "verdict": "OK" + }, + { + "stem": "ex9_1_5", + "n": 13, + "known": -1.0, + "status": "Global optimum found.", + "obj": -1.0, + "gap": 0.0, + "nodes": 5, + "wall": 1.9099478721618652, + "verdict": "OK" + }, + { + "stem": "ex9_1_8", + "n": 14, + "known": -3.25, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "ex9_2_2", + "n": 10, + "known": 99.99996939, + "status": "Global optimum found.", + "obj": 99.99999992, + "gap": 0.0, + "nodes": 3, + "wall": 1.202683925628662, + "verdict": "OK" + }, + { + "stem": "ex9_2_3", + "n": 16, + "known": -0.0, + "status": "Global optimum found.", + "obj": -3e-08, + "gap": 0.0, + "nodes": 11, + "wall": 5.527990102767944, + "verdict": "OK" + }, + { + "stem": "ex9_2_4", + "n": 8, + "known": 0.5, + "status": "Global optimum found.", + "obj": 0.5, + "gap": 0.0, + "nodes": 13, + "wall": 0.43445920944213867, + "verdict": "OK" + }, + { + "stem": "ex9_2_5", + "n": 8, + "known": 5.000000146, + "status": "Global optimum found.", + "obj": 5.0, + "gap": 0.0, + "nodes": 29, + "wall": 1.3461930751800537, + "verdict": "OK" + }, + { + "stem": "ex9_2_6", + "n": 16, + "known": -1.0, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "ex9_2_7", + "n": 10, + "known": 17.0, + "status": "Global optimum found.", + "obj": 16.99999999, + "gap": 0.0, + "nodes": 3, + "wall": 1.0829360485076904, + "verdict": "OK" + }, + { + "stem": "ex9_2_8", + "n": 3, + "known": 1.5, + "status": "Global optimum found.", + "obj": 1.49999999, + "gap": 0.0, + "nodes": 1, + "wall": 0.012163877487182617, + "verdict": "OK" + }, + { + "stem": "gancns", + "n": 237, + "known": 0.0, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "haverly", + "n": 12, + "known": -400.0, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "himmel11", + "n": 9, + "known": -30665.53867, + "status": "Global optimum found.", + "obj": -30665.53935624, + "gap": 0.0, + "nodes": 3, + "wall": 0.33977603912353516, + "verdict": "OK" + }, + { + "stem": "himmel16", + "n": 13, + "known": -0.8660254038, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "house", + "n": 8, + "known": -4500.0, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "hydro", + "n": 30, + "known": 4366944.16, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "korcns", + "n": 64, + "known": 0.0, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "launch", + "n": 38, + "known": 2257.797558, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "least", + "n": 3, + "known": 0.0, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "minlphi", + "n": 26, + "known": 582.236142, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "otpop", + "n": 60, + "known": 0.0, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "process", + "n": 10, + "known": -1161.336602, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "prolog", + "n": 20, + "known": -0.0, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "qp3", + "n": 100, + "known": 0.0008093151, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "ramsey", + "n": 29, + "known": -2.487468639, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "rbrock", + "n": 2, + "known": 0.0, + "status": "Global optimum found.", + "obj": 0.0, + "gap": 0.0, + "nodes": 1, + "wall": 0.02007293701171875, + "verdict": "OK" + }, + { + "stem": "torsion100", + "n": 5004, + "known": -0.4182392133, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "torsion25", + "n": 1254, + "known": -0.4175107296, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "torsion50", + "n": 2504, + "known": -0.418087632, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + }, + { + "stem": "torsion75", + "n": 3754, + "known": -0.4181994007, + "status": "TIMEOUT", + "obj": null, + "gap": null, + "nodes": null, + "wall": 30.0, + "verdict": "TIMEOUT" + } +] \ No newline at end of file diff --git a/benchmarks/globallib/run_globallib.py b/benchmarks/globallib/run_globallib.py new file mode 100755 index 00000000..416dcc5e --- /dev/null +++ b/benchmarks/globallib/run_globallib.py @@ -0,0 +1,184 @@ +#!/usr/bin/env python3 +"""GLOBALLib global-optimization benchmark harness for `pounce-global`. + +Drives `pounce .nl solver_selection=global` on the GLOBALLib subset that +has a *proven* global optimum (MINLPLib `=opt=`), and checks the **certified** +objective the spatial branch-and-bound solver returns against that ground truth. + +Unlike the synthetic Rust suite (`crates/pounce-global/examples/benchmark.rs`), +this runs real AMPL `.nl` files through the same CLI path users hit, so it tests +the whole pipeline: parse -> classify -> bound-capping -> B&B -> certificate. + +Ground truth lives in `optima.txt` (one ` ` per line, from +MINLPLib's `minlplib.solu`, `=opt=` entries only). The `.nl` files are supplied +via the bench-data tree (see README for the AMPL translation recipe). + +Usage: + run_globallib.py [--bin PATH] [--nl-dir DIR] [--timeout SECS] + [--max-vars N] [--out report.json] [stems...] + +Default nl-dir: $POUNCE_BENCH_DATA/globallib/nl or + ~/Dropbox/projects/pounce-bench-data/globallib/nl +""" +import argparse +import json +import os +import re +import subprocess +import time +from pathlib import Path + +# "POUNCE (global B&B, pounce-global): obj=.. gap=.. nodes=N peak_frontier=.." +RESULT_RE = re.compile( + r"obj=(?P[-+0-9.eE]+)\s+gap=(?P[-+0-9.eE]+)\s+nodes=(?P\d+)" +) +STATUS_RE = re.compile(r"pounce-global\):\s*(?P[^.]+\.)") + + +def default_nl_dir(): + env = os.environ.get("POUNCE_BENCH_DATA") + if env: + return Path(env) / "globallib" / "nl" + return Path.home() / "Dropbox/projects/pounce-bench-data/globallib/nl" + + +def load_optima(path): + opt = {} + for line in Path(path).read_text().splitlines(): + line = line.strip() + if not line or line.startswith("#"): + continue + stem, val = line.split() + opt[stem] = float(val) + return opt + + +def run_one(bin_path, nl, timeout, extra_opts=()): + start = time.time() + try: + p = subprocess.run( + [bin_path, str(nl), "solver_selection=global", *extra_opts], + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + timeout=timeout, text=True, + ) + except subprocess.TimeoutExpired: + return {"status": "TIMEOUT", "obj": None, "gap": None, + "nodes": None, "wall": timeout} + wall = time.time() - start + out = p.stdout + rec = {"status": None, "obj": None, "gap": None, "nodes": None, "wall": wall} + ms = STATUS_RE.search(out) + if ms: + rec["status"] = ms.group("msg").strip() + mr = RESULT_RE.search(out) + if mr: + rec["obj"] = float(mr.group("obj")) + rec["gap"] = float(mr.group("gap")) + rec["nodes"] = int(mr.group("nodes")) + if rec["status"] is None: + # crash / panic / no result line + rec["status"] = f"NO-RESULT(rc={p.returncode})" + return rec + + +def var_count(nl): + try: + with open(nl) as fh: + fh.readline() + return int(fh.readline().split()[0]) + except Exception: + return None + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--bin", default="./target/release/pounce") + ap.add_argument("--nl-dir", default=str(default_nl_dir())) + ap.add_argument("--optima", default=str(Path(__file__).with_name("optima.txt"))) + ap.add_argument("--timeout", type=float, default=30.0) + ap.add_argument("--max-vars", type=int, default=None, + help="skip problems with more than this many variables") + ap.add_argument("--tol", type=float, default=1e-4, + help="relative tolerance for the certified-vs-known check") + ap.add_argument("--atol", type=float, default=1e-6, + help="absolute tolerance floor (so a proven optimum of 0 is " + "not failed for a correct certified value of ~1e-7)") + ap.add_argument("--out", default=None) + ap.add_argument("--opt", action="append", default=[], metavar="KEY=VALUE", + help="extra `key=value` option passed to pounce (repeatable), " + "e.g. --opt global_obbt_lp=simplex") + ap.add_argument("--stems-file", default=None, + help="newline-separated stem list (e.g. a dev tier under " + "tiers/); '#' comments and blanks ignored. Combined " + "with any positional stems.") + ap.add_argument("stems", nargs="*", help="restrict to these stems") + args = ap.parse_args() + + nl_dir = Path(args.nl_dir) + optima = load_optima(args.optima) + stems = list(args.stems) + if args.stems_file: + for line in Path(args.stems_file).read_text().splitlines(): + line = line.split("#", 1)[0].strip() + if line: + stems.append(line) + stems = stems or sorted(optima) + + rows = [] + print(f"{'problem':<14}{'n':>4} {'status':<24}{'certified':>16}" + f"{'known':>16}{'gap':>9}{'nodes':>8}{'s':>8} verdict") + n_ok = n_to = n_wrong = n_other = 0 + for stem in stems: + nl = nl_dir / f"{stem}.nl" + known = optima.get(stem) + if not nl.exists() or known is None: + continue + nv = var_count(nl) + if args.max_vars is not None and nv is not None and nv > args.max_vars: + continue + rec = run_one(args.bin, nl, args.timeout, args.opt) + cert = rec["obj"] + # verdict + if rec["status"] == "TIMEOUT": + verdict, n_to = "TIMEOUT", n_to + 1 + elif "Global optimum found" in (rec["status"] or "") and cert is not None: + # Combined absolute+relative check: a proven optimum of exactly 0 + # (common here — ex14_1_*, ex9_2_3) makes a pure *relative* metric + # explode for a certified value of ~1e-7 that is in fact correct to + # ~1e-6 absolute. Accept when EITHER the absolute gap is within the + # floor OR the relative gap is within tol. + abs_err = abs(cert - known) + rel = abs_err / max(abs(known), abs(cert), 1e-6) + if abs_err <= args.atol or rel <= args.tol: + verdict, n_ok = "OK", n_ok + 1 + else: + verdict, n_wrong = f"WRONG(rel={rel:.1e})", n_wrong + 1 + else: + verdict, n_other = rec["status"] or "??", n_other + 1 + rows.append({"stem": stem, "n": nv, "known": known, **rec, + "verdict": verdict}) + c = f"{cert:.6e}" if cert is not None else "n/a" + g = f"{rec['gap']:.1e}" if rec["gap"] is not None else "n/a" + print(f"{stem:<14}{str(nv):>4} {(rec['status'] or '')[:23]:<24}{c:>16}" + f"{known:>16.6e}{g:>9}{str(rec['nodes']):>8}{rec['wall']:>8.2f} {verdict}") + + total = len(rows) + print(f"\n{'='*70}\nSUMMARY ({total} problems, timeout={args.timeout}s, " + f"tol={args.tol})\n{'='*70}") + print(f" certified correct global optimum : {n_ok}") + print(f" timed out : {n_to}") + print(f" wrong certified value : {n_wrong}") + print(f" other (node-limit/infeas/crash) : {n_other}") + if n_wrong: + print("\n *** WRONG (certified value disagrees with proven optimum) ***") + for r in rows: + if r["verdict"].startswith("WRONG"): + print(f" {r['stem']}: certified {r['obj']} vs known {r['known']}") + + if args.out: + Path(args.out).write_text(json.dumps(rows, indent=2)) + print(f"\nwrote {args.out}") + + +if __name__ == "__main__": + main() diff --git a/benchmarks/globallib/tiers/fast.txt b/benchmarks/globallib/tiers/fast.txt new file mode 100644 index 00000000..d9d9d72a --- /dev/null +++ b/benchmarks/globallib/tiers/fast.txt @@ -0,0 +1,42 @@ +# fast tier — every GLOBALLib proven-optimum model the IPM-OBBT engine +# solves in <1s (wall from the 2026-06-07 30s sweep). ~12s total per engine: +# the broader regression set once a change passes tiers/micro.txt. +# +# python3 run_globallib.py --stems-file tiers/fast.txt --timeout 10 +# python3 compare_obbt_engines.py --stems-file tiers/fast.txt --timeout 10 +# +# stem n nodes wall(IPM) +ex9_2_8 # n=3 1n 0.01s +rbrock # n=2 1n 0.02s +ex2_1_2 # n=6 1n 0.03s +ex4_1_8 # n=2 3n 0.03s +ex8_1_1 # n=2 5n 0.04s +ex4_1_7 # n=1 7n 0.05s +ex2_1_4 # n=6 1n 0.06s +ex14_2_2 # n=4 1n 0.07s +ex2_1_3 # n=13 1n 0.07s +chance # n=4 3n 0.11s +ex14_1_3 # n=3 5n 0.11s +ex4_1_1 # n=1 19n 0.13s +ex4_1_3 # n=1 29n 0.18s +ex14_1_8 # n=3 7n 0.23s +ex3_1_2 # n=5 3n 0.24s +ex14_2_1 # n=5 1n 0.30s +himmel11 # n=9 3n 0.32s +ex4_1_6 # n=1 51n 0.38s +ex4_1_4 # n=1 65n 0.40s +ex9_2_4 # n=8 11n 0.40s +ex2_1_1 # n=5 7n 0.40s +ex9_1_4 # n=10 3n 0.42s +ex14_2_8 # n=4 1n 0.44s +ex14_1_4 # n=3 7n 0.50s +ex4_1_9 # n=2 41n 0.54s +ex14_2_9 # n=4 1n 0.57s +ex3_1_4 # n=3 21n 0.62s +ex5_2_2_case3 # n=9 7n 0.65s +ex14_2_3 # n=6 1n 0.69s +ex3_1_3 # n=6 3n 0.77s +ex9_1_2 # n=10 3n 0.82s +ex5_2_2_case1 # n=9 9n 0.91s +ex14_1_6 # n=9 3n 0.95s +ex2_1_6 # n=10 5n 0.96s diff --git a/benchmarks/globallib/tiers/micro.txt b/benchmarks/globallib/tiers/micro.txt new file mode 100644 index 00000000..d5408f71 --- /dev/null +++ b/benchmarks/globallib/tiers/micro.txt @@ -0,0 +1,27 @@ +# micro tier — the inner dev loop for wiring the global solver's per-node +# pieces (OBBT sweep, simplex/IPM warm-starts, relaxation, branching/incumbent). +# Curated to run in ~1-2s total per engine so the edit→run loop is seconds. +# Wall times below are from the IPM-OBBT engine (30s sweep, 2026-06-07). +# +# Run: python3 run_globallib.py --stems-file tiers/micro.txt --timeout 10 +# Both engines + soundness gate: +# python3 compare_obbt_engines.py --stems-file tiers/micro.txt --timeout 10 +# +# Keep every entry SUB-SECOND. If a change makes one slow, that's the signal — +# don't pad the timeout, fix the regression or move it to fast.txt. + +# --- root-only: exercises OBBT 2n-LP sweep + relaxation + local solve, no tree +rbrock # n=2 1 node 0.02s +ex2_1_2 # n=6 1 node 0.03s +ex14_2_2 # n=4 1 node 0.07s +ex2_1_3 # n=13 1 node 0.07s (widest root OBBT sweep in the tier) + +# --- branching: also exercises tree / branch-var select / incumbent update +ex4_1_8 # n=2 3 nodes 0.03s +ex8_1_1 # n=2 5 nodes 0.04s +ex4_1_7 # n=1 7 nodes 0.05s +chance # n=4 3 nodes 0.11s +ex4_1_1 # n=1 19 nodes 0.13s +ex4_1_3 # n=1 29 nodes 0.18s (deepest tree in the tier) +ex2_1_1 # n=5 7 nodes 0.40s (warm-start across more vars) +ex9_2_4 # n=8 11 nodes 0.40s (widest sweep × branching) diff --git a/benchmarks/globallib/translate.sh b/benchmarks/globallib/translate.sh new file mode 100755 index 00000000..b4760c8c --- /dev/null +++ b/benchmarks/globallib/translate.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +# Regenerate the GLOBALLib `.nl` benchmark files from their AMPL `.mod` sources. +# +# The benchmark set is the GLOBALLib subset that has a *proven* global optimum +# (MINLPLib `=opt=`). The `.mod` files come from ampl/global-optimization; the +# `.nl` files are produced by AMPL's `write` and dropped into the bench-data +# tree (Dropbox), the same place every other supplied benchmark tier lives. +# +# Requirements: an `ampl` on PATH (or set $AMPL), and the optima reference +# (`optima.txt`) that ships next to this script. +# +# Usage: benchmarks/globallib/translate.sh [out_nl_dir] +set -euo pipefail + +HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +AMPL="${AMPL:-ampl}" +OUT="${1:-${POUNCE_BENCH_DATA:-$HOME/Dropbox/projects/pounce-bench-data}/globallib/nl}" +WORK="$(mktemp -d)" +trap 'rm -rf "$WORK"' EXIT + +command -v "$AMPL" >/dev/null || { echo "error: no '$AMPL' on PATH (set \$AMPL)"; exit 1; } +mkdir -p "$OUT" + +echo "cloning ampl/global-optimization (.mod sources)..." +git clone --depth 1 https://github.com/ampl/global-optimization.git "$WORK/go" >/dev/null 2>&1 +MOD="$WORK/go/global" + +echo "translating $(wc -l < "$HERE/optima.txt") models -> $OUT" + n=0; fail=0 +while read -r stem _val; do + [ -n "$stem" ] || continue + src="$MOD/$stem.mod" + if [ ! -f "$src" ]; then echo " MISSING .mod: $stem"; fail=$((fail+1)); continue; fi + ( cd "$OUT" && printf 'model %s;\noption auxfiles rc;\nwrite g%s;\n' "$src" "$stem" \ + | "$AMPL" >/dev/null 2>&1 ) + if [ -f "$OUT/$stem.nl" ]; then n=$((n+1)); else echo " FAIL: $stem"; fail=$((fail+1)); fi +done < "$HERE/optima.txt" +echo "done: $n translated, $fail failed" diff --git a/benchmarks/scripts/compare_pounce_clarabel.py b/benchmarks/scripts/compare_pounce_clarabel.py new file mode 100644 index 00000000..affcd75c --- /dev/null +++ b/benchmarks/scripts/compare_pounce_clarabel.py @@ -0,0 +1,482 @@ +#!/usr/bin/env python3 +"""Compare POUNCE's convex LP/QP IPM against Clarabel on the LP (netlib + +Maros-Meszaros) and QP (Maros-Meszaros) benchmark suites. + +POUNCE numbers are read from the canonical reports produced by the .nl runs +(``benchmarks/lp/pounce.json``, ``benchmarks/qp/pounce.json``). Clarabel is run +fresh here, in-process, on the *same* source problems and joined by name. + +Clarabel has no model-file reader, so each instance is converted to matrices: + + QP (.mat) : min 1/2 x'Px + q'x s.t. l <= Ax <= u (+ const r) + LP (e[mps]) : min c'x s.t. rl <= Ax <= ru, cl <= x <= cu + +Two-sided rows / finite variable bounds become a ZeroCone (equalities) plus a +NonnegativeCone (one-sided inequalities), in that order. + +LP sources are emps-compressed (Maros-Meszaros additionally gzipped); we build +the repo's ``benchmarks/lp/mps/emps.c`` decompressor and pipe through HiGHS. + +Usage: + python3 benchmarks/scripts/compare_pounce_clarabel.py [--class lp|qp|both] + [--limit N] + [--time-limit SECS] + [--from-json] + [--check] +Out: + benchmarks/clarabel_compare_{lp,qp}.json per-problem records + benchmarks/clarabel_compare.md side-by-side markdown report + +--from-json skip the live run; load the per-problem records from the existing + benchmarks/clarabel_compare_{lp,qp}.json (regression gate / CI). +--check exit nonzero if any *genuine* objective disagreement remains. A + disagreement counts only when BOTH solvers report a hard solve + (pounce SolveSucceeded AND clarabel Solved -- AlmostSolved and + SolvedToAcceptableLevel are excluded as not-certified) yet their + objectives differ by more than the numpy-isclose band + |a-b| > atol + rtol*max(|a|,|b|) (rtol=atol=1e-3). This flags real + wrong-answer bugs while tolerating convergence-point slack. +""" +import argparse +import glob +import gzip +import json +import math +import os +import subprocess +import sys +import tempfile +import time + +import numpy as np +import scipy.io as sio +import scipy.sparse as sp + +import clarabel + +HERE = os.path.dirname(os.path.abspath(__file__)) +BENCH = os.path.dirname(HERE) +ROOT = os.path.dirname(BENCH) + +INF = 1e20 +EMPS_SRC = os.path.join(BENCH, "lp", "mps", "emps.c") +EMPS_BIN = os.path.join(tempfile.gettempdir(), "pounce_emps") +POUNCE_BIN = os.path.join(ROOT, "target", "release", "pounce") +MPS_TO_NL = os.path.join(BENCH, "lp", "mps_to_nl.py") + +# POUNCE statuses that count as a successful optimal solve. POUNCE is run LIVE +# (the committed pounce.json reports were found to be partially stale), so we +# read its --json-output: solution.status + statistics.{final_objective, +# iteration_count, total_wallclock_time_secs}. +POUNCE_OK = {"SolveSucceeded", "SolvedToAcceptableLevel"} +CLARABEL_OK = {"Solved", "AlmostSolved"} + +# Lazily imported single-file .mat -> Pyomo model converter from generate_nl.py. +_qp_gen = None + + +def qp_gen(): + global _qp_gen + if _qp_gen is None: + import importlib.util + spec = importlib.util.spec_from_file_location( + "qp_generate_nl", os.path.join(BENCH, "qp", "generate_nl.py")) + _qp_gen = importlib.util.module_from_spec(spec) + spec.loader.exec_module(_qp_gen) + return _qp_gen + + +# ---------------------------------------------------------------------------- +# Matrix assembly: l <= Ax <= u (+ box) -> Clarabel (Zero then Nonneg cones). +# ---------------------------------------------------------------------------- +def build_cones(A, lo, hi, P, q, eq_tol=1e-9): + """Return (P, q, G, b, cones) for min 1/2 x'Px+q'x s.t. lo<=Ax<=hi. + + Variable bounds, if any, should already be folded into A/lo/hi by the + caller (LP folds an identity block; QP has them inside A).""" + A = A.tocsr() + lo = np.asarray(lo, float) + hi = np.asarray(hi, float) + + fin_lo = lo > -INF + fin_hi = hi < INF + eq = fin_lo & fin_hi & (np.abs(hi - lo) <= eq_tol) + only_hi = fin_hi & ~eq + only_lo = fin_lo & ~eq + + blocks, rhs = [], [] + # ZeroCone block: equalities Ax = lo. + n_zero = int(eq.sum()) + if n_zero: + blocks.append(A[eq]) + rhs.append(lo[eq]) + # NonnegativeCone block: Ax <= hi and -Ax <= -lo. + n_nn = 0 + if only_hi.any(): + blocks.append(A[only_hi]) + rhs.append(hi[only_hi]) + n_nn += int(only_hi.sum()) + if only_lo.any(): + blocks.append(-A[only_lo]) + rhs.append(-lo[only_lo]) + n_nn += int(only_lo.sum()) + + if blocks: + G = sp.vstack(blocks).tocsc() + b = np.concatenate(rhs) + else: + G = sp.csc_matrix((0, A.shape[1])) + b = np.zeros(0) + + cones = [] + if n_zero: + cones.append(clarabel.ZeroConeT(n_zero)) + if n_nn: + cones.append(clarabel.NonnegativeConeT(n_nn)) + return P, q, G, b, cones + + +def load_qp(path): + """Maros-Meszaros .mat -> (P,q,G,b,cones, n,m, const_offset).""" + m = sio.loadmat(path) + P = sp.csc_matrix(m["P"]).astype(float) + q = np.asarray(m["q"], float).ravel() + A = sp.csc_matrix(m["A"]).astype(float) + lo = np.asarray(m["l"], float).ravel() + hi = np.asarray(m["u"], float).ravel() + r = float(np.asarray(m.get("r", 0.0)).ravel()[0]) if "r" in m else 0.0 + n = P.shape[0] + mcon = A.shape[0] + P, q, G, b, cones = build_cones(A, lo, hi, P, q) + return P, q, G, b, cones, n, mcon, r + + +def ensure_emps(): + if os.path.exists(EMPS_BIN): + return + r = subprocess.run(["cc", "-std=gnu89", "-O2", "-w", "-o", EMPS_BIN, EMPS_SRC], + capture_output=True, text=True) + if r.returncode != 0 or not os.path.exists(EMPS_BIN): + raise RuntimeError(f"failed to build emps: {r.stderr[:300]}") + + +def load_lp(path): + """netlib/Maros emps (maybe .gz) -> (P,q,G,b,cones, n,m, const_offset). + + P is the zero matrix (pure LP). Variable bounds are folded into A.""" + import highspy + + ensure_emps() + # Decompress emps -> plain MPS. + raw = gzip.open(path, "rb").read() if path.endswith(".gz") else open(path, "rb").read() + dec = subprocess.run([EMPS_BIN], input=raw, capture_output=True) + if dec.returncode != 0 or not dec.stdout: + raise RuntimeError("emps decompress produced no output") + with tempfile.NamedTemporaryFile("wb", suffix=".mps", delete=False) as tf: + tf.write(dec.stdout) + mps = tf.name + try: + h = highspy.Highs() + h.setOptionValue("output_flag", False) + h.readModel(mps) + lp = h.getLp() + n, mcon = lp.num_col_, lp.num_row_ + c = np.array(lp.col_cost_, float) + cl = np.array(lp.col_lower_, float) + cu = np.array(lp.col_upper_, float) + rl = np.array(lp.row_lower_, float) + ru = np.array(lp.row_upper_, float) + offset = float(getattr(lp, "offset_", 0.0)) + A = sp.csc_matrix((lp.a_matrix_.value_, lp.a_matrix_.index_, + lp.a_matrix_.start_), shape=(mcon, n)) + sense = getattr(lp, "sense_", None) + # HiGHS: kMaximize flips; pounce/clarabel minimize. Normalize to min. + if sense is not None and int(sense) == int(getattr(highspy.ObjSense, "kMaximize", 1)): + c = -c + offset = -offset + finally: + os.unlink(mps) + + # Fold variable bounds into the constraint block as an identity. + I = sp.eye(n, format="csr") + Afull = sp.vstack([A, I]).tocsr() + lofull = np.concatenate([rl, cl]) + hifull = np.concatenate([ru, cu]) + P = sp.csc_matrix((n, n)) + P, q, G, b, cones = build_cones(Afull, lofull, hifull, P, c) + return P, q, G, b, cones, n, mcon, offset + + +# ---------------------------------------------------------------------------- +def solve_clarabel(P, q, G, b, cones, offset, time_limit): + s = clarabel.DefaultSettings() + s.verbose = False + s.time_limit = float(time_limit) + t = time.perf_counter() + try: + sol = clarabel.DefaultSolver(P, q, G, b, cones, s).solve() + wall = time.perf_counter() - t + st = str(sol.status) + obj = sol.obj_val + offset if st in CLARABEL_OK else None + return {"status": st, "objective": obj, + "iterations": int(sol.iterations), + "solve_time": float(sol.solve_time), "wall": wall} + except Exception as e: + return {"status": f"Error:{type(e).__name__}", "objective": None, + "iterations": None, "solve_time": None, + "wall": time.perf_counter() - t} + + +def reldiff(a, b): + if a is None or b is None: + return None + return abs(a - b) / max(abs(a), abs(b), 1e-10) + + +# Strict objective-agreement gate for --check. Statuses that count as a +# *certified* solve for each solver (AlmostSolved / SolvedToAcceptableLevel are +# deliberately excluded: an uncertified point may legitimately differ). +POUNCE_STRICT = {"SolveSucceeded"} +CLARABEL_STRICT = {"Solved"} +CHECK_RTOL = 1e-3 +CHECK_ATOL = 1e-3 + + +def isclose(a, b, rtol=CHECK_RTOL, atol=CHECK_ATOL): + """numpy-isclose style absolute+relative tolerance.""" + if a is None or b is None: + return False + return abs(a - b) <= atol + rtol * max(abs(a), abs(b)) + + +def check_disagreements(rows): + """Return the rows where both solvers certify a solve yet objectives differ + beyond the isclose band -- the genuine wrong-answer set the gate fails on.""" + bad = [] + for r in rows: + if (r["pounce"]["status"] in POUNCE_STRICT + and r["clarabel"]["status"] in CLARABEL_STRICT + and not isclose(r["pounce"]["objective"], r["clarabel"]["objective"])): + bad.append(r) + return bad + + +# ---------------------------------------------------------------------------- +# POUNCE, run live on a freshly generated .nl (same problem Clarabel solves). +# ---------------------------------------------------------------------------- +def gen_nl_lp(src_path, out_nl): + """emps[.gz] source -> plain MPS -> .nl via the repo's mps_to_nl.py.""" + ensure_emps() + raw = gzip.open(src_path, "rb").read() if src_path.endswith(".gz") else open(src_path, "rb").read() + dec = subprocess.run([EMPS_BIN], input=raw, capture_output=True) + if dec.returncode != 0 or not dec.stdout: + raise RuntimeError("emps decompress produced no output") + with tempfile.NamedTemporaryFile("wb", suffix=".mps", delete=False) as tf: + tf.write(dec.stdout) + mps = tf.name + try: + r = subprocess.run([sys.executable, MPS_TO_NL, mps, out_nl], + capture_output=True, text=True, timeout=120) + if r.returncode != 0 or not os.path.exists(out_nl): + raise RuntimeError(f"mps_to_nl failed: {r.stderr[:200]}") + finally: + os.unlink(mps) + + +def gen_nl_qp(mat_path, out_nl): + """Maros-Meszaros .mat -> .nl via generate_nl.build_model (the repo path).""" + g = qp_gen() + name = os.path.basename(mat_path)[:-4] + P, q, r, C, lc, uc, lb, ub = g.load_qp(mat_path) + model = g.build_model(name, P, q, r, C, lc, uc, lb, ub) + model.write(out_nl, format="nl", + io_options={"symbolic_solver_labels": False}) + + +def run_pounce(nl_path, selection, time_limit): + with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as tf: + out = tf.name + t = time.perf_counter() + try: + subprocess.run([POUNCE_BIN, nl_path, f"solver_selection={selection}", + "--json-output", out], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, + timeout=time_limit) + except subprocess.TimeoutExpired: + return {"status": "TimeOut", "objective": None, + "iterations": None, "solve_time": time_limit} + wall = time.perf_counter() - t + try: + d = json.load(open(out)) + sol, stat = d.get("solution", {}), d.get("statistics", {}) + return {"status": sol.get("status"), + "objective": stat.get("final_objective", sol.get("objective")), + "iterations": stat.get("iteration_count"), + "solve_time": stat.get("total_wallclock_time_secs", wall)} + except Exception as e: + return {"status": f"ParseError:{type(e).__name__}", "objective": None, + "iterations": None, "solve_time": wall} + finally: + os.path.exists(out) and os.unlink(out) + + +# ---------------------------------------------------------------------------- +def run_class(kind, limit, time_limit): + """kind in {'lp','qp'}. Runs BOTH solvers live on each source problem and + returns joined per-problem records.""" + if kind == "qp": + srcs = sorted(glob.glob(os.path.join(BENCH, "qp", "data", "*.mat")), + key=os.path.getsize) + name_of = lambda p: os.path.basename(p)[:-4] + loader, gen_nl, selection = load_qp, gen_nl_qp, "qp-ipm" + else: + srcs = (sorted(glob.glob(os.path.join(BENCH, "lp", "data", "netlib", "*"))) + + sorted(glob.glob(os.path.join(BENCH, "lp", "data", "meszaros", "*")))) + name_of = lambda p: os.path.basename(p).split(".")[0] + loader, gen_nl, selection = load_lp, gen_nl_lp, "lp-ipm" + if limit: + srcs = srcs[:limit] + + rows = [] + print(f"\n=== {kind.upper()} ({len(srcs)} problems, pounce={selection}) ===") + print(f"{'problem':<16}{'p.status':>14}{'c.status':>14}" + f"{'reldiff':>11}{'p.it':>6}{'c.it':>6}{'p.s':>9}{'c.s':>9}") + for p in srcs: + name = name_of(p) + # POUNCE (live): generate .nl, solve. + try: + with tempfile.NamedTemporaryFile(suffix=".nl", delete=False) as tf: + nl = tf.name + gen_nl(p, nl) + pr = run_pounce(nl, selection, time_limit) + os.path.exists(nl) and os.unlink(nl) + except Exception as e: + pr = {"status": f"GenError:{type(e).__name__}", "objective": None, + "iterations": None, "solve_time": None} + # Clarabel: load matrices, solve. + try: + P, q, G, b, cones, n, m, off = loader(p) + cl = solve_clarabel(P, q, G, b, cones, off, time_limit) + except Exception as e: + cl = {"status": f"LoadError:{type(e).__name__}", "objective": None, + "iterations": None, "solve_time": None, "wall": None} + n = m = None + rd = reldiff(pr.get("objective"), cl["objective"]) + rows.append({"name": name, "n": n, "m": m, + "pounce": pr, "clarabel": cl, "reldiff": rd}) + fr = f"{rd:.1e}" if rd is not None else "n/a" + ps, cs = pr.get("solve_time"), cl.get("solve_time") + print(f"{name:<16}{str(pr.get('status'))[:13]:>14}{cl['status'][:13]:>14}" + f"{fr:>11}{str(pr.get('iterations')):>6}{str(cl['iterations']):>6}" + f"{(ps if ps is not None else float('nan')):>9.3f}" + f"{(cs if cs is not None else float('nan')):>9.3f}") + return rows + + +def geomean(xs): + xs = [x for x in xs if x is not None and x > 0] + return math.exp(sum(map(math.log, xs)) / len(xs)) if xs else None + + +def summarize(kind, rows): + both = [r for r in rows + if r["pounce"]["status"] in POUNCE_OK and r["clarabel"]["status"] in CLARABEL_OK] + agree = [r for r in both if r["reldiff"] is not None and r["reldiff"] < 1e-4] + p_only = [r for r in rows + if r["pounce"]["status"] in POUNCE_OK and r["clarabel"]["status"] not in CLARABEL_OK] + c_only = [r for r in rows + if r["pounce"]["status"] not in POUNCE_OK and r["clarabel"]["status"] in CLARABEL_OK] + speed = [r["pounce"]["solve_time"] / r["clarabel"]["solve_time"] + for r in both + if r["pounce"]["solve_time"] and r["clarabel"]["solve_time"]] + gm = geomean(speed) + out = [ + f"### {kind.upper()} — {len(rows)} problems", + "", + f"- Solved by **both**: {len(both)}", + f"- Objective agreement (reldiff < 1e-4): **{len(agree)}/{len(both)}**", + f"- POUNCE solved, Clarabel did not: {len(p_only)}", + f"- Clarabel solved, POUNCE did not: {len(c_only)}", + ] + if gm: + faster = "Clarabel faster" if gm > 1 else "POUNCE faster" + out.append(f"- Geomean solve-time ratio pounce/clarabel: **{gm:.2f}×** " + f"({faster} on average, over {len(speed)} both-solved)") + if p_only: + out.append(f"- Clarabel non-solves: " + + ", ".join(f"{r['name']}({r['clarabel']['status']})" for r in p_only[:12]) + + (" …" if len(p_only) > 12 else "")) + if c_only: + out.append(f"- POUNCE non-solves: " + + ", ".join(f"{r['name']}({r['pounce']['status']})" for r in c_only[:12]) + + (" …" if len(c_only) > 12 else "")) + out.append("") + return "\n".join(out) + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--class", dest="cls", choices=["lp", "qp", "both"], default="both") + ap.add_argument("--limit", type=int, default=0, help="cap problems per class (debug)") + ap.add_argument("--time-limit", type=float, default=120.0) + ap.add_argument("--from-json", action="store_true", + help="load existing clarabel_compare_{kind}.json instead of " + "running both solvers live") + ap.add_argument("--check", action="store_true", + help="exit nonzero on any genuine objective disagreement " + "(strict-solved gate, isclose rtol=atol=1e-3)") + args = ap.parse_args() + + kinds = ["lp", "qp"] if args.cls == "both" else [args.cls] + md = ["# POUNCE vs Clarabel — convex LP/QP benchmark comparison", "", + f"Both solvers run live on this machine, per-solver time limit " + f"{args.time_limit:g}s. POUNCE: convex LP/QP IPM (`solver_selection=" + "{lp,qp}-ipm`) on a freshly generated `.nl`. Clarabel " + f"{clarabel.__version__} (Python) on matrices from the same source " + "(its backend may use multiple threads, so wall-time comparisons " + "favor it on larger problems). Both minimize; objectives joined by " + "problem name.", + ""] + all_bad = [] + for kind in kinds: + json_path = os.path.join(BENCH, f"clarabel_compare_{kind}.json") + if args.from_json: + with open(json_path) as fh: + rows = json.load(fh) + print(f"\n=== {kind.upper()} (loaded {len(rows)} records from " + f"{os.path.relpath(json_path, ROOT)}) ===") + else: + rows = run_class(kind, args.limit, args.time_limit) + with open(json_path, "w") as fh: + json.dump(rows, fh, indent=2) + md.append(summarize(kind, rows)) + print("\n" + summarize(kind, rows)) + + if args.check: + bad = check_disagreements(rows) + if bad: + print(f"--check {kind.upper()}: {len(bad)} genuine " + f"disagreement(s) (both certified-solved, " + f"|Δobj| > {CHECK_ATOL}+{CHECK_RTOL}·max):") + for r in bad: + print(f" {r['name']:<16} pounce={r['pounce']['objective']!r} " + f"clarabel={r['clarabel']['objective']!r} " + f"reldiff={r['reldiff']}") + else: + print(f"--check {kind.upper()}: PASS " + f"(no certified-solve objective disagreements)") + all_bad.extend((kind, r) for r in bad) + + if not args.from_json: + with open(os.path.join(BENCH, "clarabel_compare.md"), "w") as fh: + fh.write("\n".join(md)) + print(f"\nwrote {os.path.join(BENCH, 'clarabel_compare.md')}") + + if args.check and all_bad: + print(f"\nFAIL: {len(all_bad)} genuine objective disagreement(s) across " + f"{', '.join(sorted(set(k.upper() for k, _ in all_bad)))}.") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/benchmarks/scripts/compare_solvers.py b/benchmarks/scripts/compare_solvers.py new file mode 100644 index 00000000..7f8557e5 --- /dev/null +++ b/benchmarks/scripts/compare_solvers.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 +"""Compare the NLP filter-IPM solver against the convex LP/QP IPM on a +suite of .nl files. + +For each problem we solve it twice through the same pounce binary: + - solver_selection=nlp (the Ipopt-derived filter line-search IPM) + - solver_selection= (the convex/conic HSDE IPM, pounce-convex) + +and compare final objective, iteration count, wall-clock, and status, +using each solver's --json-output report (uniform schema across paths). + +Usage: + compare_solvers.py + convex_sel in {lp-ipm, qp-ipm} +""" +import json +import subprocess +import sys +import tempfile +import time +from pathlib import Path + + +def solve(bin_path, nl, selection, time_limit=120): + """Run one solve; return (record_dict, wall_seconds).""" + with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as tf: + out = tf.name + start = time.time() + try: + subprocess.run( + [bin_path, nl, f"solver_selection={selection}", + "--json-output", out], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, + timeout=time_limit, + ) + except subprocess.TimeoutExpired: + return {"status": "TimeOut", "objective": None, + "iteration_count": None, "wall": time_limit}, time_limit + wall = time.time() - start + try: + with open(out) as fh: + data = json.load(fh) + sol = data.get("solution", {}) + stat = data.get("statistics", {}) + return { + "status": sol.get("status"), + "objective": stat.get("final_objective", sol.get("objective")), + "iteration_count": stat.get("iteration_count"), + "wall": stat.get("total_wallclock_time_secs", wall), + }, wall + except Exception as e: + return {"status": f"ParseError:{e}", "objective": None, + "iteration_count": None, "wall": wall}, wall + finally: + Path(out).unlink(missing_ok=True) + + +def main(): + bin_path, nl_dir, convex_sel, out_json = sys.argv[1:5] + nls = sorted(Path(nl_dir).glob("*.nl")) + rows = [] + print(f"{'problem':<14}{'nlp_obj':>16}{'cvx_obj':>16}" + f"{'nlp_it':>8}{'cvx_it':>8}{'nlp_s':>9}{'cvx_s':>9}{' reldiff':>12}") + for nl in nls: + name = nl.stem + nlp, _ = solve(bin_path, str(nl), "nlp") + cvx, _ = solve(bin_path, str(nl), convex_sel) + a, b = nlp["objective"], cvx["objective"] + if a is not None and b is not None: + denom = max(abs(a), abs(b), 1e-10) + reldiff = abs(a - b) / denom + else: + reldiff = None + rows.append({"name": name, "nlp": nlp, "convex": cvx, + "reldiff": reldiff}) + fa = f"{a:.6e}" if a is not None else "n/a" + fb = f"{b:.6e}" if b is not None else "n/a" + fr = f"{reldiff:.2e}" if reldiff is not None else "n/a" + print(f"{name:<14}{fa:>16}{fb:>16}" + f"{str(nlp['iteration_count']):>8}{str(cvx['iteration_count']):>8}" + f"{nlp['wall']:>9.3f}{cvx['wall']:>9.3f}{fr:>12}") + with open(out_json, "w") as fh: + json.dump(rows, fh, indent=2) + print(f"\nwrote {out_json}") + + +if __name__ == "__main__": + main() diff --git a/crates/pounce-algorithm/src/debug.rs b/crates/pounce-algorithm/src/debug.rs index f2fa1887..9c98bf63 100644 --- a/crates/pounce-algorithm/src/debug.rs +++ b/crates/pounce-algorithm/src/debug.rs @@ -30,169 +30,14 @@ use pounce_common::types::Number; use pounce_linalg::{Matrix, Vector}; use pounce_nlp::ipopt_nlp::SplitNames; -/// Where in the main loop a checkpoint fired. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum Checkpoint { - /// Top of an outer iteration — after the intermediate callback, - /// before this iteration's Newton step is computed. The iterate, - /// multipliers, and μ all reflect the *accepted* point from the - /// previous iteration. - IterStart, - /// After the barrier parameter μ was updated for this iteration - /// (before the search direction is computed). - AfterBarrierUpdate, - /// After the primal-dual Newton step was computed — the search - /// direction `δ` (`data.delta`), the applied regularization, and the - /// KKT factorization are available. - AfterSearchDirection, - /// After the line search chose a step length and the trial point was - /// accepted — α (`info_alpha_*`) and the new iterate are in place. - AfterStep, - /// The line search *rejected* this iteration's step — it hit the tiny-step - /// floor or exhausted its backtracks without an acceptable point, and the - /// solver is about to fall into restoration. The search direction `δ` and - /// the un-accepted current iterate are intact for inspection. The "why did - /// the line search give up here?" stop, distinct from the restoration entry - /// that follows. - StepRejected, - /// Just before the algorithm switches into the restoration phase — - /// the iterate that tripped restoration is intact. The most-requested - /// "why did this go to restoration?" stop. - PreRestoration, - /// Just after the restoration phase returns, so its effect on the - /// iterate can be inspected. - PostRestoration, - /// The solve has finished (or is about to): fired once before - /// `optimize` returns, at the final iterate, carrying the outcome - /// via [`DebugCtx::status`]. Lets a debugger drop in for a - /// post-mortem at the failing (or final) point. The [`DebugAction`] - /// returned at this checkpoint is **ignored** — the solve is already - /// over, so there is nothing left to resume or stop. - Terminated, -} - -impl Checkpoint { - /// The stable wire/CLI protocol name for this checkpoint. These strings - /// are intentionally **not** the variant identifiers (`AfterBarrierUpdate` - /// → `"after_mu"`, `PreRestoration` → `"pre_restoration_entry"`) — they're - /// the names the JSON protocol and `stop-at` use, so match on the variant, - /// not the string. Locked by the `checkpoint_as_str_is_stable` test. - pub fn as_str(self) -> &'static str { - match self { - Checkpoint::IterStart => "iter_start", - Checkpoint::AfterBarrierUpdate => "after_mu", - Checkpoint::AfterSearchDirection => "after_search_dir", - Checkpoint::AfterStep => "after_step", - Checkpoint::StepRejected => "step_rejected", - Checkpoint::PreRestoration => "pre_restoration_entry", - Checkpoint::PostRestoration => "post_restoration_exit", - Checkpoint::Terminated => "terminated", - } - } - - /// Sub-iteration checkpoints (everything between `IterStart` and the - /// next `IterStart`). - pub fn is_sub_iteration(self) -> bool { - matches!( - self, - Checkpoint::AfterBarrierUpdate - | Checkpoint::AfterSearchDirection - | Checkpoint::AfterStep - | Checkpoint::StepRejected - | Checkpoint::PreRestoration - | Checkpoint::PostRestoration - ) - } -} - -/// What the algorithm should do after a [`DebugHook`] returns. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum DebugAction { - /// Keep solving. - Resume, - /// Stop the solve now. Surfaces to the caller as - /// `SolverReturn::UserRequestedStop`. - Stop, -} +pub use pounce_common::debug::{ + Checkpoint, DebugAction, DebugHook, DebugState, IterSnapshot, KktReport, KktTriplets, LFactor, + ResidKind, Residual, +}; /// The eight primal/dual blocks of an iterate, addressable by name. pub const BLOCK_NAMES: [&str; 8] = ["x", "s", "y_c", "y_d", "z_l", "z_u", "v_l", "v_u"]; -/// KKT-factorization report (see [`DebugCtx::kkt`]). The inertia of a -/// well-posed primal-dual system is `(n_pos = n, n_neg = m, n_zero = 0)`; -/// a mismatch (or nonzero regularization) is the classic signal that the -/// step is being stabilized. -#[derive(Clone, Debug)] -pub struct KktReport { - /// The outer iteration this factorization was assembled at — may be the - /// previous iteration when paused at `iter_start` (look-back). - pub iter: i32, - /// Augmented-system dimension (n + m). - pub dim: i32, - /// Negative eigenvalues reported (-1 if the backend has no inertia). - pub n_neg: i32, - /// Positive eigenvalues = `dim − n_neg` (-1 if unknown). - pub n_pos: i32, - /// Expected negatives = number of equality + inequality multipliers. - pub expected_neg: i32, - /// Whether the backend reports inertia. - pub provides_inertia: bool, - /// `true` when reported inertia matches the expected `(n, m, 0)`. - pub inertia_correct: bool, - /// Primal regularization δ_w applied to the (1,1) block. - pub delta_w: Number, - /// Dual regularization δ_c applied to the (3,3)/(4,4) blocks. - pub delta_c: Number, - /// Factorization status (debug string). - pub status: String, -} - -/// Which residual space a [`Residual`] entry comes from. -/// -/// Primal entries are the per-constraint violations whose max-norm is -/// `inf_pr`; dual entries are the per-variable Lagrangian-gradient -/// components whose max-norm is `inf_du`. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum ResidKind { - /// Equality constraint residual `c_i(x)`. - Eq, - /// Inequality residual `d_i(x) − s_i` (the IPM slack reformulation). - Ineq, - /// `x`-space stationarity component `(∇_x L)_i`. - DualX, - /// `s`-space stationarity component `(∇_s L)_i`. - DualS, -} - -impl ResidKind { - /// Short label used in the debugger's `print residuals` output and - /// the JSON `space` field. Stable — readers may match on it. - pub fn tag(self) -> &'static str { - match self { - ResidKind::Eq => "c", - ResidKind::Ineq => "d-s", - ResidKind::DualX => "grad_x_L", - ResidKind::DualS => "grad_s_L", - } - } - - /// `true` for the primal (constraint) spaces, `false` for the dual - /// (stationarity) spaces. - pub fn is_primal(self) -> bool { - matches!(self, ResidKind::Eq | ResidKind::Ineq) - } -} - -/// One signed residual component at the current iterate: its space, its -/// index within that space, and its value. See -/// [`DebugCtx::constraint_residuals`] / [`DebugCtx::dual_residuals`]. -#[derive(Clone, Copy, Debug)] -pub struct Residual { - pub kind: ResidKind, - pub index: usize, - pub value: Number, -} - /// Live, mutable view of solver state handed to a [`DebugHook`]. /// /// Cheap to construct (two `Rc` clones); every accessor borrows the @@ -276,6 +121,21 @@ impl IterateSnapshot { } } +impl IterSnapshot for IterateSnapshot { + fn iter(&self) -> i32 { + IterateSnapshot::iter(self) + } + fn mu(&self) -> Number { + IterateSnapshot::mu(self) + } + fn block(&self, name: &str) -> Option> { + IterateSnapshot::block(self, name) + } + fn as_any(&self) -> &dyn std::any::Any { + self + } +} + impl DebugCtx { pub fn new(data: IpoptDataHandle, cq: IpoptCqHandle, cp: Checkpoint) -> Self { Self { @@ -786,22 +646,111 @@ fn block_ref_mut<'a>( }) } -/// A consumer that the main loop pauses at each checkpoint. The CLI's -/// REPL / agent driver is the production implementation. -pub trait DebugHook { - /// Called at every [`Checkpoint`]. Inspect and/or mutate via `ctx`, - /// then return whether to keep solving. - fn at_checkpoint(&mut self, ctx: &mut DebugCtx) -> DebugAction; - - /// Whether the main loop should capture the (heavier) KKT matrix - /// triplets and `LDLᵀ` factor into `kkt_debug` this iteration, so - /// `viz kkt` / `viz L` can look back at the previous iteration's - /// system. True while the debugger is stepping interactively; an - /// implementation that has detached (running free) returns false so - /// the O(nnz) assembly isn't paid every iteration. Defaults to true - /// — the cheap inertia/status fields are captured regardless. - fn wants_kkt_capture(&self) -> bool { - true +/// Expose the NLP solver's [`DebugCtx`] through the shared +/// [`DebugState`] surface, forwarding to its inherent accessors. The NLP +/// solver supports the full surface, so every method is overridden. +impl DebugState for DebugCtx { + fn as_any(&self) -> Option<&dyn std::any::Any> { + Some(self) + } + fn as_any_mut(&mut self) -> Option<&mut dyn std::any::Any> { + Some(self) + } + fn checkpoint(&self) -> Checkpoint { + DebugCtx::checkpoint(self) + } + fn iter(&self) -> i32 { + DebugCtx::iter(self) + } + fn mu(&self) -> Number { + DebugCtx::mu(self) + } + fn objective(&self) -> Number { + DebugCtx::objective(self) + } + fn inf_pr(&self) -> Number { + DebugCtx::inf_pr(self) + } + fn inf_du(&self) -> Number { + DebugCtx::inf_du(self) + } + fn complementarity(&self) -> Number { + DebugCtx::complementarity(self) + } + fn alpha(&self) -> (Number, Number) { + DebugCtx::alpha(self) + } + fn block_dims(&self) -> Vec<(&'static str, usize)> { + DebugCtx::block_dims(self) + } + fn block(&self, name: &str) -> Option> { + DebugCtx::block(self, name) + } + fn delta_block(&self, name: &str) -> Option> { + DebugCtx::delta_block(self, name) + } + fn status(&self) -> Option<&str> { + DebugCtx::status(self) + } + fn nlp_error(&self) -> Number { + DebugCtx::nlp_error(self) + } + fn bound_slack(&self, which: &str) -> Option> { + DebugCtx::bound_slack(self, which) + } + fn regularization(&self) -> Number { + DebugCtx::regularization(self) + } + fn ls_count(&self) -> i32 { + DebugCtx::ls_count(self) + } + fn kkt(&self) -> Option { + DebugCtx::kkt(self) + } + fn kkt_matrix(&self) -> Option { + DebugCtx::kkt_matrix(self) + } + fn kkt_l_factor(&self) -> Option { + DebugCtx::kkt_l_factor(self) + } + fn kkt_captured_iter(&self) -> Option { + DebugCtx::kkt_captured_iter(self) + } + fn request_l_factor(&mut self) -> bool { + // Arming for future solves is handled by `DebugHook::wants_kkt_capture` + // (the NLP solver captures the factor while the debugger steps); here we + // just report whether it is already available now. + DebugCtx::kkt_l_factor(self).is_some() + } + fn request_kkt_matrix(&mut self) -> bool { + DebugCtx::kkt_matrix(self).is_some() + } + fn set_mu(&mut self, mu: Number) -> Result<(), String> { + DebugCtx::set_mu(self, mu) + } + fn set_block(&mut self, name: &str, vals: &[Number]) -> Result<(), String> { + DebugCtx::set_block(self, name, vals) + } + fn set_component(&mut self, name: &str, idx: usize, val: Number) -> Result<(), String> { + DebugCtx::set_component(self, name, idx, val) + } + fn snapshot(&self) -> Option> { + DebugCtx::snapshot(self).map(|s| Box::new(s) as Box) + } + fn restore(&mut self, snap: &dyn IterSnapshot) -> bool { + match snap.as_any().downcast_ref::() { + Some(s) => { + DebugCtx::restore(self, s); + true + } + None => false, + } + } + fn constraint_residuals(&self) -> Option> { + DebugCtx::constraint_residuals(self) + } + fn dual_residuals(&self) -> Option> { + DebugCtx::dual_residuals(self) } } diff --git a/crates/pounce-cli/Cargo.toml b/crates/pounce-cli/Cargo.toml index 2abe4eb8..00b3747c 100644 --- a/crates/pounce-cli/Cargo.toml +++ b/crates/pounce-cli/Cargo.toml @@ -26,6 +26,13 @@ path = "src/main.rs" name = "pounce_sens" path = "src/bin/pounce_sens.rs" +# `pounce_cblib` solves a CBLIB Conic Benchmark Format (.cbf) instance +# through the convex conic driver and emits a pounce.solve-report/v1 JSON +# report. Used by the benchmarks/cblib harness (conic tier). +[[bin]] +name = "pounce_cblib" +path = "src/bin/pounce_cblib.rs" + [dependencies] pounce-common.workspace = true pounce-nlp = { workspace = true, features = ["serde"] } @@ -41,6 +48,9 @@ pounce-sensitivity.workspace = true pounce-solve-report.workspace = true pounce-studio-core.workspace = true pounce-observability.workspace = true +# Specialized convex LP/QP interior-point solver, dispatched to for +# classified LP / convex-QP `.nl` inputs. +pounce-convex.workspace = true serde = { version = "1", features = ["derive"] } serde_json = "1" tracing.workspace = true diff --git a/crates/pounce-cli/src/bin/pounce_cblib.rs b/crates/pounce-cli/src/bin/pounce_cblib.rs new file mode 100644 index 00000000..be81195e --- /dev/null +++ b/crates/pounce-cli/src/bin/pounce_cblib.rs @@ -0,0 +1,215 @@ +//! `pounce_cblib` — solve a CBLIB Conic Benchmark Format (`.cbf`) instance +//! through POUNCE's convex conic driver and emit a `pounce.solve-report/v1` +//! JSON report (status / iterations / time / objective, and the +//! per-iteration trace at `--json-detail full`). +//! +//! ```text +//! pounce_cblib [--json-output PATH] [--json-detail summary|full] +//! [--max-iter N] +//! ``` +//! +//! Used by the `benchmarks/cblib` harness to record per-instance POUNCE +//! results alongside the `.nl`-driven suites. The exit code follows the AMPL +//! convention via [`status_to_solve_result_num`] (0 = solved). + +use pounce_cli::cbf; +use pounce_cli::solve_report::{ + status_to_solve_result_num, write_report_file, InputDescriptor, ReportBuilder, ReportDetail, +}; +use pounce_convex::{solve_socp_ipm, QpOptions, QpStatus}; +use pounce_feral::FeralSolverInterface; +use pounce_linsol::SparseSymLinearSolverInterface; +use pounce_nlp::return_codes::ApplicationReturnStatus; +use pounce_nlp::solve_statistics::IterRecord; +use std::path::PathBuf; +use std::process::ExitCode; + +fn qp_status_to_ars(s: QpStatus) -> ApplicationReturnStatus { + match s { + QpStatus::Optimal => ApplicationReturnStatus::SolveSucceeded, + QpStatus::PrimalInfeasible => ApplicationReturnStatus::InfeasibleProblemDetected, + QpStatus::DualInfeasible => ApplicationReturnStatus::DivergingIterates, // unbounded + QpStatus::IterationLimit => ApplicationReturnStatus::MaximumIterationsExceeded, + QpStatus::NumericalFailure => ApplicationReturnStatus::InternalError, + } +} + +fn backend() -> Box { + Box::new(FeralSolverInterface::new()) +} + +struct Args { + file: PathBuf, + json_output: Option, + detail: ReportDetail, + max_iter: usize, + debug: Option, + debug_script: Option, +} + +fn parse_args() -> Result { + let mut file = None; + let mut json_output = None; + let mut detail = ReportDetail::Summary; + let mut max_iter = 500; + let mut debug = None; + let mut debug_script = None; + let mut it = std::env::args().skip(1); + while let Some(a) = it.next() { + match a.as_str() { + "--debug" => debug = Some(pounce_cli::cli::DebugMode::Repl), + "--debug-json" => debug = Some(pounce_cli::cli::DebugMode::Json), + "--debug-script" => { + debug_script = Some(PathBuf::from( + it.next().ok_or("--debug-script needs a PATH")?, + )); + } + "--json-output" => { + json_output = Some(PathBuf::from( + it.next().ok_or("--json-output needs a PATH")?, + )); + } + "--json-detail" => { + let d = it.next().ok_or("--json-detail needs a value")?; + detail = ReportDetail::parse(&d)?; + } + "--max-iter" => { + max_iter = it + .next() + .ok_or("--max-iter needs N")? + .parse() + .map_err(|_| "--max-iter expects an integer")?; + } + other if other.starts_with("--") => return Err(format!("unknown flag '{other}'")), + other => { + if file.is_some() { + return Err(format!("unexpected extra argument '{other}'")); + } + file = Some(PathBuf::from(other)); + } + } + } + Ok(Args { + file: file.ok_or("usage: pounce_cblib [--json-output PATH] …")?, + json_output, + detail, + max_iter, + debug, + debug_script, + }) +} + +fn main() -> ExitCode { + let args = match parse_args() { + Ok(a) => a, + Err(e) => { + eprintln!("pounce_cblib: {e}"); + return ExitCode::from(2); + } + }; + + let text = match std::fs::read_to_string(&args.file) { + Ok(t) => t, + Err(e) => { + eprintln!("pounce_cblib: cannot read {}: {e}", args.file.display()); + return ExitCode::from(2); + } + }; + let model = match cbf::parse(&text) { + Ok(m) => m, + Err(e) => { + eprintln!("pounce_cblib: parse {}: {e}", args.file.display()); + return ExitCode::from(2); + } + }; + let cp = match model.to_conic() { + Ok(c) => c, + Err(e) => { + eprintln!("pounce_cblib: map {}: {e}", args.file.display()); + return ExitCode::from(2); + } + }; + + let full = matches!(args.detail, ReportDetail::Full); + let opts = QpOptions { + max_iter: args.max_iter, + collect_iterates: full, + ..QpOptions::default() + }; + let t0 = std::time::Instant::now(); + let sol = if let Some(mode) = args.debug { + // Interactive debug of the conic solve (exp/power → non-symmetric + // HSDE; orthant/SOC/PSD → direct symmetric IPM). A `--debug-script` + // drives it non-interactively. + use pounce_cli::debug_repl::SolverDebugger; + let mut dbg = SolverDebugger::new(mode, None); + if let Some(p) = &args.debug_script { + dbg = dbg.with_script(p.to_string_lossy().into_owned()); + } + pounce_convex::solve_socp_ipm_debug(&cp.prob, &cp.cones, &opts, &mut dbg, backend) + } else { + solve_socp_ipm(&cp.prob, &cp.cones, &opts, backend) + }; + let elapsed = t0.elapsed().as_secs_f64(); + let obj = cp.cbf_objective(sol.obj, model.minimize); + let status = qp_status_to_ars(sol.status); + + println!( + "POUNCE (conic HSDE, pounce-convex): {:?} obj={obj:.8} iters={} ({elapsed:.3}s) [{}]", + sol.status, + sol.iters, + args.file.display(), + ); + + if let Some(path) = &args.json_output { + let size_bytes = std::fs::metadata(&args.file).ok().map(|m| m.len()); + let mut b = ReportBuilder::new( + args.detail, + InputDescriptor::CbfFile { + path: args.file.clone(), + size_bytes, + }, + ); + b.problem.n_variables = cp.prob.n as _; + b.problem.n_constraints = (cp.prob.m_eq() + cp.prob.m_ineq()) as _; + b.problem.n_objectives = 1; + b.problem.minimize = model.minimize; + b.solution.status = status; + b.solution.solve_result_num = status_to_solve_result_num(status); + b.solution.objective = obj; + b.solution.x = sol.x.clone(); + b.stats.iteration_count = sol.iters as _; + b.stats.final_objective = obj; + b.stats.total_wallclock_time_secs = elapsed; + if full { + b.iterations = sol + .iterates + .iter() + .map(|it| IterRecord { + iter: it.iter as _, + objective: it.objective, + inf_pr: it.primal_infeasibility, + inf_du: it.dual_infeasibility, + mu: it.mu, + d_norm: 0.0, + regularization: 0.0, + alpha_dual: it.alpha_dual, + alpha_primal: it.alpha_primal, + alpha_primal_char: ' ', + ls_trials: 0, + }) + .collect(); + } + let report = b.finish(); + if let Err(e) = write_report_file(path, &report) { + eprintln!("pounce_cblib: write {}: {e}", path.display()); + return ExitCode::from(2); + } + } + + if matches!(sol.status, QpStatus::Optimal) { + ExitCode::SUCCESS + } else { + ExitCode::from(1) + } +} diff --git a/crates/pounce-cli/src/cbf.rs b/crates/pounce-cli/src/cbf.rs new file mode 100644 index 00000000..73a5a27b --- /dev/null +++ b/crates/pounce-cli/src/cbf.rs @@ -0,0 +1,867 @@ +//! Reader for the **Conic Benchmark Format** (CBF / `.cbf`), the format the +//! CBLIB conic benchmark library () ships its instances +//! in, plus a mapping to a pounce conic program. +//! +//! # Format (the subset CBLIB's exponential-cone GPs use) +//! +//! A CBF file is a sequence of keyword blocks, blank-line separated, with `#` +//! comments. The blocks this reader understands: +//! +//! - `VER` — format version (read and ignored). +//! - `OBJSENSE` — `MIN` or `MAX`. +//! - `POWCONES` — power-cone parameter table: each entry's weight vector +//! `(α₀, α₁)` gives the exponent `α = α₀/(α₀+α₁)`, referenced as `@k:POW`. +//! - `VAR n k` — `n` scalar variables partitioned into `k` cones, one cone +//! per following line as `CONE dim` (`F`/`L+`/`L-`/`L=`/`EXP`/`Q`/`@k:POW`). +//! - `CON m k` — `m` scalar constraint rows `Ax + b`, each lying in one of `k` +//! cones (same syntax). `L=` ⇒ `Ax+b = 0`, `L-` ⇒ `≤ 0`, `L+` ⇒ `≥ 0`. +//! - `OBJACOORD` / `OBJBCOORD` — sparse objective `c` and constant `c₀`. +//! - `ACOORD` / `BCOORD` — sparse `A` (`row col val`) and `b` (`row val`). +//! - `PSDCON` + `HCOORD` / `DCOORD` — affine PSD constraints +//! `D_c + Σ_k x_k H_{c,k} ⪰ 0`, mapped to a `Psd` cone on the slack. +//! +//! The problem is `min/max cᵀx + c₀ s.t. x ∈ K_var, Ax + b ∈ K_con`, +//! plus any affine PSD constraints. +//! +//! # Exponential-cone convention +//! +//! CBF's primal exponential cone is `{(u₀,u₁,u₂) : u₀ ≥ u₁·exp(u₂/u₁), u₁>0}` +//! (the **first** coordinate is the bound), whereas pounce's is +//! `{(x,y,z) : z ≥ y·exp(x/y), y>0}` (the **third** is the bound). The triple +//! therefore **reverses**: pounce `(x,y,z) = (u₂, u₁, u₀)`. See +//! `dev-notes/hsde.md` (the CBLIB benchmark-tier plan). + +use pounce_convex::{ConeSpec, QpProblem, Triplet}; +use std::fmt; + +/// A parsed CBF cone declaration: a kind and the number of scalar rows it +/// spans. +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct ConeDecl { + pub kind: ConeKind, + pub dim: usize, + /// The power-cone exponent `α ∈ (0, 1)` for [`ConeKind::Pow`]; `None` + /// for every other kind. + pub alpha: Option, +} + +/// The CBF cone kinds this reader supports (`F`/`L=`/`L+`/`L-`/`EXP`/`Q`, +/// plus the 3-D power cone `@k:POW` resolved against `POWCONES`). Unsupported +/// kinds (PSD `DCOORD`, the rotated SOC `QR`, dual power cones) are rejected +/// at parse time with a clear error rather than silently mis-handled. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ConeKind { + /// `F` — free (ℝ): no constraint. + Free, + /// `L=` — the zero cone: the rows are equalities. + Zero, + /// `L+` — nonnegative orthant. + Nonneg, + /// `L-` — nonpositive orthant. + Nonpos, + /// `EXP` — the 3-D exponential cone (CBF order; reversed for pounce). + Exp, + /// `Q` — the second-order cone. + SecondOrder, + /// `@k:POW` — the 3-D power cone, with the exponent `α` resolved from the + /// referenced `POWCONES` parameter set (stored on [`ConeDecl::alpha`]). + Pow, +} + +impl ConeKind { + /// Parse a plain (non-parametric) cone token. Parametric cones + /// (`@k:POW`) are handled by [`parse_cone_token`]. + fn parse(tok: &str) -> Option { + Some(match tok { + "F" => ConeKind::Free, + "L=" => ConeKind::Zero, + "L+" => ConeKind::Nonneg, + "L-" => ConeKind::Nonpos, + "EXP" => ConeKind::Exp, + "Q" => ConeKind::SecondOrder, + _ => return None, + }) + } +} + +/// A parsed CBF instance: the objective, the variable / constraint cone +/// partitions, and the sparse `A`/`b` (and objective `c`/`c₀`). +#[derive(Debug, Clone)] +pub struct CbfModel { + /// `true` for `OBJSENSE MIN`, `false` for `MAX`. + pub minimize: bool, + pub num_var: usize, + pub var_cones: Vec, + pub num_con: usize, + pub con_cones: Vec, + /// Objective linear term `c`, dense (length `num_var`). + pub c: Vec, + /// Objective constant `c₀`. + pub c0: f64, + /// Constraint matrix `A` as `(row, col, val)` triplets. + pub a: Vec<(usize, usize, f64)>, + /// Constraint constant `b`, dense (length `num_con`). + pub b: Vec, + /// Matrix sizes of the affine PSD constraints (`PSDCON`): constraint `c` + /// asserts `D_c + Σ_k x_k H_{c,k} ⪰ 0` over a `psdcon_dims[c]`-square + /// matrix. + pub psdcon_dims: Vec, + /// `HCOORD` entries `(con, var, i, j, val)`: `H_{con,var}[i][j] = val` + /// (lower triangle, `i ≥ j`) — the coefficient of scalar variable `var` + /// on entry `(i,j)` of PSD constraint `con`. + pub hcoord: Vec<(usize, usize, usize, usize, f64)>, + /// `DCOORD` entries `(con, i, j, val)`: `D_con[i][j] = val` (lower + /// triangle) — the constant term of PSD constraint `con`. + pub dcoord: Vec<(usize, usize, usize, f64)>, +} + +/// A CBF instance mapped to a pounce conic program +/// `min ½xᵀPx + cᵀx s.t. Ax = b, Gx ⪯_K h` (here `P = 0`). The `cones` +/// partition the rows of `G` in order; `obj_constant` (`c₀`, sign-adjusted) +/// is added to `solution.obj` to recover the CBF objective value. +#[derive(Debug, Clone)] +pub struct ConicProgram { + pub prob: QpProblem, + pub cones: Vec, + pub obj_constant: f64, +} + +impl ConicProgram { + /// Recover the CBF objective value from a pounce solution objective + /// `½xᵀPx + cᵀx`. For a `MAX` instance the linear term was negated when + /// building, so the value is `−pounce_obj + c₀`. + pub fn cbf_objective(&self, pounce_obj: f64, minimize: bool) -> f64 { + if minimize { + pounce_obj + self.obj_constant + } else { + -pounce_obj + self.obj_constant + } + } +} + +/// A CBF parse / mapping failure, with enough context to locate the problem. +#[derive(Debug, Clone, PartialEq)] +pub enum CbfError { + /// A required section or token was missing / malformed. + Malformed(String), + /// A cone kind appeared that this reader does not yet support. + UnsupportedCone(String), + /// An exponential cone was declared with a dimension other than 3. + BadExpDim(usize), +} + +impl fmt::Display for CbfError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + CbfError::Malformed(s) => write!(f, "malformed CBF: {s}"), + CbfError::UnsupportedCone(s) => write!(f, "unsupported CBF cone '{s}'"), + CbfError::BadExpDim(d) => write!(f, "EXP cone must have dim 3, got {d}"), + } + } +} + +impl std::error::Error for CbfError {} + +/// A cursor over the meaningful (non-blank, non-comment) lines of a CBF file. +struct Lines<'a> { + rows: Vec<&'a str>, + pos: usize, +} + +impl<'a> Lines<'a> { + fn new(text: &'a str) -> Self { + let rows = text + .lines() + .map(str::trim) + .filter(|l| !l.is_empty() && !l.starts_with('#')) + .collect(); + Lines { rows, pos: 0 } + } + + fn next(&mut self) -> Option<&'a str> { + let row = self.rows.get(self.pos).copied(); + if row.is_some() { + self.pos += 1; + } + row + } + + fn require(&mut self, what: &str) -> Result<&'a str, CbfError> { + self.next() + .ok_or_else(|| CbfError::Malformed(format!("expected {what}, got end of file"))) + } +} + +fn parse_usize(tok: &str, what: &str) -> Result { + tok.parse() + .map_err(|_| CbfError::Malformed(format!("expected integer for {what}, got '{tok}'"))) +} + +fn parse_f64(tok: &str, what: &str) -> Result { + tok.parse() + .map_err(|_| CbfError::Malformed(format!("expected number for {what}, got '{tok}'"))) +} + +/// Resolve a cone token to its `(kind, alpha)`. Plain tokens (`F`, `EXP`, +/// …) go through [`ConeKind::parse`]; a parametric `@k:POW` token looks up +/// power-cone parameter set `k` in `pow_params` and resolves the exponent +/// `α = α₀ / (α₀ + α₁)` for the 3-D power cone (parameter vector `(α₀, α₁)`). +fn parse_cone_token( + tok: &str, + pow_params: &[Vec], +) -> Result<(ConeKind, Option), CbfError> { + if let Some(rest) = tok.strip_prefix('@') { + // `@k:KIND` — a reference into a parameter table (only POW today). + let (idx, kind) = rest + .split_once(':') + .ok_or_else(|| CbfError::Malformed(format!("bad parametric cone '{tok}'")))?; + if kind != "POW" { + return Err(CbfError::UnsupportedCone(format!("@{idx}:{kind}"))); + } + let k = parse_usize(idx, "POW reference index")?; + let params = pow_params + .get(k) + .ok_or_else(|| CbfError::Malformed(format!("POW references @{k}, not declared")))?; + if params.len() != 2 { + return Err(CbfError::UnsupportedCone(format!( + "POW with {} parameters (only the 3-D power cone, 2 parameters, is supported)", + params.len() + ))); + } + let alpha = params[0] / (params[0] + params[1]); + Ok((ConeKind::Pow, Some(alpha))) + } else { + let kind = + ConeKind::parse(tok).ok_or_else(|| CbfError::UnsupportedCone(tok.to_string()))?; + Ok((kind, None)) + } +} + +/// Read a `VAR`/`CON`-style cone partition: a header `total k`, then `k` +/// lines of `CONE dim`. Returns `(total, cones)` and validates the dims sum. +fn parse_cone_block( + lines: &mut Lines, + what: &str, + pow_params: &[Vec], +) -> Result<(usize, Vec), CbfError> { + let header = lines.require(what)?; + let mut it = header.split_whitespace(); + let total = parse_usize(it.next().unwrap_or(""), &format!("{what} total"))?; + let k = parse_usize(it.next().unwrap_or(""), &format!("{what} cone count"))?; + let mut cones = Vec::with_capacity(k); + let mut sum = 0; + for _ in 0..k { + let line = lines.require(&format!("{what} cone"))?; + let mut t = line.split_whitespace(); + let tok = t.next().unwrap_or(""); + let (kind, alpha) = parse_cone_token(tok, pow_params)?; + let dim = parse_usize(t.next().unwrap_or(""), &format!("{what} cone dim"))?; + if kind == ConeKind::Exp && dim != 3 { + return Err(CbfError::BadExpDim(dim)); + } + if kind == ConeKind::Pow && dim != 3 { + return Err(CbfError::Malformed(format!( + "{what}: only the 3-D power cone is supported, got POW dim {dim}" + ))); + } + sum += dim; + cones.push(ConeDecl { kind, dim, alpha }); + } + if sum != total { + return Err(CbfError::Malformed(format!( + "{what} cone dims sum to {sum}, header says {total}" + ))); + } + Ok((total, cones)) +} + +/// Parse a CBF instance from its text. Errors on malformed input or a cone +/// kind outside the supported subset. +pub fn parse(text: &str) -> Result { + let mut lines = Lines::new(text); + + let mut minimize = true; + let mut num_var = 0usize; + let mut var_cones = Vec::new(); + let mut num_con = 0usize; + let mut con_cones = Vec::new(); + let mut c = Vec::new(); + let mut c0 = 0.0; + let mut a = Vec::new(); + let mut b = Vec::new(); + let mut pow_params: Vec> = Vec::new(); + let mut psdcon_dims: Vec = Vec::new(); + let mut hcoord: Vec<(usize, usize, usize, usize, f64)> = Vec::new(); + let mut dcoord: Vec<(usize, usize, usize, f64)> = Vec::new(); + let mut seen_var = false; + + while let Some(kw) = lines.next() { + match kw { + "VER" => { + lines.require("VER value")?; + } + // Power-cone parameter table: `n total`, then for each of the `n` + // cones a length followed by that many α weights. Must precede the + // `VAR`/`CON` that reference it via `@k:POW`. + "POWCONES" => { + let header = lines.require("POWCONES header")?; + let mut it = header.split_whitespace(); + let ncones = parse_usize(it.next().unwrap_or(""), "POWCONES count")?; + let _total = parse_usize(it.next().unwrap_or(""), "POWCONES total")?; + for _ in 0..ncones { + let len = parse_usize(lines.require("POWCONES cone length")?, "POWCONES len")?; + let mut params = Vec::with_capacity(len); + for _ in 0..len { + params.push(parse_f64( + lines.require("POWCONES alpha")?, + "POWCONES alpha", + )?); + } + pow_params.push(params); + } + } + // Affine PSD constraints: header `count`, then one matrix size + // per constraint. The constraint `c` is `D_c + Σ_k x_k H_{c,k} ⪰ 0`. + "PSDCON" => { + let count = parse_usize(lines.require("PSDCON count")?, "PSDCON count")?; + for _ in 0..count { + psdcon_dims.push(parse_usize(lines.require("PSDCON dim")?, "PSDCON dim")?); + } + } + // Variable coefficient matrices of the PSD constraints. + "HCOORD" => { + let nnz = parse_usize(lines.require("HCOORD nnz")?, "HCOORD nnz")?; + for _ in 0..nnz { + let line = lines.require("HCOORD entry")?; + let mut t = line.split_whitespace(); + let con = parse_usize(t.next().unwrap_or(""), "HCOORD con")?; + let var = parse_usize(t.next().unwrap_or(""), "HCOORD var")?; + let i = parse_usize(t.next().unwrap_or(""), "HCOORD i")?; + let j = parse_usize(t.next().unwrap_or(""), "HCOORD j")?; + let val = parse_f64(t.next().unwrap_or(""), "HCOORD val")?; + hcoord.push((con, var, i, j, val)); + } + } + // Constant matrices of the PSD constraints. + "DCOORD" => { + let nnz = parse_usize(lines.require("DCOORD nnz")?, "DCOORD nnz")?; + for _ in 0..nnz { + let line = lines.require("DCOORD entry")?; + let mut t = line.split_whitespace(); + let con = parse_usize(t.next().unwrap_or(""), "DCOORD con")?; + let i = parse_usize(t.next().unwrap_or(""), "DCOORD i")?; + let j = parse_usize(t.next().unwrap_or(""), "DCOORD j")?; + let val = parse_f64(t.next().unwrap_or(""), "DCOORD val")?; + dcoord.push((con, i, j, val)); + } + } + "OBJSENSE" => { + let s = lines.require("OBJSENSE value")?; + minimize = match s { + "MIN" => true, + "MAX" => false, + other => { + return Err(CbfError::Malformed(format!("bad OBJSENSE '{other}'"))); + } + }; + } + "VAR" => { + let (n, cones) = parse_cone_block(&mut lines, "VAR", &pow_params)?; + num_var = n; + var_cones = cones; + c = vec![0.0; n]; + seen_var = true; + } + "CON" => { + let (m, cones) = parse_cone_block(&mut lines, "CON", &pow_params)?; + num_con = m; + con_cones = cones; + b = vec![0.0; m]; + } + "OBJACOORD" => { + if !seen_var { + return Err(CbfError::Malformed("OBJACOORD before VAR".into())); + } + let nnz = parse_usize(lines.require("OBJACOORD nnz")?, "OBJACOORD nnz")?; + for _ in 0..nnz { + let line = lines.require("OBJACOORD entry")?; + let mut t = line.split_whitespace(); + let col = parse_usize(t.next().unwrap_or(""), "OBJACOORD col")?; + let val = parse_f64(t.next().unwrap_or(""), "OBJACOORD val")?; + if col >= num_var { + return Err(CbfError::Malformed(format!("OBJACOORD col {col} ≥ n"))); + } + c[col] += val; + } + } + "OBJBCOORD" => { + c0 = parse_f64(lines.require("OBJBCOORD value")?, "OBJBCOORD")?; + } + "ACOORD" => { + let nnz = parse_usize(lines.require("ACOORD nnz")?, "ACOORD nnz")?; + a.reserve(nnz); + for _ in 0..nnz { + let line = lines.require("ACOORD entry")?; + let mut t = line.split_whitespace(); + let row = parse_usize(t.next().unwrap_or(""), "ACOORD row")?; + let col = parse_usize(t.next().unwrap_or(""), "ACOORD col")?; + let val = parse_f64(t.next().unwrap_or(""), "ACOORD val")?; + a.push((row, col, val)); + } + } + "BCOORD" => { + if b.is_empty() && num_con > 0 { + b = vec![0.0; num_con]; + } + let nnz = parse_usize(lines.require("BCOORD nnz")?, "BCOORD nnz")?; + for _ in 0..nnz { + let line = lines.require("BCOORD entry")?; + let mut t = line.split_whitespace(); + let row = parse_usize(t.next().unwrap_or(""), "BCOORD row")?; + let val = parse_f64(t.next().unwrap_or(""), "BCOORD val")?; + if row >= num_con { + return Err(CbfError::Malformed(format!("BCOORD row {row} ≥ m"))); + } + b[row] += val; + } + } + // Integrality markers: solve the continuous relaxation, so the + // index list is read and discarded. + "INT" => { + let nnz = parse_usize(lines.require("INT count")?, "INT count")?; + for _ in 0..nnz { + lines.require("INT entry")?; + } + } + other => { + return Err(CbfError::UnsupportedCone(format!("section '{other}'"))); + } + } + } + + if !seen_var { + return Err(CbfError::Malformed("no VAR section".into())); + } + + Ok(CbfModel { + minimize, + num_var, + var_cones, + num_con, + con_cones, + c, + c0, + a, + b, + psdcon_dims, + hcoord, + dcoord, + }) +} + +impl CbfModel { + /// Row-major dense access to `A` is avoided; instead group `A` by row so + /// constraint-cone rows can pull their own coefficients. + fn rows_of_a(&self) -> Vec> { + let mut rows = vec![Vec::new(); self.num_con]; + for &(r, col, val) in &self.a { + rows[r].push((col, val)); + } + rows + } + + /// Map this instance to a pounce conic program. Variable cones become + /// slack blocks `s = −Gx ∈ K` (a `G = −I` selection, `h = 0`); + /// constraint cones use `s = h − Gx = Ax + b ∈ K`. `L=` rows become + /// equalities `Ax = −b`. Exponential triples are reversed, and power + /// triples rotated, into pounce cone order (see the per-arm comments). + pub fn to_conic(&self) -> Result { + let n = self.num_var; + let a_rows = self.rows_of_a(); + + let mut g: Vec = Vec::new(); + let mut h: Vec = Vec::new(); + let mut cones: Vec = Vec::new(); + let mut a_eq: Vec = Vec::new(); + let mut b_eq: Vec = Vec::new(); + + // Push one cone row whose slack must equal the affine form `(coeffs, + // constant)`: `s = h − Gx = Σ coeffs·x + constant` ⇒ `G = −coeffs`, + // `h = constant`. + let push_row = + |g: &mut Vec, h: &mut Vec, coeffs: &[(usize, f64)], constant: f64| { + let r = h.len(); + for &(col, val) in coeffs { + g.push(Triplet::new(r, col, -val)); + } + h.push(constant); + }; + + // --- Variable cones: the affine form is the variable itself. --- + let mut v = 0usize; // running scalar-variable index + for cone in &self.var_cones { + match cone.kind { + ConeKind::Free => {} + ConeKind::Nonneg => { + for j in 0..cone.dim { + push_row(&mut g, &mut h, &[(v + j, 1.0)], 0.0); + } + cones.push(ConeSpec::Nonneg(cone.dim)); + } + ConeKind::Nonpos => { + // x ≤ 0 ⇒ slack −x ≥ 0. + for j in 0..cone.dim { + push_row(&mut g, &mut h, &[(v + j, -1.0)], 0.0); + } + cones.push(ConeSpec::Nonneg(cone.dim)); + } + ConeKind::SecondOrder => { + for j in 0..cone.dim { + push_row(&mut g, &mut h, &[(v + j, 1.0)], 0.0); + } + cones.push(ConeSpec::SecondOrder(cone.dim)); + } + ConeKind::Exp => { + // Reverse to pounce order (x,y,z) = (u₂,u₁,u₀). + for j in (0..3).rev() { + push_row(&mut g, &mut h, &[(v + j, 1.0)], 0.0); + } + cones.push(ConeSpec::Exponential); + } + ConeKind::Pow => { + // CBF power cone (x₀,x₁,x₂): x₀^β₀·x₁^β₁ ≥ |x₂|. pounce + // K_α = {|x| ≤ y^α z^{1−α}} ⇒ (x,y,z) = (x₂, x₀, x₁) with + // α = β₀. Emit slack rows in that pounce order. + let alpha = cone.alpha.ok_or_else(|| { + CbfError::Malformed("POW cone missing its exponent".into()) + })?; + push_row(&mut g, &mut h, &[(v + 2, 1.0)], 0.0); // x ← x₂ + push_row(&mut g, &mut h, &[(v, 1.0)], 0.0); // y ← x₀ + push_row(&mut g, &mut h, &[(v + 1, 1.0)], 0.0); // z ← x₁ + cones.push(ConeSpec::Power(alpha)); + } + ConeKind::Zero => { + // x = 0 — an equality on the variable. + for j in 0..cone.dim { + a_eq.push(Triplet::new(b_eq.len(), v + j, 1.0)); + b_eq.push(0.0); + } + } + } + v += cone.dim; + } + + // --- Constraint cones: the affine form is row `r` of `Ax + b`. --- + let mut r = 0usize; // running constraint-row index + for cone in &self.con_cones { + match cone.kind { + ConeKind::Zero => { + // Ax + b = 0 ⇒ Ax = −b. + for i in 0..cone.dim { + let row = r + i; + for &(col, val) in &a_rows[row] { + a_eq.push(Triplet::new(b_eq.len(), col, val)); + } + b_eq.push(-self.b[row]); + } + } + ConeKind::Nonneg => { + // Ax + b ≥ 0 ⇒ slack = Ax + b ≥ 0. + for i in 0..cone.dim { + let row = r + i; + push_row(&mut g, &mut h, &a_rows[row], self.b[row]); + } + cones.push(ConeSpec::Nonneg(cone.dim)); + } + ConeKind::Nonpos => { + // Ax + b ≤ 0 ⇒ slack = −(Ax + b) ≥ 0. + for i in 0..cone.dim { + let row = r + i; + let neg: Vec<(usize, f64)> = + a_rows[row].iter().map(|&(c, v)| (c, -v)).collect(); + push_row(&mut g, &mut h, &neg, -self.b[row]); + } + cones.push(ConeSpec::Nonneg(cone.dim)); + } + ConeKind::SecondOrder => { + for i in 0..cone.dim { + let row = r + i; + push_row(&mut g, &mut h, &a_rows[row], self.b[row]); + } + cones.push(ConeSpec::SecondOrder(cone.dim)); + } + ConeKind::Exp => { + // Slack must be ((Ax+b)₂, (Ax+b)₁, (Ax+b)₀) — reversed. + for i in (0..3).rev() { + let row = r + i; + push_row(&mut g, &mut h, &a_rows[row], self.b[row]); + } + cones.push(ConeSpec::Exponential); + } + ConeKind::Pow => { + // pounce (x,y,z) = ((Ax+b)₂, (Ax+b)₀, (Ax+b)₁), α = β₀. + let alpha = cone.alpha.ok_or_else(|| { + CbfError::Malformed("POW cone missing its exponent".into()) + })?; + for &i in &[2usize, 0, 1] { + let row = r + i; + push_row(&mut g, &mut h, &a_rows[row], self.b[row]); + } + cones.push(ConeSpec::Power(alpha)); + } + ConeKind::Free => {} // a free constraint row imposes nothing + } + r += cone.dim; + } + + // --- Affine PSD constraints (PSDCON): D_c + Σ_k x_k H_{c,k} ⪰ 0. --- + // The slack svec entry (i,j) is `D[i][j] + Σ_k x_k H_k[i][j]`, scaled + // by √2 off the diagonal so smat(s) reconstructs the matrix. Appended + // after the VAR/CON cone rows as Psd blocks. + if !self.psdcon_dims.is_empty() { + use std::collections::HashMap; + let r2 = std::f64::consts::SQRT_2; + let mut h_by: HashMap<(usize, usize, usize), Vec<(usize, f64)>> = HashMap::new(); + for &(con, var, i, j, val) in &self.hcoord { + h_by.entry((con, i, j)).or_default().push((var, val)); + } + let mut d_by: HashMap<(usize, usize, usize), f64> = HashMap::new(); + for &(con, i, j, val) in &self.dcoord { + *d_by.entry((con, i, j)).or_insert(0.0) += val; + } + for (con, &dim) in self.psdcon_dims.iter().enumerate() { + // svec order: column by column, lower triangle (j ≤ i). + for j in 0..dim { + for i in j..dim { + let scale = if i == j { 1.0 } else { r2 }; + let constant = scale * d_by.get(&(con, i, j)).copied().unwrap_or(0.0); + let coeffs: Vec<(usize, f64)> = h_by + .get(&(con, i, j)) + .map(|v| v.iter().map(|&(var, val)| (var, scale * val)).collect()) + .unwrap_or_default(); + push_row(&mut g, &mut h, &coeffs, constant); + } + } + cones.push(ConeSpec::Psd(dim)); + } + } + + // Objective: minimize cᵀx (negate for MAX), constant carried out. + let c: Vec = if self.minimize { + self.c.clone() + } else { + self.c.iter().map(|v| -v).collect() + }; + + let prob = QpProblem { + n, + p_lower: Vec::new(), + c, + a: a_eq, + b: b_eq, + g, + h, + lb: Vec::new(), + ub: Vec::new(), + }; + Ok(ConicProgram { + prob, + cones, + obj_constant: self.c0, + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + const TINY_GP: &str = "\ +VER +2 + +OBJSENSE +MIN + +VAR +4 2 +F 1 +EXP 3 + +CON +1 1 +L= 1 + +OBJACOORD +1 +0 1.0 + +ACOORD +2 +0 1 1.0 +0 3 -1.0 + +BCOORD +1 +0 -2.0 +"; + + #[test] + fn parses_sections() { + let m = parse(TINY_GP).unwrap(); + assert!(m.minimize); + assert_eq!(m.num_var, 4); + assert_eq!(m.var_cones.len(), 2); + assert_eq!(m.var_cones[0].kind, ConeKind::Free); + assert_eq!(m.var_cones[1].kind, ConeKind::Exp); + assert_eq!(m.num_con, 1); + assert_eq!(m.con_cones[0].kind, ConeKind::Zero); + assert_eq!(m.c, vec![1.0, 0.0, 0.0, 0.0]); + assert_eq!(m.b, vec![-2.0]); + assert_eq!(m.a.len(), 2); + } + + #[test] + fn rejects_bad_exp_dim() { + let bad = TINY_GP.replace("EXP 3", "EXP 2"); + assert!(matches!(parse(&bad), Err(CbfError::BadExpDim(2)))); + } + + #[test] + fn rejects_unsupported_cone() { + let bad = TINY_GP.replace("EXP 3", "POW 3"); + assert!(matches!(parse(&bad), Err(CbfError::UnsupportedCone(_)))); + } + + #[test] + fn cone_dim_sum_is_checked() { + let bad = TINY_GP.replace("4 2", "5 2"); + assert!(matches!(parse(&bad), Err(CbfError::Malformed(_)))); + } + + #[test] + fn to_conic_builds_exp_and_equality() { + let m = parse(TINY_GP).unwrap(); + let cp = m.to_conic().unwrap(); + // One exp cone over vars {1,2,3}; the L= row is an equality. + assert_eq!(cp.cones, vec![ConeSpec::Exponential]); + assert_eq!(cp.prob.m_eq(), 1); // the L= constraint + assert_eq!(cp.prob.m_ineq(), 3); // the exp cone's 3 rows + assert_eq!(cp.obj_constant, 0.0); + // The exp rows reverse CBF (vars 1,2,3) to pounce order (3,2,1): + // G row 0 selects var 3, row 1 var 2, row 2 var 1 (each with −1·−? ). + // push_row uses G = −coeffs with coeff +1 ⇒ G entry −1. + let row0: Vec<_> = cp.prob.g.iter().filter(|t| t.row == 0).collect(); + assert_eq!(row0.len(), 1); + assert_eq!(row0[0].col, 3); + } + + const TINY_POW: &str = "\ +VER +2 + +OBJSENSE +MAX + +POWCONES +1 2 +2 +3.0 +1.0 + +VAR +3 1 +@0:POW 3 + +CON +0 0 + +OBJACOORD +1 +2 1.0 +"; + + #[test] + fn parses_powcones_and_resolves_alpha() { + let m = parse(TINY_POW).unwrap(); + assert_eq!(m.var_cones.len(), 1); + assert_eq!(m.var_cones[0].kind, ConeKind::Pow); + // α = α₀/(α₀+α₁) = 3/(3+1) = 0.75. + let a = m.var_cones[0].alpha.unwrap(); + assert!((a - 0.75).abs() < 1e-12, "alpha {a}"); + } + + #[test] + fn to_conic_builds_power_cone_with_permutation() { + let m = parse(TINY_POW).unwrap(); + let cp = m.to_conic().unwrap(); + assert_eq!(cp.cones, vec![ConeSpec::Power(0.75)]); + assert_eq!(cp.prob.m_ineq(), 3); // the power cone's 3 rows + // pounce (x,y,z) = (CBF x₂, x₀, x₁): row 0 selects var 2. + let row0: Vec<_> = cp.prob.g.iter().filter(|t| t.row == 0).collect(); + assert_eq!(row0[0].col, 2); + let row1: Vec<_> = cp.prob.g.iter().filter(|t| t.row == 1).collect(); + assert_eq!(row1[0].col, 0); + let row2: Vec<_> = cp.prob.g.iter().filter(|t| t.row == 2).collect(); + assert_eq!(row2[0].col, 1); + } + + #[test] + fn pow_reference_to_undeclared_set_errors() { + let bad = TINY_POW.replace("@0:POW", "@5:POW"); + assert!(matches!(parse(&bad), Err(CbfError::Malformed(_)))); + } + + const TINY_SDP: &str = "\ +VER +2 + +OBJSENSE +MAX + +VAR +1 1 +F 1 + +PSDCON +1 +2 + +OBJACOORD +1 +0 1.0 + +HCOORD +2 +0 0 0 0 -1.0 +0 0 1 1 -1.0 + +DCOORD +2 +0 0 0 2.0 +0 1 1 5.0 +"; + + #[test] + fn parses_psdcon_hcoord_dcoord() { + let m = parse(TINY_SDP).unwrap(); + assert_eq!(m.psdcon_dims, vec![2]); + assert_eq!(m.hcoord.len(), 2); + assert_eq!(m.dcoord.len(), 2); + } + + #[test] + fn to_conic_builds_psd_constraint() { + let m = parse(TINY_SDP).unwrap(); + let cp = m.to_conic().unwrap(); + // One affine PSD constraint of size 2 → a Psd(2) cone over 3 rows. + assert_eq!(cp.cones, vec![ConeSpec::Psd(2)]); + assert_eq!(cp.prob.m_ineq(), 3); + // s = svec(M − λI) = [2 − λ, 0, 5 − λ]: h = [2, 0, 5] and the diagonal + // svec rows (0 and 2) carry +λ from G (push_row negates H = −1). + assert_eq!(cp.prob.h, vec![2.0, 0.0, 5.0]); + let row0: Vec<_> = cp.prob.g.iter().filter(|t| t.row == 0).collect(); + assert_eq!(row0.len(), 1); + assert!((row0[0].val - 1.0).abs() < 1e-12); // −H = −(−1) = +1 + } +} diff --git a/crates/pounce-cli/src/debug_repl.rs b/crates/pounce-cli/src/debug_repl.rs index 946b6b04..3cb8589b 100644 --- a/crates/pounce-cli/src/debug_repl.rs +++ b/crates/pounce-cli/src/debug_repl.rs @@ -3,7 +3,7 @@ //! Implements [`pounce_algorithm::debug::DebugHook`]. The core fires us //! at every checkpoint (today: the top of each outer iteration); we //! pause, hand the user (or an agent) a command prompt, and apply -//! inspect / mutate / flow commands against the live [`DebugCtx`] before +//! inspect / mutate / flow commands against the live [`DebugState`] before //! returning [`DebugAction::Resume`] or [`DebugAction::Stop`]. //! //! Two front ends share one command engine ([`SolverDebugger::dispatch`]): @@ -42,10 +42,10 @@ use crate::cli::DebugMode; use pounce_algorithm::debug::{ - is_live_tolerance, Checkpoint, DebugAction, DebugCtx, DebugHook, IterateSnapshot, ResidKind, - Residual, BLOCK_NAMES, + is_live_tolerance, DebugCtx, IterateSnapshot, ResidKind, Residual, BLOCK_NAMES, }; use pounce_algorithm::debug_rank::{RankReport, RankRow}; +use pounce_common::debug::{Checkpoint, DebugAction, DebugHook, DebugState}; use pounce_common::reg_options::{DefaultValue, OptionType, RegisteredOptions}; use pounce_nlp::ipopt_nlp::SplitNames; use pounce_presolve::dulmage_mendelsohn::DulmageMendelsohnPartition; @@ -412,7 +412,7 @@ impl Metric { _ => return None, }) } - fn eval(self, ctx: &DebugCtx) -> f64 { + fn eval(self, ctx: &dyn DebugState) -> f64 { match self { Metric::Mu => ctx.mu(), Metric::InfPr => ctx.inf_pr(), @@ -505,7 +505,7 @@ impl Atom { }) } - fn holds(&self, ctx: &DebugCtx) -> bool { + fn holds(&self, ctx: &dyn DebugState) -> bool { self.op.eval(self.metric.eval(ctx), self.rhs) } } @@ -575,7 +575,7 @@ impl Condition { }) } - fn holds(&self, ctx: &DebugCtx) -> bool { + fn holds(&self, ctx: &dyn DebugState) -> bool { let mut acc = self.first.holds(ctx); for (join, atom) in &self.rest { let v = atom.holds(ctx); @@ -981,7 +981,7 @@ pub struct SolverDebugger { break_events: HashSet<&'static str>, /// Per-iteration primal-dual snapshots for `goto`/`restart`, keyed by /// iteration index. Capped at [`SNAPSHOT_CAP`] (oldest evicted). - snapshots: BTreeMap, + snapshots: BTreeMap>, /// Shared slot for `resolve` to request a fresh solve from the /// current point with staged options. `None` disables `resolve`. restart: Option, @@ -1022,8 +1022,19 @@ pub struct SolverDebugger { /// `None` when no `.nl` model was wired in. See Lee et al. (2024, /// ). structure_book: Option, + /// A command queue shared with another REPL (the branch-and-bound tree + /// debugger), used when this debugger drives a *sub-solve* under + /// `--debug-script`. When set, [`next_command_line`](Self::next_command_line) + /// pops from it instead of stdin, so a single script interleaves tree and + /// interior-point commands. + script_queue: Option, } +/// A command queue shared between the tree debugger and an interior-point +/// sub-solve debugger so one `--debug-script` drives both (they run +/// sequentially, never concurrently). +pub type SharedScript = Rc>>; + impl SolverDebugger { /// Fully interactive: pause at the first iteration and at the /// terminal checkpoint. @@ -1065,9 +1076,24 @@ impl SolverDebugger { prompt_interrupts: 0, equation_book: None, structure_book: None, + script_queue: None, } } + /// A debugger that stays **quiet** (never pauses) until [`arm`]ed. Used as + /// the on-demand sub-solve hook for the branch-and-bound tree debugger: + /// it sees a node's relaxation solve only when the user steps into it. + /// + /// [`arm`]: DebugHook::arm + pub fn quiet(mode: DebugMode, reg: Option>) -> Self { + let mut d = Self::new(mode, reg); + d.step = false; + d.pause_iters = false; + d.pause_terminal = false; + d.detached = true; + d + } + /// Queue a debugger script to run once at the first pause. pub fn with_script(mut self, path: String) -> Self { self.pending_script = Some(path); @@ -1090,6 +1116,14 @@ impl SolverDebugger { self.structure_book = Some(book); } + /// Read commands from a queue shared with the tree debugger, so one + /// `--debug-script` drives both this sub-solve and the tree (see + /// [`SharedScript`]). Takes precedence over stdin / the editor. + pub fn with_shared_script(mut self, queue: SharedScript) -> Self { + self.script_queue = Some(queue); + self + } + /// Enable the `resolve` command, wiring the shared restart slot the /// CLI's re-solve loop reads. pub fn with_restart(mut self, cell: RestartCell) -> Self { @@ -1152,7 +1186,7 @@ impl SolverDebugger { /// First conditional breakpoint that holds at the current state, if /// any. Returns its source text (for the pause banner / event). - fn matched_condition(&self, ctx: &DebugCtx) -> Option { + fn matched_condition(&self, ctx: &dyn DebugState) -> Option { if self.detached { return None; } @@ -1165,7 +1199,7 @@ impl SolverDebugger { /// First armed event that fires at the current checkpoint/state, if /// any. Events are derived from observable state, so they're evaluated /// at the checkpoint where the relevant quantity is meaningful. - fn matched_event(&self, ctx: &DebugCtx) -> Option<&'static str> { + fn matched_event(&self, ctx: &dyn DebugState) -> Option<&'static str> { if self.detached || self.break_events.is_empty() { return None; } @@ -1209,7 +1243,7 @@ impl SolverDebugger { /// First watchpoint whose value changed (beyond its threshold) since /// the previous iteration. Updates the stored baselines. - fn matched_watchpoint(&mut self, ctx: &DebugCtx) -> Option { + fn matched_watchpoint(&mut self, ctx: &dyn DebugState) -> Option { if self.detached { return None; } @@ -1243,7 +1277,7 @@ impl SolverDebugger { // ---- command engine ----------------------------------------------- - fn dispatch(&mut self, line: &str, ctx: &mut DebugCtx) -> CmdOut { + fn dispatch(&mut self, line: &str, ctx: &mut dyn DebugState) -> CmdOut { // Quote-aware so a file path with spaces (e.g. `load "my run.json"`) // survives as a single token; identical to `split_whitespace` for any // line without quotes. `owned` backs the `&str` slices `toks` holds. @@ -1294,7 +1328,7 @@ impl SolverDebugger { } None => CmdOut::err("usage: tbreak "), }, - "watchpoint" | "wp" => self.cmd_watchpoint(rest), + "watchpoint" | "wp" => self.cmd_watchpoint(rest, ctx), "commands" => self.cmd_commands(rest), "stop-at" | "stopat" => self.cmd_stop_at(rest), "progress" => match rest.first().copied() { @@ -1314,19 +1348,34 @@ impl SolverDebugger { "complete" => self.cmd_complete(rest), "viz" | "plot" => self.cmd_viz(rest, ctx), "save" => self.cmd_save(rest, ctx), - "load" => self.cmd_load(rest, ctx), - "sweep" => self.cmd_sweep(rest, ctx), - "multistart" => self.cmd_multistart(rest, ctx), + "load" => match as_nlp_mut(ctx) { + Some(c) => self.cmd_load(rest, c), + None => nlp_only("load"), + }, + "sweep" => match as_nlp_mut(ctx) { + Some(c) => self.cmd_sweep(rest, c), + None => nlp_only("sweep"), + }, + "multistart" => match as_nlp_mut(ctx) { + Some(c) => self.cmd_multistart(rest, c), + None => nlp_only("multistart"), + }, "goto" | "jump" => self.cmd_goto(rest, ctx), "restart" => match self.snapshots.keys().next().copied() { Some(k) => self.restore_to(k, ctx), None => CmdOut::err("no snapshots captured yet"), }, - "resolve" | "re-solve" => self.cmd_resolve(ctx), + "resolve" | "re-solve" => match as_nlp(ctx) { + Some(c) => self.cmd_resolve(c), + None => nlp_only("resolve"), + }, "ask" | "explain" | "claude" => self.cmd_ask(rest, ctx), "watch" | "display" => self.cmd_watch(rest), "diff" => self.cmd_diff(ctx), - "diagnose" | "diag" => self.cmd_diagnose(ctx), + "diagnose" | "diag" => match as_nlp(ctx) { + Some(c) => self.cmd_diagnose(c), + None => nlp_only("diagnose"), + }, "source" => self.cmd_source(rest, ctx), "detach" => { self.detached = true; @@ -1428,7 +1477,7 @@ impl SolverDebugger { CmdOut::ok(lines) } - fn cmd_info(&self, ctx: &DebugCtx) -> CmdOut { + fn cmd_info(&self, ctx: &dyn DebugState) -> CmdOut { let dims: Vec<_> = ctx.block_dims(); let dims_json: serde_json::Map = dims .iter() @@ -1460,7 +1509,7 @@ impl SolverDebugger { })) } - fn cmd_print(&self, rest: &[&str], ctx: &DebugCtx) -> CmdOut { + fn cmd_print(&self, rest: &[&str], ctx: &dyn DebugState) -> CmdOut { let Some(&what) = rest.first() else { return self.cmd_info(ctx); }; @@ -1480,11 +1529,14 @@ impl SolverDebugger { return self.cmd_print_equation(&rest[1..]); } if what == "rank" { - return self.cmd_print_rank(ctx); + return match as_nlp(ctx) { + Some(c) => self.cmd_print_rank(c), + None => nlp_only("print rank"), + }; } // step / delta blocks: `dx`, `ds`, ... or `delta_x`. - let delta = what.strip_prefix("d").filter(|b| BLOCK_NAMES.contains(b)); - if BLOCK_NAMES.contains(&what) { + let delta = what.strip_prefix("d").filter(|b| is_block(ctx, b)); + if is_block(ctx, what) { match ctx.block(what) { Some(v) => CmdOut::ok(vec![fmt_vec(what, &v)]) .with_data(serde_json::json!({"name": what, "values": v})), @@ -1521,7 +1573,7 @@ impl SolverDebugger { /// below `tol`) and reports the min slack; `inactive` is the mirror — /// it counts the bounds with room to spare (slack ≥ `tol`) and reports /// the max slack, the variables furthest from their bound. - fn cmd_print_bounds(&self, ctx: &DebugCtx, active: bool) -> CmdOut { + fn cmd_print_bounds(&self, ctx: &dyn DebugState, active: bool) -> CmdOut { let tol = 1e-6; let mut lines = Vec::new(); let mut cats = serde_json::Map::new(); @@ -1567,7 +1619,7 @@ impl SolverDebugger { /// together; `primal`/`dual` restrict to one space. Default `k=10`. /// The top primal entry equals `inf_pr`; the top dual equals /// `inf_du`. Args may appear in either order. - fn cmd_print_residuals(&self, rest: &[&str], ctx: &DebugCtx) -> CmdOut { + fn cmd_print_residuals(&self, rest: &[&str], ctx: &dyn DebugState) -> CmdOut { let mut k: Option = None; let mut filter: Option = None; // Some(true)=primal, Some(false)=dual for &arg in rest { @@ -1613,7 +1665,12 @@ impl SolverDebugger { // print as `mass_balance` rather than `c[3]` — the model-vs-index // gap Lee et al. (2024, ) flag // for equation-oriented debugging. `None` ⇒ index labels throughout. - let names = ctx.split_names(); + // Model names are NLP-specific (.col/.row); only the NLP debugger + // exposes them — other solvers fall back to index labels. + let names = ctx + .as_any() + .and_then(|a| a.downcast_ref::()) + .and_then(|c| c.split_names()); let name_of = |r: &Residual| resid_name(r, &names); let lines = top @@ -1947,7 +2004,7 @@ impl SolverDebugger { /// `print kkt` — inertia + regularization of the factored augmented /// system. Only meaningful at/after `after_search_dir`. - fn cmd_print_kkt(&self, ctx: &DebugCtx) -> CmdOut { + fn cmd_print_kkt(&self, ctx: &dyn DebugState) -> CmdOut { let Some(k) = ctx.kkt() else { return CmdOut::err( "no KKT factorization yet — stop at `after_search_dir` (e.g. `stop-at kkt`)", @@ -2151,7 +2208,7 @@ impl SolverDebugger { } } - fn cmd_set(&mut self, rest: &[&str], ctx: &mut DebugCtx) -> CmdOut { + fn cmd_set(&mut self, rest: &[&str], ctx: &mut dyn DebugState) -> CmdOut { match rest { ["mu", v] => match v.parse::() { Ok(mu) => match ctx.set_mu(mu) { @@ -2160,7 +2217,10 @@ impl SolverDebugger { }, Err(_) => CmdOut::err("usage: set mu "), }, - ["opt", name, value] => self.cmd_set_opt(name, value, ctx), + ["opt", name, value] => match as_nlp_mut(ctx) { + Some(c) => self.cmd_set_opt(name, value, c), + None => nlp_only("set opt"), + }, [target, value] => self.cmd_set_block(target, value, ctx), _ => CmdOut::err( "usage: set mu | set [] | set | set opt ", @@ -2169,7 +2229,7 @@ impl SolverDebugger { } /// `set x[2] 1.5` (component) or `set x 1,2,3` (whole block). - fn cmd_set_block(&mut self, target: &str, value: &str, ctx: &mut DebugCtx) -> CmdOut { + fn cmd_set_block(&mut self, target: &str, value: &str, ctx: &mut dyn DebugState) -> CmdOut { // Component form: name[idx] if let Some(open) = target.find('[') { if !target.ends_with(']') { @@ -2350,7 +2410,7 @@ impl SolverDebugger { /// `save [path]` — dump the full current iterate (all blocks + /// search-direction blocks) and residual scalars to a JSON file for /// external analysis. Defaults to a temp path keyed by iteration. - fn cmd_save(&self, rest: &[&str], ctx: &DebugCtx) -> CmdOut { + fn cmd_save(&self, rest: &[&str], ctx: &dyn DebugState) -> CmdOut { let iter = ctx.iter(); let path = rest .first() @@ -2358,7 +2418,7 @@ impl SolverDebugger { .unwrap_or_else(|| std::env::temp_dir().join(format!("pounce-dbg-iter{iter}.json"))); let collect = |delta: bool| -> serde_json::Map { let mut m = serde_json::Map::new(); - for &b in BLOCK_NAMES.iter() { + for b in block_names(ctx) { let v = if delta { ctx.delta_block(b) } else { @@ -2726,7 +2786,7 @@ impl SolverDebugger { } /// `goto ` — rewind to a captured iteration. - fn cmd_goto(&mut self, rest: &[&str], ctx: &mut DebugCtx) -> CmdOut { + fn cmd_goto(&mut self, rest: &[&str], ctx: &mut dyn DebugState) -> CmdOut { match rest.first().and_then(|s| s.parse::().ok()) { Some(k) => self.restore_to(k, ctx), None => CmdOut::err("usage: goto "), @@ -2736,10 +2796,14 @@ impl SolverDebugger { /// Restore the snapshot for iteration `k` (primal-dual state only; /// strategy history is not rewound). Stays paused so the user can /// inspect / re-tune before `continue`/`step`. - fn restore_to(&mut self, k: i32, ctx: &mut DebugCtx) -> CmdOut { + fn restore_to(&mut self, k: i32, ctx: &mut dyn DebugState) -> CmdOut { match self.snapshots.get(&k) { Some(snap) => { - ctx.restore(snap); + if !ctx.restore(snap.as_ref()) { + return CmdOut::err(format!( + "this solver does not support rewinding to iter {k}" + )); + } CmdOut::ok(vec![format!( "rewound to iter {k} (primal-dual only; strategy history not restored). \ `continue`/`step` to resume." @@ -2800,7 +2864,7 @@ impl SolverDebugger { /// selects another provider (`codex`, `gemini`, `llm`) or a full command /// template. Degrades gracefully when the CLI isn't installed. /// "Ask why this step looks wrong without leaving the debugger." - fn cmd_ask(&self, rest: &[&str], ctx: &DebugCtx) -> CmdOut { + fn cmd_ask(&self, rest: &[&str], ctx: &dyn DebugState) -> CmdOut { let question = if rest.is_empty() { "Explain the current state of this interior-point solve and suggest what to try next." .to_string() @@ -2848,7 +2912,7 @@ impl SolverDebugger { /// `watchpoint [] [threshold] | clear | del ` — pause /// when a watched value changes by more than `threshold` (default 0, /// any change) between iterations. - fn cmd_watchpoint(&mut self, rest: &[&str]) -> CmdOut { + fn cmd_watchpoint(&mut self, rest: &[&str], ctx: &dyn DebugState) -> CmdOut { match rest { [] => { let v: Vec<&str> = self.watchpoints.iter().map(|w| w.raw.as_str()).collect(); @@ -2879,7 +2943,7 @@ impl SolverDebugger { } _ => (spec.to_string(), None), }; - if !BLOCK_NAMES.contains(&block.as_str()) { + if !is_block(ctx, block.as_str()) { return CmdOut::err(format!("unknown block `{block}`")); } let raw = spec.to_string(); @@ -2945,7 +3009,7 @@ impl SolverDebugger { /// `diff` — what changed in the iterate since the previous captured /// iteration: per-block max |Δ| (and where), plus Δμ. - fn cmd_diff(&self, ctx: &DebugCtx) -> CmdOut { + fn cmd_diff(&self, ctx: &dyn DebugState) -> CmdOut { let iter = ctx.iter(); let Some((&piter, prev)) = self.snapshots.range(..iter).next_back() else { return CmdOut::err("no previous iterate to diff against"); @@ -2954,7 +3018,7 @@ impl SolverDebugger { let dmu = ctx.mu() - prev.mu(); lines.push(format!(" mu = {:.6e} (Δ {:+.3e})", ctx.mu(), dmu)); let mut blocks = serde_json::Map::new(); - for b in BLOCK_NAMES { + for b in block_names(ctx) { let (Some(cur), Some(old)) = (ctx.block(b), prev.block(b)) else { continue; }; @@ -2992,7 +3056,7 @@ impl SolverDebugger { /// `source ` — run debugger commands from a file (one per line; /// `#` comments and blank lines skipped). Stops early if a command /// resumes or stops the solve, propagating that control flow. - fn cmd_source(&mut self, rest: &[&str], ctx: &mut DebugCtx) -> CmdOut { + fn cmd_source(&mut self, rest: &[&str], ctx: &mut dyn DebugState) -> CmdOut { let Some(&path) = rest.first() else { return CmdOut::err("usage: source "); }; @@ -3023,7 +3087,7 @@ impl SolverDebugger { } } - fn cmd_viz(&self, rest: &[&str], ctx: &mut DebugCtx) -> CmdOut { + fn cmd_viz(&self, rest: &[&str], ctx: &mut dyn DebugState) -> CmdOut { let Some(&target) = rest.first() else { return CmdOut::err("usage: viz "); }; @@ -3097,12 +3161,12 @@ impl SolverDebugger { } } // Resolve the vector to visualize. - let (label, vals) = if BLOCK_NAMES.contains(&target) { + let (label, vals) = if is_block(ctx, target) { match ctx.block(target) { Some(v) => (target.to_string(), v), None => return CmdOut::err(format!("no data for block `{target}`")), } - } else if let Some(blk) = target.strip_prefix("d").filter(|b| BLOCK_NAMES.contains(b)) { + } else if let Some(blk) = target.strip_prefix("d").filter(|b| is_block(ctx, b)) { match ctx.delta_block(blk) { Some(v) => (format!("d{blk}"), v), None => return CmdOut::err(format!("no search direction for `d{blk}`")), @@ -3125,7 +3189,7 @@ impl SolverDebugger { // ---- front ends ---------------------------------------------------- /// Emit the pause banner / state for the current front end. - fn emit_pause(&self, ctx: &DebugCtx, reason: Option<&str>) { + fn emit_pause(&self, ctx: &dyn DebugState, reason: Option<&str>) { let terminal = matches!(ctx.checkpoint(), Checkpoint::Terminated); match self.mode { DebugMode::Repl => { @@ -3214,7 +3278,7 @@ impl SolverDebugger { /// same scalar fields, under the same names, as `pause` (minus the /// per-pause `dims` / `breakpoints` / `watches`); fired while running /// between pauses. - fn emit_progress_event(&self, ctx: &DebugCtx) { + fn emit_progress_event(&self, ctx: &dyn DebugState) { let ev = serde_json::json!({ "event": "progress", "iter": ctx.iter(), @@ -3352,6 +3416,16 @@ impl SolverDebugger { /// an editor is active (history / Tab / Ctrl-R); otherwise a plain /// reader with a stderr prompt (REPL) or no prompt (JSON). fn next_command_line(&mut self) -> Option { + // A shared script (sub-solve under the tree debugger's --debug-script) + // takes precedence: pop the next command, echoing it. An empty queue + // returns None, which resumes this sub-solve back to the tree. + if let Some(q) = &self.script_queue { + let cmd = q.borrow_mut().pop_front(); + if let Some(c) = &cmd { + let _ = writeln!(std::io::stderr(), "pounce-dbg> {c}"); + } + return cmd; + } if let DebugMode::Repl = self.mode { if let Some(ed) = self.editor.as_mut() { return match ed.readline("pounce-dbg> ") { @@ -3770,7 +3844,17 @@ impl DebugHook for SolverDebugger { !self.detached } - fn at_checkpoint(&mut self, ctx: &mut DebugCtx) -> DebugAction { + /// Re-arm a [`quiet`](SolverDebugger::quiet) debugger to drop in at the + /// next checkpoint of the next sub-solve (the tree debugger's + /// step-into-relaxation). + fn arm(&mut self) { + self.step = true; + self.detached = false; + self.pause_iters = true; + self.pause_terminal = true; + } + + fn at_checkpoint(&mut self, ctx: &mut dyn DebugState) -> DebugAction { // One-time handshake so a JSON client learns the protocol / // capabilities before the first pause. if matches!(self.mode, DebugMode::Json) && !self.hello_sent { @@ -3785,8 +3869,12 @@ impl DebugHook for SolverDebugger { // launches the next; `Some` means "re-solving from the next // seed", `None` means the sweep finished (fall through). if self.sweep.is_some() { - if let Some(action) = self.drive_sweep(ctx) { - return action; + // A sweep can only be started on the NLP solver, so the + // downcast succeeds whenever one is in flight. + if let Some(c) = as_nlp(ctx) { + if let Some(action) = self.drive_sweep(c) { + return action; + } } } let failed = ctx.status().map(|s| !is_success_status(s)).unwrap_or(false); @@ -3814,7 +3902,7 @@ impl DebugHook for SolverDebugger { // by evicting the oldest beyond the cap. if is_iter_start { if let Some(snap) = ctx.snapshot() { - self.snapshots.insert(snap.iter(), snap); + self.snapshots.insert(ctx.iter(), snap); while self.snapshots.len() > SNAPSHOT_CAP { let Some(&oldest) = self.snapshots.keys().next() else { break; @@ -3908,7 +3996,7 @@ impl DebugHook for SolverDebugger { impl SolverDebugger { /// Read and dispatch commands until one resumes or stops the solve. - fn prompt_loop(&mut self, ctx: &mut DebugCtx) -> DebugAction { + fn prompt_loop(&mut self, ctx: &mut dyn DebugState) -> DebugAction { // Run a `--debug-script` once, at the first pause, before reading // any interactive command. It may itself resume / stop the solve. if let Some(path) = self.pending_script.take() { @@ -4043,6 +4131,39 @@ fn emit_json(v: &serde_json::Value) { let _ = h.flush(); } +/// Downcast a generic [`DebugState`] to the NLP solver's concrete +/// [`DebugCtx`], for the NLP-only REPL commands (rank diagnosis, model-name +/// resolution, warm `resolve`, sweep/multistart). `None` for the +/// convex/conic and global solvers, whose REPL reports "not supported". +fn as_nlp<'a>(ctx: &'a dyn DebugState) -> Option<&'a DebugCtx> { + ctx.as_any().and_then(|a| a.downcast_ref::()) +} + +/// Mutable form of [`as_nlp`], for commands that mutate NLP-specific state. +fn as_nlp_mut<'a>(ctx: &'a mut dyn DebugState) -> Option<&'a mut DebugCtx> { + ctx.as_any_mut().and_then(|a| a.downcast_mut::()) +} + +/// Standard "command needs the NLP solver" error for the convex/global REPL. +fn nlp_only(cmd: &str) -> CmdOut { + CmdOut::err(format!( + "`{cmd}` is only available for the NLP solver (not the convex/conic or global solvers)" + )) +} + +/// The iterate-block names the *current* solver exposes (NLP: the eight +/// primal-dual blocks; convex IPM: `x`/`s`/`y`/`z`). Block commands use +/// this rather than the static NLP [`BLOCK_NAMES`] so they work for any +/// solver behind the [`DebugState`] trait. +fn block_names(ctx: &dyn DebugState) -> Vec<&'static str> { + ctx.block_dims().into_iter().map(|(n, _)| n).collect() +} + +/// Whether `name` is one of the current solver's iterate blocks. +fn is_block(ctx: &dyn DebugState, name: &str) -> bool { + block_names(ctx).iter().any(|n| *n == name) +} + fn fmt_vec(name: &str, v: &[f64]) -> String { const MAX: usize = 12; if v.len() <= MAX { @@ -4092,12 +4213,13 @@ fn write_and_open(label: &str, iter: i32, vals: &[f64]) -> Result<(String, Strin /// Build the prompt handed to the LLM by `ask`: a compact, self-contained /// description of the paused interior-point state plus the user question. -fn build_ask_prompt(ctx: &DebugCtx, question: &str) -> String { +fn build_ask_prompt(ctx: &dyn DebugState, question: &str) -> String { use std::fmt::Write as _; let mut p = String::new(); p.push_str( - "You are helping debug a paused run of POUNCE, a pure-Rust port of the Ipopt \ - interior-point NLP solver. The solve is stopped at a debugger checkpoint. \ + "You are helping debug a paused run of POUNCE, a pure-Rust interior-point \ + optimization solver whose NLP core is ported from Ipopt. The solve is \ + stopped at a debugger checkpoint. \ Use the state below to answer concisely and suggest concrete next steps \ (options to try, what to inspect). State:\n\n", ); diff --git a/crates/pounce-cli/src/dispatch.rs b/crates/pounce-cli/src/dispatch.rs new file mode 100644 index 00000000..096d7e8a --- /dev/null +++ b/crates/pounce-cli/src/dispatch.rs @@ -0,0 +1,1086 @@ +//! Solver routing (Phase 1 of the LP/QP dispatch plan). +//! +//! See `dev-notes/lp-qp-routing.md`. This module sits between problem +//! loading and the call to `optimize_tnlp`. It does three things: +//! +//! 1. **Classify** the parsed problem into a [`ProblemClass`] by walking +//! the nonlinear expression trees the `.nl` reader already produced. +//! 2. **Resolve** that class against the user's `solver_selection` +//! option into a [`SolverChoice`]. +//! 3. (Phase 2+) **Dispatch** to the chosen solver. +//! +//! Phase 1 ships with *no behavior change*: the only solvers wired are +//! `Nlp` (the existing filter-IPM) and `auto`, which resolves to `Nlp` +//! for every class until `pounce-convex` lands. The classifier and the +//! option plumbing are fully present and tested so Phase 2 can drop in +//! the specialized solvers behind the seam. +//! +//! ## Classification +//! +//! The `.nl` format has no dedicated quadratic section: each row's +//! linear part lives in the `G`/`J` coefficient segments (already split +//! out into [`NlProblem::obj_linear`] / [`NlProblem::con_linear`]), +//! while any higher-order term — including a QP's quadratic terms — is +//! written into the nonlinear expression tree as `Mul`/`Pow` nodes. So: +//! +//! - no nonlinear parts at all → **LP**; +//! - all nonlinear parts are degree-2 polynomials → **QP** family +//! (convex / nonconvex / QCQP split by curvature); +//! - anything else (transcendental, higher degree) → **NLP**. +//! +//! ### Conservative fallback (correctness guard) +//! +//! Misclassifying an indefinite or non-quadratic problem *into* a convex +//! solver would return a spurious KKT point as if globally optimal. +//! Whenever the walk cannot *prove* the stronger class, the classifier +//! falls back to the more general one, ultimately `Nlp`. The convexity +//! (PSD) test uses a tolerance and routes "inconclusive within +//! tolerance" to the safe side, never to the convex path. + +use crate::nl_reader::{BinOp, Expr, NlProblem, UnaryOp}; +use std::collections::BTreeMap; + +/// Tolerance for the smallest-eigenvalue sign test in the convexity +/// check. A Hessian eigenvalue below `-PSD_TOL` is treated as a genuine +/// negative direction (nonconvex); within `±PSD_TOL` it is treated as +/// zero. Scaled tolerances would be better once we have problem scaling +/// in this path; for Phase 1 a fixed absolute tolerance is adequate and +/// errs toward the safe (more general) class. +const PSD_TOL: f64 = 1e-9; + +/// The mathematical class of a loaded problem, from most to least +/// specialized. See the module docs and `dev-notes/lp-qp-routing.md`. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ProblemClass { + /// Linear objective, linear constraints. + Lp, + /// Convex quadratic objective, linear constraints (Hessian PSD). + ConvexQp, + /// Convex quadratic objective and/or convex quadratic constraints. + /// SOCP-representable; routes to the conic solver from Phase 4. + ConvexQcqp, + /// Quadratic but with an indefinite Hessian somewhere. Falls through + /// to the NLP solver for a local minimum. + NonconvexQp, + /// General nonlinear (transcendental terms, higher-degree + /// polynomials, or anything the classifier cannot prove quadratic). + Nlp, +} + +impl ProblemClass { + /// Human-readable name for diagnostics and the + /// forced-solver-mismatch error message. + pub fn name(self) -> &'static str { + match self { + ProblemClass::Lp => "LP", + ProblemClass::ConvexQp => "convex QP", + ProblemClass::ConvexQcqp => "convex QCQP", + ProblemClass::NonconvexQp => "nonconvex QP", + ProblemClass::Nlp => "NLP", + } + } +} + +/// The resolved solver to dispatch to, after combining a +/// [`ProblemClass`] with the `solver_selection` option. +/// +/// Phase 1 only ever resolves to [`SolverChoice::Nlp`]; the other +/// variants exist so the option parser and the forced-selection +/// validation are complete, and so Phase 2 can wire them without +/// touching this enum. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SolverChoice { + /// The existing Wächter-Biegler filter-IPM. The only solver wired in + /// Phase 1. + Nlp, + /// IPM-LP in `pounce-convex` (Phase 2). + LpIpm, + /// IPM-QP in `pounce-convex` (Phase 2). + QpIpm, + /// Active-set QP in `pounce-qp` (parallel track). + QpActiveSet, +} + +impl SolverChoice { + /// Human-readable description of the dispatched solver, for the + /// banner-level "Solving as …" log line. Names the algorithm and the + /// crate that implements it so a reader can tell which of pounce's + /// solvers actually ran. + pub fn describe(self) -> &'static str { + match self { + SolverChoice::Nlp => "NLP filter line-search interior-point (pounce-nlp)", + SolverChoice::LpIpm => "LP interior-point (pounce-convex)", + SolverChoice::QpIpm => "convex QP interior-point (pounce-convex)", + SolverChoice::QpActiveSet => "active-set QP (pounce-qp)", + } + } +} + +/// Parsed `solver_selection` option value. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SolverSelection { + /// Pick the most specialized solver matching the class. Default. + Auto, + /// Force the NLP solver regardless of class (current behavior). + Nlp, + /// Force IPM-LP; error if the problem is not an LP. + LpIpm, + /// Force IPM-QP; error if the problem is not LP/convex-QP. + QpIpm, + /// Force active-set QP; error if the problem is not LP/convex-QP. + QpActiveSet, +} + +impl SolverSelection { + /// Parse the `solver_selection` option string. Returns `None` for an + /// unrecognized value so the caller can surface a tidy error. + pub fn parse(s: &str) -> Option { + match s { + "auto" => Some(SolverSelection::Auto), + "nlp" => Some(SolverSelection::Nlp), + "lp-ipm" => Some(SolverSelection::LpIpm), + "qp-ipm" => Some(SolverSelection::QpIpm), + "qp-active-set" => Some(SolverSelection::QpActiveSet), + _ => None, + } + } + + /// The accepted values, for error messages and option registration. + pub const VALUES: &'static [&'static str] = + &["auto", "nlp", "lp-ipm", "qp-ipm", "qp-active-set"]; +} + +/// Classify a parsed `.nl` problem. +/// +/// Works off the already-split linear / nonlinear representation in +/// [`NlProblem`]: a row contributes to the class only through its +/// nonlinear `Expr` (the linear part is, by construction, linear). The +/// classifier is deliberately conservative — see the module docs. +pub fn classify_problem(prob: &NlProblem) -> ProblemClass { + // Fast path: no nonlinear parts anywhere ⇒ LP. (Header-equivalent: + // n_nl_objs == 0 && n_nl_cons == 0.) + let obj_nl = !is_trivially_zero(&prob.obj_nonlinear); + let cons_nl = prob.con_nonlinear.iter().any(|e| !is_trivially_zero(e)); + if !obj_nl && !cons_nl { + return ProblemClass::Lp; + } + + // Objective curvature. + let obj_quad = match analyze_quadratic(&prob.obj_nonlinear, prob.n) { + Some(q) => q, + // Objective has a non-quadratic nonlinear term ⇒ NLP. + None => return ProblemClass::Nlp, + }; + + // Constraint curvature. A quadratic constraint makes this a QCQP; + // any non-quadratic constraint term makes the whole problem NLP. + let mut any_quadratic_constraint = false; + for c in &prob.con_nonlinear { + if is_trivially_zero(c) { + continue; + } + match analyze_quadratic(c, prob.n) { + Some(q) if q.is_empty() => {} // purely linear after all + Some(_) => any_quadratic_constraint = true, + None => return ProblemClass::Nlp, + } + } + + // Objective Hessian definiteness, as the *minimizer* sees it. A + // `maximize` problem is internally negated to a minimization, so a + // concave-up (PSD-Hessian) maximize is a nonconvex minimize. Test the + // sense-adjusted Hessian, not the raw one, or maximize-of-convex slips + // through to the convex IPM and produces a wrong (max/saddle) answer. + if !obj_quad.is_empty() { + let effective: QuadHessian = if prob.minimize { + obj_quad.clone() + } else { + obj_quad.iter().map(|(k, v)| (*k, -v)).collect() + }; + if !hessian_is_psd(&effective, prob.n) { + return ProblemClass::NonconvexQp; + } + } + + if any_quadratic_constraint { + // Convex QCQP requires every ≤-inequality's constraint Hessian + // to be PSD. Phase 1 does not yet distinguish constraint sense / + // curvature sign per row with full rigor, so be conservative: + // only call it ConvexQcqp when every quadratic constraint's + // Hessian is PSD; otherwise fall back to NLP (sound: NLP-IPM + // finds a local min either way). + for c in &prob.con_nonlinear { + if is_trivially_zero(c) { + continue; + } + match analyze_quadratic(c, prob.n) { + Some(q) if q.is_empty() => {} + Some(q) => { + if !hessian_is_psd(&q, prob.n) { + return ProblemClass::Nlp; + } + } + None => return ProblemClass::Nlp, + } + } + return ProblemClass::ConvexQcqp; + } + + // Quadratic (or linear) convex objective with linear constraints. + if obj_quad.is_empty() { + // Objective nonlinear part collapsed to nothing quadratic and no + // constraints are quadratic — it was effectively linear. + ProblemClass::Lp + } else { + ProblemClass::ConvexQp + } +} + +/// Resolve a [`ProblemClass`] and a [`SolverSelection`] into the solver +/// to dispatch to, or an error string when a forced selection does not +/// match the detected class. +/// +/// In Phase 1 the resolved choice is informational for everything except +/// `Nlp`: the dispatcher (Phase 2) is what acts on `LpIpm` / `QpIpm` / +/// `QpActiveSet`. `auto` resolves to `Nlp` for every class until +/// `pounce-convex` lands (documented no-op so there is no regression). +pub fn resolve_solver( + class: ProblemClass, + selection: SolverSelection, +) -> Result { + use ProblemClass as P; + use SolverSelection as S; + + // Is this class within the convex-QP family (LP or convex QP)? + let is_lp = class == P::Lp; + let is_convex_qp = matches!(class, P::Lp | P::ConvexQp); + + match selection { + // `auto`: route LP and convex QP to the specialized convex IPM + // (`pounce-convex`); everything else (QCQP until the conic + // solver lands, nonconvex QP, general NLP) falls through to the + // NLP filter-IPM. LP is solved by the same QP IPM (P = 0), so it + // resolves to `QpIpm` rather than a distinct LP entry point. + S::Auto => match class { + P::Lp | P::ConvexQp => Ok(SolverChoice::QpIpm), + _ => Ok(SolverChoice::Nlp), + }, + S::Nlp => Ok(SolverChoice::Nlp), + S::LpIpm => { + if is_lp { + Ok(SolverChoice::LpIpm) + } else { + Err(mismatch_msg(class, "lp-ipm", "an LP")) + } + } + S::QpIpm => { + if is_convex_qp { + Ok(SolverChoice::QpIpm) + } else { + Err(mismatch_msg(class, "qp-ipm", "an LP or convex QP")) + } + } + S::QpActiveSet => { + if is_convex_qp { + Ok(SolverChoice::QpActiveSet) + } else { + Err(mismatch_msg(class, "qp-active-set", "an LP or convex QP")) + } + } + } +} + +fn mismatch_msg(class: ProblemClass, forced: &str, expected: &str) -> String { + format!( + "problem class {} does not match forced solver {} (expected {})", + class.name(), + forced, + expected + ) +} + +// --------------------------------------------------------------------- +// Quadratic-form analysis +// --------------------------------------------------------------------- + +/// The symmetric Hessian of a quadratic form, stored as a sparse upper- +/// triangular (i ≤ j) map of `(i, j) -> ∂²/∂xᵢ∂xⱼ`. Empty means the +/// expression is (at most) linear. +pub(crate) type QuadHessian = BTreeMap<(usize, usize), f64>; + +/// Full quadratic read-out: `(Hessian, [(var, linear coef), …], constant)`. +/// The linear and constant parts are the pieces AMPL/Pyomo fold into the +/// nonlinear objective tree (see [`analyze_quadratic_full`]). +pub(crate) type QuadForm = (QuadHessian, Vec<(usize, f64)>, f64); + +/// Attempt to read an expression as a polynomial of total degree ≤ 2 and +/// return its Hessian (constant, since the form is quadratic). Returns +/// `None` if the expression contains any term the classifier cannot +/// prove is degree-≤2 polynomial (transcendental ops, division by a +/// non-constant, `Pow` with exponent ∉ {0,1,2}, products of degree > 2, +/// external calls, …). `None` ⇒ treat as general nonlinear. +pub(crate) fn analyze_quadratic(e: &Expr, n: usize) -> Option { + analyze_quadratic_full(e, n).map(|(h, _, _)| h) +} + +/// Like [`analyze_quadratic`] but also returns the degree-1 (linear) +/// coefficients *and* the degree-0 (constant) term of the form: +/// `(Hessian, [(var, coef), …], constant)`. +/// +/// AMPL folds the linear part of a nonlinear term into the objective's +/// nonlinear expression tree (the `−6·x₀` of `(x₀−3)²`, say) rather than +/// the linear section. Callers building the QP objective vector `c` must +/// add these in, exactly as the NLP path's `eval_f` sums the linear +/// section *and* the nonlinear tree — otherwise the linear shift is +/// silently dropped and the convex solve minimizes the wrong objective. +/// +/// The **constant** is returned for the same reason: AMPL/Pyomo also fold +/// the objective's degree-0 term into the nonlinear tree (the `+9` of +/// `(x₀−3)²`), where it does *not* land in `NlProblem::obj_constant`. It +/// is irrelevant to the minimizer but is part of the *reported objective +/// value*; dropping it makes the convex solve report an objective off by +/// that constant versus the NLP path (see `qp_extract`). +pub(crate) fn analyze_quadratic_full(e: &Expr, _n: usize) -> Option { + let poly = to_poly(e)?; + if poly.max_degree() > 2 { + return None; + } + let mut h: QuadHessian = BTreeMap::new(); + let mut lin: Vec<(usize, f64)> = Vec::new(); + let mut constant = 0.0; + for (vars, coef) in &poly.terms { + match vars.as_slice() { + // Constant term: no gradient/Hessian contribution, but it is + // part of the objective *value* — accumulate, don't drop. + [] => constant += *coef, + // Linear term c·xᵢ. + [i] => lin.push((*i, *coef)), + // Quadratic term c·xᵢ·xⱼ. + [i, j] => { + let (i, j) = (*i.min(j), *i.max(j)); + // ∂²(c·xᵢxⱼ)/∂xᵢ∂xⱼ = c for i≠j; ∂²(c·xᵢ²)/∂xᵢ² = 2c. + let contrib = if i == j { 2.0 * coef } else { *coef }; + *h.entry((i, j)).or_insert(0.0) += contrib; + } + _ => return None, + } + } + // Drop explicit zeros so `is_empty()` means "linear". + h.retain(|_, v| v.abs() > 0.0); + Some((h, lin, constant)) +} + +/// A multivariate polynomial as a map from a sorted variable-index +/// multiset (the monomial) to its coefficient. `[]` is the constant +/// term, `[i]` is `xᵢ`, `[i, i]` is `xᵢ²`, `[i, j]` is `xᵢxⱼ`. +#[derive(Debug, Clone, Default)] +struct Poly { + terms: BTreeMap, f64>, +} + +impl Poly { + fn constant(c: f64) -> Self { + let mut terms = BTreeMap::new(); + if c != 0.0 { + terms.insert(Vec::new(), c); + } + Poly { terms } + } + + fn var(i: usize) -> Self { + let mut terms = BTreeMap::new(); + terms.insert(vec![i], 1.0); + Poly { terms } + } + + fn max_degree(&self) -> usize { + self.terms.keys().map(|m| m.len()).max().unwrap_or(0) + } + + fn as_constant(&self) -> Option { + match self.terms.len() { + 0 => Some(0.0), + 1 => self.terms.get(&Vec::new()).copied(), + _ => None, + } + } + + fn add(mut self, other: &Poly) -> Poly { + for (m, c) in &other.terms { + *self.terms.entry(m.clone()).or_insert(0.0) += c; + } + self.prune(); + self + } + + fn neg(mut self) -> Poly { + for c in self.terms.values_mut() { + *c = -*c; + } + self + } + + fn scale(mut self, s: f64) -> Poly { + if s == 0.0 { + return Poly::default(); + } + for c in self.terms.values_mut() { + *c *= s; + } + self + } + + /// Multiply two polynomials, bailing (`None`) if any product + /// monomial would exceed total degree 2 — past that the classifier + /// gives up and the caller routes to NLP. + fn mul(&self, other: &Poly) -> Option { + let mut out = Poly::default(); + for (ma, ca) in &self.terms { + for (mb, cb) in &other.terms { + if ma.len() + mb.len() > 2 { + return None; + } + let mut m = ma.clone(); + m.extend_from_slice(mb); + m.sort_unstable(); + *out.terms.entry(m).or_insert(0.0) += ca * cb; + } + } + out.prune(); + Some(out) + } + + fn prune(&mut self) { + self.terms.retain(|_, c| c.abs() > 0.0); + } +} + +/// Lower an `Expr` to a [`Poly`] of total degree ≤ 2, or `None` if it +/// contains anything outside that class. `Cse` nodes are inlined (they +/// are mathematically equivalent to their body). +fn to_poly(e: &Expr) -> Option { + match e { + Expr::Const(c) => Some(Poly::constant(*c)), + Expr::Var(i) => Some(Poly::var(*i)), + Expr::Cse(body) => to_poly(body), + Expr::Sum(items) => { + let mut acc = Poly::default(); + for it in items { + acc = acc.add(&to_poly(it)?); + } + Some(acc) + } + Expr::Unary(op, a) => match op { + UnaryOp::Neg => Some(to_poly(a)?.neg()), + // Everything else is transcendental / non-polynomial. + _ => None, + }, + Expr::Binary(op, a, b) => { + let pa = to_poly(a)?; + let pb = to_poly(b)?; + match op { + BinOp::Add => Some(pa.add(&pb)), + BinOp::Sub => Some(pa.add(&pb.neg())), + BinOp::Mul => pa.mul(&pb), + BinOp::Div => { + // Division is polynomial only by a nonzero constant. + let d = pb.as_constant()?; + if d == 0.0 { + None + } else { + Some(pa.scale(1.0 / d)) + } + } + BinOp::Pow => { + // Polynomial only for constant integer exponents in + // {0, 1, 2}. + let exp = pb.as_constant()?; + if exp == 0.0 { + Some(Poly::constant(1.0)) + } else if exp == 1.0 { + Some(pa) + } else if exp == 2.0 { + pa.mul(&pa) + } else { + None + } + } + // atan2 and any other binary opcodes are non-polynomial. + _ => None, + } + } + // External function calls are opaque ⇒ not provably polynomial. + Expr::Funcall { .. } => None, + // Comparisons, logicals, conditionals, and n-ary min/max (the + // smooth-/control-flow `.nl` opcodes) are non-polynomial ⇒ not a + // convex QP, so the classifier routes them to the NLP solver. + _ => None, + } +} + +/// True if the expression is the literal constant zero the `.nl` reader +/// uses for "no nonlinear part". +fn is_trivially_zero(e: &Expr) -> bool { + matches!(e, Expr::Const(c) if *c == 0.0) +} + +// --------------------------------------------------------------------- +// PSD test +// --------------------------------------------------------------------- + +/// Is the (symmetric, sparse) Hessian positive semidefinite? +/// +/// Builds the dense symmetric matrix over the variables that actually +/// appear in the quadratic form and runs a symmetric eigenvalue check +/// via Jacobi rotations — adequate for the small-to-moderate dense +/// blocks a classifier sees, and dependency-free. Returns `true` only +/// when the smallest eigenvalue is `≥ -PSD_TOL`; an inconclusive or +/// clearly-negative result returns `false`, routing to the safe +/// (more general) class. +fn hessian_is_psd(h: &QuadHessian, _n: usize) -> bool { + if h.is_empty() { + return true; // zero matrix is PSD (the linear case) + } + // Compress to the active variable set so the dense matrix is small. + let mut active: Vec = Vec::new(); + for (i, j) in h.keys() { + active.push(*i); + active.push(*j); + } + active.sort_unstable(); + active.dedup(); + let k = active.len(); + let idx = |v: usize| active.binary_search(&v).unwrap(); + + let mut a = vec![0.0f64; k * k]; + for ((i, j), v) in h { + let (ri, rj) = (idx(*i), idx(*j)); + a[ri * k + rj] = *v; + a[rj * k + ri] = *v; + } + + match smallest_eigenvalue_symmetric(&mut a, k) { + Some(min_eig) => min_eig >= -PSD_TOL, + None => false, // did not converge ⇒ be conservative + } +} + +/// Smallest eigenvalue of a dense `k×k` symmetric matrix (row-major) via +/// the classical cyclic Jacobi eigenvalue algorithm. Destroys `a`. +/// Returns `None` if it fails to converge within the sweep budget. +fn smallest_eigenvalue_symmetric(a: &mut [f64], k: usize) -> Option { + if k == 0 { + return Some(0.0); + } + if k == 1 { + return Some(a[0]); + } + const MAX_SWEEPS: usize = 100; + for _ in 0..MAX_SWEEPS { + // Off-diagonal Frobenius norm. + let mut off = 0.0; + for p in 0..k { + for q in (p + 1)..k { + off += a[p * k + q] * a[p * k + q]; + } + } + if off <= 1e-30 { + break; + } + for p in 0..k { + for q in (p + 1)..k { + let apq = a[p * k + q]; + if apq.abs() <= 1e-300 { + continue; + } + let app = a[p * k + p]; + let aqq = a[q * k + q]; + let theta = (aqq - app) / (2.0 * apq); + let t = theta.signum() / (theta.abs() + (theta * theta + 1.0).sqrt()); + let t = if theta == 0.0 { 1.0 } else { t }; + let c = 1.0 / (t * t + 1.0).sqrt(); + let s = t * c; + // Apply the rotation J^T A J. + for i in 0..k { + let aip = a[i * k + p]; + let aiq = a[i * k + q]; + a[i * k + p] = c * aip - s * aiq; + a[i * k + q] = s * aip + c * aiq; + } + for i in 0..k { + let api = a[p * k + i]; + let aqi = a[q * k + i]; + a[p * k + i] = c * api - s * aqi; + a[q * k + i] = s * api + c * aqi; + } + } + } + } + let mut min_eig = f64::INFINITY; + for i in 0..k { + min_eig = min_eig.min(a[i * k + i]); + } + if min_eig.is_finite() { + Some(min_eig) + } else { + None + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::nl_reader::parse_nl_text; + + // --- SolverSelection parsing --- + + #[test] + fn parse_selection_values() { + assert_eq!(SolverSelection::parse("auto"), Some(SolverSelection::Auto)); + assert_eq!(SolverSelection::parse("nlp"), Some(SolverSelection::Nlp)); + assert_eq!( + SolverSelection::parse("lp-ipm"), + Some(SolverSelection::LpIpm) + ); + assert_eq!( + SolverSelection::parse("qp-ipm"), + Some(SolverSelection::QpIpm) + ); + assert_eq!( + SolverSelection::parse("qp-active-set"), + Some(SolverSelection::QpActiveSet) + ); + assert_eq!(SolverSelection::parse("lp-simplex"), None); + assert_eq!(SolverSelection::parse("bogus"), None); + } + + // --- resolve_solver: auto routes LP/convex-QP to the convex IPM, + // everything else to NLP --- + + #[test] + fn auto_routes_convex_qp_family_to_qp_ipm() { + assert_eq!( + resolve_solver(ProblemClass::Lp, SolverSelection::Auto), + Ok(SolverChoice::QpIpm), + "auto should route LP to the convex IPM (P=0)" + ); + assert_eq!( + resolve_solver(ProblemClass::ConvexQp, SolverSelection::Auto), + Ok(SolverChoice::QpIpm), + "auto should route convex QP to the convex IPM" + ); + } + + #[test] + fn auto_routes_everything_else_to_nlp() { + for class in [ + ProblemClass::ConvexQcqp, // until the conic solver lands + ProblemClass::NonconvexQp, + ProblemClass::Nlp, + ] { + assert_eq!( + resolve_solver(class, SolverSelection::Auto), + Ok(SolverChoice::Nlp), + "auto must resolve to Nlp for {:?}", + class + ); + } + } + + #[test] + fn forced_nlp_always_ok() { + assert_eq!( + resolve_solver(ProblemClass::ConvexQp, SolverSelection::Nlp), + Ok(SolverChoice::Nlp) + ); + } + + #[test] + fn forced_lp_on_nlp_errors() { + let err = resolve_solver(ProblemClass::Nlp, SolverSelection::LpIpm).unwrap_err(); + assert!(err.contains("NLP"), "msg should name detected class: {err}"); + assert!( + err.contains("lp-ipm"), + "msg should name forced solver: {err}" + ); + } + + #[test] + fn forced_lp_on_lp_ok() { + assert_eq!( + resolve_solver(ProblemClass::Lp, SolverSelection::LpIpm), + Ok(SolverChoice::LpIpm) + ); + } + + #[test] + fn forced_qp_accepts_lp_and_convex_qp_only() { + assert_eq!( + resolve_solver(ProblemClass::Lp, SolverSelection::QpIpm), + Ok(SolverChoice::QpIpm) + ); + assert_eq!( + resolve_solver(ProblemClass::ConvexQp, SolverSelection::QpIpm), + Ok(SolverChoice::QpIpm) + ); + assert!(resolve_solver(ProblemClass::NonconvexQp, SolverSelection::QpIpm).is_err()); + assert!(resolve_solver(ProblemClass::Nlp, SolverSelection::QpIpm).is_err()); + } + + // --- Poly / quadratic analysis unit tests --- + + #[test] + fn poly_of_quadratic_diagonal() { + // (x0 - 1)^2 => x0^2 - 2 x0 + 1 + let e = Expr::Binary( + BinOp::Pow, + Box::new(Expr::Binary( + BinOp::Sub, + Box::new(Expr::Var(0)), + Box::new(Expr::Const(1.0)), + )), + Box::new(Expr::Const(2.0)), + ); + let h = analyze_quadratic(&e, 1).expect("degree-2 polynomial"); + // d²/dx0² (x0²) = 2 + assert_eq!(h.get(&(0, 0)), Some(&2.0)); + } + + #[test] + fn poly_rejects_transcendental() { + // sin(x0) is not polynomial. + let e = Expr::Unary(UnaryOp::Sin, Box::new(Expr::Var(0))); + assert!(analyze_quadratic(&e, 1).is_none()); + } + + #[test] + fn poly_rejects_cubic() { + // x0^3 + let e = Expr::Binary( + BinOp::Pow, + Box::new(Expr::Var(0)), + Box::new(Expr::Const(3.0)), + ); + assert!(analyze_quadratic(&e, 1).is_none()); + } + + #[test] + fn cross_term_hessian() { + // x0 * x1 => H[0,1] = 1 + let e = Expr::Binary(BinOp::Mul, Box::new(Expr::Var(0)), Box::new(Expr::Var(1))); + let h = analyze_quadratic(&e, 2).expect("degree-2"); + assert_eq!(h.get(&(0, 1)), Some(&1.0)); + } + + // --- PSD test --- + + #[test] + fn psd_accepts_convex_separable() { + // diag(2, 4): both eigenvalues positive. + let mut h = QuadHessian::new(); + h.insert((0, 0), 2.0); + h.insert((1, 1), 4.0); + assert!(hessian_is_psd(&h, 2)); + } + + #[test] + fn psd_rejects_indefinite() { + // [[0,1],[1,0]] has eigenvalues ±1. + let mut h = QuadHessian::new(); + h.insert((0, 1), 1.0); + assert!(!hessian_is_psd(&h, 2)); + } + + #[test] + fn psd_accepts_psd_with_zero_eigenvalue() { + // [[1,1],[1,1]] is PSD (eigenvalues 0 and 2). + let mut h = QuadHessian::new(); + h.insert((0, 0), 1.0); + h.insert((0, 1), 1.0); + h.insert((1, 1), 1.0); + assert!(hessian_is_psd(&h, 2)); + } + + // --- A1: ±PSD_TOL boundary of the convexity test (silent-misroute guard) --- + + /// The safety-critical case: a *real* negative direction — even a small + /// one, well beyond `PSD_TOL` — must read non-PSD so an indefinite QP + /// routes to NLP, never to the convex IPM (which would return a spurious + /// "optimal" at a saddle/maximum). + #[test] + fn psd_rejects_small_but_real_negative_curvature() { + // diag(2, −1e-3): min eigenvalue −1e-3 ≪ −PSD_TOL. + let mut h = QuadHessian::new(); + h.insert((0, 0), 2.0); + h.insert((1, 1), -1e-3); + assert!( + !hessian_is_psd(&h, 2), + "a −1e-3 eigenvalue must read indefinite, not be rounded to PSD" + ); + } + + /// Pin the threshold at exactly `±PSD_TOL` (1e-9). Within the band the + /// test rounds a tiny negative eigenvalue to PSD **by design**: a + /// genuinely semidefinite Hessian whose smallest eigenvalue computes as a + /// tiny negative (Jacobi roundoff) must not be misread as nonconvex. The + /// band is far below the error of solving a convex QP with that much + /// curvature, so it is the sound tradeoff — see the A1 Finding in + /// `dev-notes/pr70-hardening.md`. (1×1 Hessians are returned exactly, so + /// this is deterministic.) + #[test] + fn psd_threshold_is_psd_tol() { + let mut just_inside = QuadHessian::new(); + just_inside.insert((0, 0), -1e-10); // |λ| < PSD_TOL ⇒ treated as zero + assert!( + hessian_is_psd(&just_inside, 1), + "−1e-10 is within tolerance and must round to PSD" + ); + + let mut just_outside = QuadHessian::new(); + just_outside.insert((0, 0), -1e-7); // |λ| > PSD_TOL ⇒ genuine negative + assert!( + !hessian_is_psd(&just_outside, 1), + "−1e-7 is beyond tolerance and must read indefinite" + ); + } + + // --- End-to-end classify_problem on parsed .nl text --- + + /// Minimal `g`-format `.nl` text builder is overkill; instead use the + /// reader's own fixtures via parse_nl_text on hand-written stubs. + /// These cover the header LP fast-path and the AST walk. + + #[test] + fn classify_pure_lp() { + // minimize x0 + x1 s.t. x0 + x1 <= 1, no nonlinear parts. + // Build an NlProblem directly for a hermetic test. + let prob = NlProblem { + n: 2, + m: 1, + num_obj: 1, + minimize: true, + obj_nonlinear: Expr::Const(0.0), + obj_linear: vec![(0, 1.0), (1, 1.0)], + obj_constant: 0.0, + con_nonlinear: vec![Expr::Const(0.0)], + con_linear: vec![vec![(0, 1.0), (1, 1.0)]], + x_l: vec![0.0, 0.0], + x_u: vec![f64::INFINITY, f64::INFINITY], + g_l: vec![f64::NEG_INFINITY], + g_u: vec![1.0], + x0: vec![0.0, 0.0], + lambda0: vec![0.0], + suffixes: Default::default(), + imported_funcs: Vec::new(), + var_names: Vec::new(), + con_names: Vec::new(), + }; + assert_eq!(classify_problem(&prob), ProblemClass::Lp); + } + + #[test] + fn classify_convex_qp() { + // minimize x0^2 + x1^2 s.t. linear; convex (H = diag(2,2)). + let obj = Expr::Binary( + BinOp::Add, + Box::new(Expr::Binary( + BinOp::Pow, + Box::new(Expr::Var(0)), + Box::new(Expr::Const(2.0)), + )), + Box::new(Expr::Binary( + BinOp::Pow, + Box::new(Expr::Var(1)), + Box::new(Expr::Const(2.0)), + )), + ); + let prob = qp_stub(obj, vec![Expr::Const(0.0)]); + assert_eq!(classify_problem(&prob), ProblemClass::ConvexQp); + } + + #[test] + fn classify_nonconvex_qp() { + // minimize x0 * x1 (indefinite Hessian) s.t. linear. + let obj = Expr::Binary(BinOp::Mul, Box::new(Expr::Var(0)), Box::new(Expr::Var(1))); + let prob = qp_stub(obj, vec![Expr::Const(0.0)]); + assert_eq!(classify_problem(&prob), ProblemClass::NonconvexQp); + } + + #[test] + fn classify_nlp_from_transcendental_objective() { + let obj = Expr::Unary(UnaryOp::Exp, Box::new(Expr::Var(0))); + let prob = qp_stub(obj, vec![Expr::Const(0.0)]); + assert_eq!(classify_problem(&prob), ProblemClass::Nlp); + } + + /// Regression: a `maximize` of a PSD-Hessian objective is a *concave* + /// maximization ⇒ nonconvex minimization. The convexity test must run + /// on the sense-adjusted Hessian, or this slips through to the convex + /// IPM and returns a wrong (maximum/saddle) answer. + #[test] + fn classify_maximize_psd_objective_is_nonconvex() { + // maximize x0^2 + x1^2 (H = diag(2,2), PSD) — concave max. + let obj = Expr::Binary( + BinOp::Add, + Box::new(Expr::Binary( + BinOp::Pow, + Box::new(Expr::Var(0)), + Box::new(Expr::Const(2.0)), + )), + Box::new(Expr::Binary( + BinOp::Pow, + Box::new(Expr::Var(1)), + Box::new(Expr::Const(2.0)), + )), + ); + let mut prob = qp_stub(obj, vec![Expr::Const(0.0)]); + prob.minimize = false; + assert_eq!(classify_problem(&prob), ProblemClass::NonconvexQp); + } + + /// Mirror: `maximize` of a concave (NSD-Hessian) objective is a convex + /// minimization once negated, so it is a legitimate `ConvexQp`. + #[test] + fn classify_maximize_concave_objective_is_convex() { + // maximize −(x0^2 + x1^2) (H = diag(−2,−2)); negated ⇒ PSD. + let neg_sq = |v: usize| { + Expr::Unary( + UnaryOp::Neg, + Box::new(Expr::Binary( + BinOp::Pow, + Box::new(Expr::Var(v)), + Box::new(Expr::Const(2.0)), + )), + ) + }; + let obj = Expr::Binary(BinOp::Add, Box::new(neg_sq(0)), Box::new(neg_sq(1))); + let mut prob = qp_stub(obj, vec![Expr::Const(0.0)]); + prob.minimize = false; + assert_eq!(classify_problem(&prob), ProblemClass::ConvexQp); + } + + #[test] + fn classify_convex_qcqp() { + // convex quadratic objective + a convex quadratic constraint. + let obj = Expr::Binary( + BinOp::Pow, + Box::new(Expr::Var(0)), + Box::new(Expr::Const(2.0)), + ); + let con = Expr::Binary( + BinOp::Add, + Box::new(Expr::Binary( + BinOp::Pow, + Box::new(Expr::Var(0)), + Box::new(Expr::Const(2.0)), + )), + Box::new(Expr::Binary( + BinOp::Pow, + Box::new(Expr::Var(1)), + Box::new(Expr::Const(2.0)), + )), + ); + let prob = qp_stub(obj, vec![con]); + assert_eq!(classify_problem(&prob), ProblemClass::ConvexQcqp); + } + + /// Classification mirror of the boundary guard: a QP whose only + /// curvature is a genuine (beyond-tolerance) negative direction is + /// `NonconvexQp`, so `auto` routes it to NLP rather than the convex IPM. + /// `minimize −x0²` is concave for a minimizer ⇒ indefinite. + #[test] + fn classify_concave_minimize_is_nonconvex() { + let obj = Expr::Unary( + UnaryOp::Neg, + Box::new(Expr::Binary( + BinOp::Pow, + Box::new(Expr::Var(0)), + Box::new(Expr::Const(2.0)), + )), + ); + let prob = qp_stub(obj, vec![Expr::Const(0.0)]); + assert_eq!(classify_problem(&prob), ProblemClass::NonconvexQp); + } + + /// Conservative QCQP guard: a convex quadratic objective with an + /// *indefinite* quadratic constraint must fall back to NLP — never be + /// called `ConvexQcqp` and handed to the conic path, which would treat a + /// nonconvex feasible region as convex. + #[test] + fn classify_qcqp_with_indefinite_constraint_falls_back_to_nlp() { + // obj x0² (convex); constraint x0·x1 (indefinite Hessian). + let obj = Expr::Binary( + BinOp::Pow, + Box::new(Expr::Var(0)), + Box::new(Expr::Const(2.0)), + ); + let con = Expr::Binary(BinOp::Mul, Box::new(Expr::Var(0)), Box::new(Expr::Var(1))); + let prob = qp_stub(obj, vec![con]); + assert_eq!(classify_problem(&prob), ProblemClass::Nlp); + } + + /// A nonlinear objective expression whose quadratic part algebraically + /// cancels has an empty Hessian ⇒ classify as `Lp`, not a spurious QP + /// (which would otherwise route a linear problem to the QP IPM). + #[test] + fn classify_cancelling_quadratic_objective_is_lp() { + // x0² − x0² ≡ 0: the degree-2 terms cancel in the polynomial walk. + let sq = || { + Expr::Binary( + BinOp::Pow, + Box::new(Expr::Var(0)), + Box::new(Expr::Const(2.0)), + ) + }; + let obj = Expr::Binary(BinOp::Sub, Box::new(sq()), Box::new(sq())); + let prob = qp_stub(obj, vec![Expr::Const(0.0)]); + assert_eq!(classify_problem(&prob), ProblemClass::Lp); + } + + #[test] + fn classify_nlp_from_transcendental_constraint() { + let obj = Expr::Binary( + BinOp::Pow, + Box::new(Expr::Var(0)), + Box::new(Expr::Const(2.0)), + ); + let con = Expr::Unary(UnaryOp::Log, Box::new(Expr::Var(1))); + let prob = qp_stub(obj, vec![con]); + assert_eq!(classify_problem(&prob), ProblemClass::Nlp); + } + + /// Build a 2-var, 1-con problem stub with the given nonlinear + /// objective and per-constraint nonlinear parts. Linear parts and + /// bounds are filled with benign defaults. + fn qp_stub(obj_nonlinear: Expr, con_nonlinear: Vec) -> NlProblem { + let m = con_nonlinear.len(); + NlProblem { + n: 2, + m, + num_obj: 1, + minimize: true, + obj_nonlinear, + obj_linear: vec![], + obj_constant: 0.0, + con_nonlinear, + con_linear: vec![vec![]; m], + x_l: vec![f64::NEG_INFINITY; 2], + x_u: vec![f64::INFINITY; 2], + g_l: vec![f64::NEG_INFINITY; m], + g_u: vec![0.0; m], + x0: vec![0.0; 2], + lambda0: vec![0.0; m], + suffixes: Default::default(), + imported_funcs: Vec::new(), + var_names: Vec::new(), + con_names: Vec::new(), + } + } + + // Keep parse_nl_text reachable for a future header-fast-path test + // against a committed .nl fixture. + #[allow(dead_code)] + fn _parse(txt: &str) -> NlProblem { + parse_nl_text(txt).expect("valid .nl") + } +} diff --git a/crates/pounce-cli/src/lib.rs b/crates/pounce-cli/src/lib.rs index 61d4b821..6b34e752 100644 --- a/crates/pounce-cli/src/lib.rs +++ b/crates/pounce-cli/src/lib.rs @@ -4,6 +4,7 @@ #![cfg_attr(test, allow(clippy::unwrap_used, clippy::expect_used))] pub mod builtin; +pub mod cbf; pub mod citations; pub mod cli; pub mod counting_tnlp; @@ -13,10 +14,12 @@ pub mod debug_repl; // it. Re-export the modules so existing `crate::nl_reader::…` / // `pounce_cli::nl_reader::…` paths keep resolving unchanged. pub use pounce_nl::{nl_external, nl_fbbt_translate, nl_reader, nl_tape}; +pub mod dispatch; pub mod minima; pub mod nl_hessian_program; pub mod nl_writer; pub mod print; +pub mod qp_extract; pub mod seeded_tnlp; pub mod sens; pub mod solve_report; diff --git a/crates/pounce-cli/src/main.rs b/crates/pounce-cli/src/main.rs index 7d941225..c3dc0571 100644 --- a/crates/pounce-cli/src/main.rs +++ b/crates/pounce-cli/src/main.rs @@ -29,6 +29,7 @@ use pounce_common::diagnostics::{ }; use pounce_linsol::sparse_sym_iface::SparseSymLinearSolverInterface; use pounce_nlp::return_codes::ApplicationReturnStatus; +use pounce_nlp::solve_statistics::IterRecord; use pounce_nlp::tnlp::TNLP; use pounce_restoration::resto_alg_builder::RestoAlgorithmBuilder; use pounce_restoration::resto_inner_solver::{ @@ -81,6 +82,66 @@ pub fn main() -> ExitCode { let mut app = IpoptApplication::new(); + // Register the LP/QP routing option so `solver_selection=...` is + // accepted by the (validating) options parser. See the dispatch plan + // (dev-notes/lp-qp-routing.md): `auto` routes classified LP / convex + // QP problems to the specialized `pounce-convex` IPM and everything + // else to the NLP filter-IPM; forcing values are validated against + // the detected class. + if let Err(e) = app.registered_options().add_string_option( + "solver_selection", + "Which solver to route the problem to.", + "auto", + &[ + ( + "auto", + "Most specialized solver matching the detected problem class.", + ), + ( + "nlp", + "Always the filter-IPM NLP solver (current default behavior).", + ), + ( + "lp-ipm", + "Force IPM-LP; errors if the problem is not an LP.", + ), + ( + "qp-ipm", + "Force IPM-QP; errors if the problem is not LP/convex-QP.", + ), + ( + "qp-active-set", + "Force active-set QP; errors if not LP/convex-QP.", + ), + ], + "Selects the solver by problem class. `auto` routes LP and convex \ + QP to the specialized convex interior-point solver (pounce-convex) \ + and all other classes to the NLP filter-IPM. `qp-active-set` is \ + reserved for the active-set QP track and currently falls through \ + to NLP.", + ) { + eprintln!("pounce: failed to register solver_selection option: {e}"); + return ExitCode::from(2); + } + + // Toggle presolve on the convex LP/QP path. Default on. + if let Err(e) = app.registered_options().add_string_option( + "qp_presolve", + "Run presolve before the convex LP/QP interior-point solve.", + "yes", + &[ + ("yes", "Reduce the problem (and detect trivial infeasibility / unboundedness) before solving."), + ("no", "Solve the extracted problem directly, without presolve."), + ], + "Only affects the convex LP/QP path (`solver_selection` routing to \ + pounce-convex). When on, presolve removes empty / duplicate / \ + redundant rows, fixes and substitutes structural columns, and may \ + report infeasible / unbounded without invoking the solver.", + ) { + eprintln!("pounce: failed to register qp_presolve option: {e}"); + return ExitCode::from(2); + } + // Opt into iter-history capture when the user asked for a JSON // report at Full detail — saves the per-iter alloc when they // didn't. @@ -359,6 +420,108 @@ pub fn main() -> ExitCode { return pounce_cli::minima::run(&mut app, &inner_tnlp, mcfg, &args, sol_path.as_deref()); } + // LP/QP routing (Phase 1). Resolve the `solver_selection` option + // against the detected problem class. For `.nl` inputs we classify + // the parsed problem; for builtins we conservatively treat the class + // as NLP (they are general nonlinear test problems). `auto`/`nlp` + // both route to the existing solver — the only observable effect in + // Phase 1 is that an explicit forcing value (e.g. `--solver=lp`) + // that does not match the detected class is rejected with a clear + // message, instead of being silently ignored. + { + use pounce_cli::dispatch::{ + classify_problem, resolve_solver, ProblemClass, SolverChoice, SolverSelection, + }; + let sel_str = app + .options() + .get_string_value("solver_selection", "") + .map(|(v, _)| v) + .unwrap_or_else(|_| "auto".to_string()); + let selection = match SolverSelection::parse(&sel_str) { + Some(s) => s, + None => { + eprintln!( + "pounce: invalid solver_selection '{sel_str}'; valid values: {}", + SolverSelection::VALUES.join(", ") + ); + return ExitCode::from(2); + } + }; + + // Classify the problem. Only the `.nl` path carries enough + // structure; builtins are treated as general NLP. (Re-reading the + // `.nl` here is cheap relative to a solve and keeps the dispatch + // self-contained.) + let (class, reparsed) = match &args.problem { + ProblemSource::NlFile(path) => match nl_reader::read_nl_file(path) { + Ok(prob) => (classify_problem(&prob), Some(prob)), + Err(_) => (ProblemClass::Nlp, None), + }, + ProblemSource::Builtin(_) => (ProblemClass::Nlp, None), + }; + + let choice = match resolve_solver(class, selection) { + Ok(c) => c, + Err(msg) => { + eprintln!("pounce: {msg}"); + return ExitCode::from(2); + } + }; + + // Banner-level routing line: report the detected problem class and + // which of pounce's solvers was selected for it. Gated like the + // banner (suppressed by `sb yes` and in JSON-debug protocol mode) so + // stdout stays clean for machine consumers. + if !suppress_banner && !json_dbg { + println!( + "Problem class: {}. Selected solver: {} [solver_selection={}].", + class.name(), + choice.describe(), + sel_str + ); + println!(); + } + + // Dispatch to the specialized convex LP/QP IPM when resolved. + // `LpIpm` and `QpIpm` both use the convex solver (LP is P = 0). + if matches!(choice, SolverChoice::LpIpm | SolverChoice::QpIpm) { + if let Some(prob) = reparsed { + let presolve_on = app + .options() + .get_string_value("qp_presolve", "") + .map(|(v, _)| v != "no") + .unwrap_or(true); + // JSON solve report, when requested — same schema as the NLP + // path, so the benchmark harness can compare QP and NLP solves. + let json_cfg = args.json_output.as_deref().map(|p| { + let input = match &args.problem { + ProblemSource::Builtin(name) => { + InputDescriptor::Builtin { name: name.clone() } + } + ProblemSource::NlFile(f) => InputDescriptor::NlFile { + path: f.clone(), + size_bytes: std::fs::metadata(f).ok().map(|m| m.len()), + }, + }; + (p, args.json_detail, input) + }); + return run_convex_qp( + &prob, + class, + sol_path.as_deref(), + presolve_on, + json_cfg, + debug_hook.as_ref(), + ); + } + // Should not happen (only `.nl` classifies non-NLP), but be + // safe: fall through to NLP rather than mis-dispatch. + } + // `nlp`, `qp-active-set` (not yet wired), and unmatched cases + // fall through to the existing NLP solve below. + let _ = choice; + } + // Does the `.nl` ask for a parametric sensitivity step? When it // does, the post-optimal step runs inside `on_converged` below and // its result is written back as the `sens_sol_state_1` suffix. @@ -920,6 +1083,227 @@ fn build_debugger( } } +/// Solve a classified LP / convex-QP `.nl` problem through the +/// specialized `pounce-convex` interior-point method, write a `.sol`, +/// and return the process exit code. This is the LP/QP dispatch target +/// (see `dev-notes/lp-qp-routing.md`). +/// +/// Writes the primal solution `x` and the constraint duals recovered +/// from the QP multipliers (`pounce_cli::qp_extract::recover_duals`). +/// The objective is reported in the user's original sense, including the +/// `.nl`'s constant term, which the standard-form QP drops. +/// Map the convex solver's status onto the NLP-side `ApplicationReturnStatus` +/// used by the JSON solve report, so QP and NLP reports share one status +/// vocabulary. +fn qp_status_to_ars(s: pounce_convex::QpStatus) -> ApplicationReturnStatus { + use pounce_convex::QpStatus; + match s { + QpStatus::Optimal => ApplicationReturnStatus::SolveSucceeded, + QpStatus::PrimalInfeasible => ApplicationReturnStatus::InfeasibleProblemDetected, + QpStatus::DualInfeasible => ApplicationReturnStatus::DivergingIterates, // unbounded + QpStatus::IterationLimit => ApplicationReturnStatus::MaximumIterationsExceeded, + QpStatus::NumericalFailure => ApplicationReturnStatus::InternalError, + } +} + +fn run_convex_qp( + prob: &nl_reader::NlProblem, + class: pounce_cli::dispatch::ProblemClass, + sol_path: Option<&std::path::Path>, + presolve_on: bool, + json_cfg: Option<(&std::path::Path, ReportDetail, InputDescriptor)>, + debug_hook: Option<&Rc>>, +) -> ExitCode { + use pounce_convex::presolve::{presolve, PresolveOutcome}; + use pounce_convex::{solve_qp_ipm, solve_qp_ipm_debug, QpOptions, QpStatus}; + + let (qp, con_map, obj_nl_const) = match pounce_cli::qp_extract::extract_qp_with_map(prob) { + Some(q) => q, + None => { + eprintln!( + "pounce: internal error: {} not extractable as QP", + class.name() + ); + return ExitCode::from(2); + } + }; + + // The reported objective must include *both* constant sources: the + // `.nl` linear-section constant (`obj_constant`) and any degree-0 term + // AMPL/Pyomo folded into the nonlinear objective tree (`obj_nl_const`, + // recovered by `extract_qp_with_map`). Dropping the latter makes the + // convex solve report an objective off by that constant versus the NLP + // path (e.g. HS21 by −100, HS35 by +9). Both are in user sense. + let obj_const = prob.obj_constant + obj_nl_const; + let sign = if prob.minimize { 1.0 } else { -1.0 }; + + let backend = || -> Box { + Box::new(pounce_feral::FeralSolverInterface::new()) + }; + let t0 = std::time::Instant::now(); + // With presolve on, reduce the problem (logging what was removed), + // solve the reduced problem, then postsolve back to the extracted-QP + // space — so the `con_map`-based dual recovery below still applies. + // Trivial infeasibility / unboundedness is reported without solving. + let trivial = |status| pounce_convex::QpSolution { + status, + x: vec![0.0; qp.n], + y: vec![0.0; qp.m_eq()], + z: vec![0.0; qp.m_ineq()], + z_lb: vec![0.0; qp.n], + z_ub: vec![0.0; qp.n], + obj: 0.0, + iters: 0, + iterates: Vec::new(), + }; + // Collect the per-iteration convergence trace only when a Full-detail + // JSON report was requested (it carries the `iterations` array); the + // default solve stays trace-free. + let want_trace = matches!(&json_cfg, Some((_, ReportDetail::Full, _))); + let qp_opts = QpOptions { + collect_iterates: want_trace, + ..QpOptions::default() + }; + let sol = if let Some(hook) = debug_hook { + // Interactive debug: step the IPM on the extracted QP directly. + // Presolve is skipped so the debugger's `x`/`s`/`y`/`z` blocks + // correspond to the user's problem rather than a reduced one. + let mut h = hook.borrow_mut(); + solve_qp_ipm_debug(&qp, &qp_opts, &mut *h, backend) + } else if presolve_on { + match presolve(&qp) { + PresolveOutcome::Reduced(ps) => { + let st = ps.stats(); + if st.reduced_anything() { + println!( + "Presolve: {} → {} vars, {} → {} rows (fixed {}, \ + free-fixed {}, substituted {}, forcing {}, dominated {}, tightened {})", + st.orig_vars, + st.reduced_vars, + st.orig_rows, + st.reduced_rows, + st.fixed_vars, + st.free_cols_fixed, + st.free_col_singletons, + st.forcing_rows, + st.dominated_cols, + st.tightened_bounds, + ); + } + let red = solve_qp_ipm(&ps.reduced, &qp_opts, backend); + ps.postsolve(&red) + } + PresolveOutcome::Infeasible => trivial(QpStatus::PrimalInfeasible), + PresolveOutcome::Unbounded => trivial(QpStatus::DualInfeasible), + } + } else { + solve_qp_ipm(&qp, &qp_opts, backend) + }; + let elapsed = t0.elapsed().as_secs_f64(); + + // Report the objective in the user's original sense, including the + // dropped constant term: f_user = sign * (½xᵀPx + cᵀx) + const. + let reported_obj = sign * sol.obj + obj_const; + + // AMPL `.sol` convention: 0 solved, 200–299 infeasible, 300–399 + // unbounded, 400–499 limit, 500–599 failure. + let (msg, ok, srn) = match sol.status { + QpStatus::Optimal => ("Optimal Solution Found.", true, 0), + QpStatus::PrimalInfeasible => ("Problem is primal infeasible.", false, 200), + QpStatus::DualInfeasible => ("Problem is unbounded (dual infeasible).", false, 300), + QpStatus::IterationLimit => ("Maximum iterations exceeded.", false, 400), + QpStatus::NumericalFailure => ("Numerical failure in KKT factorization.", false, 500), + }; + println!( + "POUNCE ({} IPM, pounce-convex): {msg} obj={reported_obj:.8} iters={} ({elapsed:.3}s)", + class.name(), + sol.iters, + ); + + // Recover per-constraint duals once (mapped from the QP multipliers back + // to per-`.nl`-constraint order); used by both the `.sol` and the JSON + // report. + let lambda = pounce_cli::qp_extract::recover_duals(prob, &con_map, &sol.y, &sol.z); + + // Write a `.sol` if requested: primal x and recovered constraint duals in + // the AMPL `.sol` convention. + if let Some(path) = sol_path { + let payload = nl_writer::SolutionFile { + message: &format!("POUNCE {} IPM (pounce-convex): {msg}", class.name()), + x: &sol.x, + lambda: &lambda, + solve_result_num: srn, + suffixes: &[], + }; + if let Err(e) = nl_writer::write_sol_file(path, &payload) { + eprintln!("pounce: failed to write {}: {e}", path.display()); + return ExitCode::from(2); + } + } + + // Emit the JSON solve report, when requested — same `pounce.solve-report/v1` + // schema as the NLP path, so the benchmark harness can compare QP and NLP + // solves uniformly. (Per-iteration history is NLP-only for now; the convex + // driver does not yet feed the iterate trace, so `iterations` stays empty + // even at Full detail.) + if let Some((json_path, detail, input)) = json_cfg { + let mut builder = ReportBuilder::new(detail, input); + builder.problem.n_variables = qp.n as _; + builder.problem.n_constraints = lambda.len() as _; + builder.problem.n_objectives = 1; + builder.problem.minimize = prob.minimize; + builder.solution.status = qp_status_to_ars(sol.status); + builder.solution.solve_result_num = srn; + builder.solution.objective = reported_obj; + builder.solution.x = sol.x.clone(); + builder.solution.lambda = lambda.clone(); + builder.stats.iteration_count = sol.iters as _; + builder.stats.final_objective = reported_obj; + builder.stats.total_wallclock_time_secs = elapsed; + // Real final KKT residuals (from pounce-convex), so the harness sees + // genuine convergence numbers rather than zeros. + let res = sol.kkt_residuals(&qp); + builder.stats.final_constr_viol = res.primal_infeasibility; + builder.stats.final_dual_inf = res.dual_infeasibility; + builder.stats.final_compl = res.complementarity; + builder.stats.final_kkt_error = res.kkt_error(); + // Per-iteration convergence trace at Full detail (the convex IPM's + // iterate records map onto the report's IterRecord schema, shared with + // the NLP path so the harness reads one format). + if matches!(detail, ReportDetail::Full) { + builder.iterations = sol + .iterates + .iter() + .map(|it| IterRecord { + iter: it.iter as _, + objective: it.objective, + inf_pr: it.primal_infeasibility, + inf_du: it.dual_infeasibility, + mu: it.mu, + alpha_primal: it.alpha_primal, + alpha_dual: it.alpha_dual, + ..IterRecord::default() + }) + .collect(); + } + let report = builder.finish(); + if let Err(e) = write_report_file(json_path, &report) { + eprintln!( + "pounce: failed to write JSON report to {}: {e}", + json_path.display() + ); + } else { + eprintln!("pounce: wrote {}", json_path.display()); + } + } + + if ok { + ExitCode::SUCCESS + } else { + ExitCode::from(1) + } +} + /// Translate the CLI's `--dump …` flags into a live `DiagnosticsState`. /// Returns `Ok(None)` when no `--dump ` was given (the `--dump-dir` /// / `--dump-format` flags alone don't activate dumping). diff --git a/crates/pounce-cli/src/print.rs b/crates/pounce-cli/src/print.rs index c3410f57..1b2e20d3 100644 --- a/crates/pounce-cli/src/print.rs +++ b/crates/pounce-cli/src/print.rs @@ -220,6 +220,7 @@ pub fn logo_rows(color: bool) -> Vec { r as f64 / (rows - 1) as f64 } }; + // Molten color for a claw cell at row `r` (0 = top, hottest). let molten = |r: usize| { let t = vfrac(r); if t < 0.5 { @@ -288,7 +289,8 @@ pub fn print_banner(linear_solver: &str) { let rule = "*".repeat(BANNER_WIDTH); println!("{rule}"); - println!("This program contains POUNCE, a Rust port of Ipopt for nonlinear optimization."); + println!("This program contains POUNCE, a pure-Rust interior-point optimization solver"); + println!("for nonlinear, conic, and global problems (its NLP core is ported from Ipopt)."); println!("Released under the Eclipse Public License (EPL) — drop-in compatible with Ipopt."); println!(" For more information visit {link}"); println!("{rule}"); diff --git a/crates/pounce-cli/src/qp_extract.rs b/crates/pounce-cli/src/qp_extract.rs new file mode 100644 index 00000000..d00bcdca --- /dev/null +++ b/crates/pounce-cli/src/qp_extract.rs @@ -0,0 +1,538 @@ +//! Extract a `pounce_convex::QpProblem` (standard form) from a parsed +//! `.nl` problem, for the LP/QP dispatch path (Phase 2). +//! +//! The classifier (`crate::dispatch`) has already decided the problem is +//! an LP or convex QP; this module marshals the parsed `NlProblem` into +//! the standard form the convex IPM consumes: +//! +//! ```text +//! minimize ½ xᵀP x + cᵀx +//! subject to A x = b (equalities) +//! G x ≤ h (inequalities, incl. finite var bounds) +//! ``` +//! +//! Mapping from the `.nl` representation: +//! - **Objective.** `P` is the Hessian of the (degree-≤2) objective — +//! recovered with the same `analyze_quadratic` the classifier uses, so +//! `P` here is exactly the matrix whose definiteness was tested. `c` +//! is the objective's linear part. A `maximize` objective is negated +//! into a minimization. +//! - **Constraints.** Each row has a linear part and bounds `g_l ≤ row ≤ +//! g_u`. An equality (`g_l == g_u`) becomes a row of `A`; a one- or +//! two-sided inequality becomes one or two rows of `G` (`row ≤ g_u` +//! and/or `−row ≤ −g_l`). +//! - **Variable bounds.** Finite `x_l`/`x_u` become `G` rows +//! (`−x_i ≤ −x_l`, `x_i ≤ x_u`); the `.nl` "infinity" sentinel +//! (`|v| ≥ 1e19`) is treated as no bound. + +use crate::dispatch::analyze_quadratic_full; +use crate::nl_reader::NlProblem; +use pounce_convex::{QpProblem, Triplet}; + +/// The `.nl` infinity sentinel: AMPL writes ±1e20-ish for "no bound"; +/// upstream Ipopt treats anything with magnitude ≥ 1e19 as infinite. +const NL_INF: f64 = 1e19; + +fn is_finite_bound(v: f64) -> bool { + v.abs() < NL_INF +} + +/// Convert a classified LP/convex-QP `NlProblem` into `QpProblem` +/// standard form. Returns `None` if the objective is not actually a +/// degree-≤2 polynomial (should not happen for a problem the classifier +/// routed here, but the conversion is total and falls back gracefully). +pub fn extract_qp(prob: &NlProblem) -> Option { + Some(extract_qp_with_map(prob)?.0) // drops con_map + reporting constant +} + +/// Where each `.nl` constraint's rows landed in the standard-form QP, so +/// the QP's multipliers can be mapped back to a per-`.nl`-constraint +/// dual for the `.sol`. One entry per original constraint, in order. +#[derive(Debug, Clone)] +pub enum ConRowMap { + /// Equality constraint → row `a_row` of `A` (multiplier `y[a_row]`). + Eq { a_row: usize }, + /// Inequality / range constraint → up to two rows of `G`: the + /// `row ≤ g_u` upper bound and/or the `−row ≤ −g_l` lower bound + /// (multipliers `z[..]`, each ≥ 0). + Ineq { + upper: Option, + lower: Option, + }, +} + +/// Extract the QP, the constraint→row provenance map, and the objective +/// constant folded into the nonlinear tree (see below), together. +/// +/// The third return value is the **degree-0 term of the nonlinear +/// objective** (e.g. the `+9` of `(x₀−3)²` that AMPL/Pyomo emit inside the +/// nonlinear tree rather than in `NlProblem::obj_constant`). The QP itself +/// ignores it — it does not move the minimizer — but the caller must add +/// it to the *reported* objective so the convex solve agrees with the NLP +/// path. It is returned in the problem's natural (user) sense, *not* +/// multiplied by the maximize/minimize `sign`. +pub fn extract_qp_with_map(prob: &NlProblem) -> Option<(QpProblem, Vec, f64)> { + let n = prob.n; + let sign = if prob.minimize { 1.0 } else { -1.0 }; + + // --- objective Hessian P (lower triangle) + nonlinear-tree linear part + // + nonlinear-tree constant (degree-0 term, for reporting only) --- + let (hess, obj_nl_linear, obj_nl_constant) = analyze_quadratic_full(&prob.obj_nonlinear, n)?; + let mut p_lower: Vec = Vec::with_capacity(hess.len()); + for ((i, j), v) in &hess { + // analyze_quadratic returns (i ≤ j) upper-ish keys; store as + // lower triangle (row ≥ col) for the solver. + let (row, col) = if i >= j { (*i, *j) } else { (*j, *i) }; + p_lower.push(Triplet::new(row, col, sign * v)); + } + + // --- objective linear term c --- + // Two disjoint sources, exactly as the NLP path's eval_f sums them: + // the `.nl` linear section (`obj_linear`) and the degree-1 terms AMPL + // kept inside the nonlinear objective tree (e.g. the `−6·x₀` of + // `(x₀−3)²`). Dropping the latter silently solves the wrong objective. + let mut c = vec![0.0; n]; + for (var, coef) in &prob.obj_linear { + c[*var] += sign * coef; + } + for (var, coef) in &obj_nl_linear { + c[*var] += sign * coef; + } + + // --- constraints: equalities → A x = b, inequalities → G x ≤ h --- + let mut a: Vec = Vec::new(); + let mut b: Vec = Vec::new(); + let mut g: Vec = Vec::new(); + let mut h: Vec = Vec::new(); + let mut con_map: Vec = Vec::with_capacity(prob.con_linear.len()); + + for (row, lin) in prob.con_linear.iter().enumerate() { + let lo = prob.g_l[row]; + let hi = prob.g_u[row]; + if lo == hi && is_finite_bound(lo) { + // Equality row. + let eq_row = next_row(&b); + for (var, coef) in lin { + a.push(Triplet::new(eq_row, *var, *coef)); + } + b.push(lo); + con_map.push(ConRowMap::Eq { a_row: eq_row }); + } else { + // Upper bound: row ≤ hi. + let upper = if is_finite_bound(hi) { + let gr = next_row(&h); + for (var, coef) in lin { + g.push(Triplet::new(gr, *var, *coef)); + } + h.push(hi); + Some(gr) + } else { + None + }; + // Lower bound: row ≥ lo ⇔ −row ≤ −lo. + let lower = if is_finite_bound(lo) { + let gr = next_row(&h); + for (var, coef) in lin { + g.push(Triplet::new(gr, *var, -*coef)); + } + h.push(-lo); + Some(gr) + } else { + None + }; + con_map.push(ConRowMap::Ineq { upper, lower }); + } + } + + // --- variable bounds as G rows (not part of the constraint map) --- + for i in 0..n { + let xl = prob.x_l[i]; + let xu = prob.x_u[i]; + if is_finite_bound(xu) { + let gr = next_row(&h); + g.push(Triplet::new(gr, i, 1.0)); // x_i ≤ xu + h.push(xu); + } + if is_finite_bound(xl) { + let gr = next_row(&h); + g.push(Triplet::new(gr, i, -1.0)); // −x_i ≤ −xl + h.push(-xl); + } + } + + Some(( + QpProblem { + n, + p_lower, + c, + a, + b, + g, + h, + // Variable bounds are currently emitted as `G` rows (see the + // bound-handling above), so the explicit box is left empty. + lb: Vec::new(), + ub: Vec::new(), + }, + con_map, + obj_nl_constant, + )) +} + +/// Map the QP solver's multipliers `(y, z)` back to a per-`.nl`- +/// constraint dual vector (length `prob.m`), in the AMPL `.sol` +/// convention used by POUNCE's NLP path. +/// +/// The QP solver enforces stationarity `∇f + Aᵀy + Gᵀz = 0` with +/// `z ≥ 0`, where each inequality `.nl` row contributes a `row ≤ g_u` +/// (`+row`) and/or `−row ≤ −g_l` (`−row`) `G` row. The per-constraint +/// `.nl`/Ipopt multiplier `λ` is recovered as: +/// - equality: `λ = sign · y[a_row]`; +/// - inequality: `λ = sign · (z_upper − z_lower)` — at most one of the +/// two bound rows is active at a solution. +/// +/// The inequality sign (`z_upper − z_lower`, *not* `z_lower − z_upper`) +/// is fixed to match POUNCE's NLP path, which is the reference for what +/// a POUNCE `.sol` carries; this is verified empirically against the NLP +/// solve in the crate tests. `sign` undoes the maximize→minimize +/// negation so the reported dual is in the user's original sense. +pub fn recover_duals(prob: &NlProblem, con_map: &[ConRowMap], y: &[f64], z: &[f64]) -> Vec { + let sign = if prob.minimize { 1.0 } else { -1.0 }; + con_map + .iter() + .map(|m| match m { + ConRowMap::Eq { a_row } => sign * y[*a_row], + ConRowMap::Ineq { upper, lower } => { + let zu = upper.map(|r| z[r]).unwrap_or(0.0); + let zl = lower.map(|r| z[r]).unwrap_or(0.0); + sign * (zu - zl) + } + }) + .collect() +} + +/// The next 0-based row index for a constraint block keyed by its RHS +/// vector's current length. +fn next_row(rhs: &[f64]) -> usize { + rhs.len() +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::nl_reader::{BinOp, Expr}; + use pounce_convex::{solve_qp_ipm, QpOptions, QpStatus}; + use pounce_feral::FeralSolverInterface; + use pounce_linsol::SparseSymLinearSolverInterface; + + fn backend() -> Box { + Box::new(FeralSolverInterface::new()) + } + + fn pow2(var: usize) -> Expr { + Expr::Binary( + BinOp::Pow, + Box::new(Expr::Var(var)), + Box::new(Expr::Const(2.0)), + ) + } + + /// min (x0)^2 + (x1)^2 s.t. x0 + x1 = 2, no var bounds → (1,1), f*=2. + #[test] + fn extract_and_solve_equality_qp() { + let prob = NlProblem { + n: 2, + m: 1, + num_obj: 1, + minimize: true, + obj_nonlinear: Expr::Binary(BinOp::Add, Box::new(pow2(0)), Box::new(pow2(1))), + obj_linear: vec![], + obj_constant: 0.0, + con_nonlinear: vec![Expr::Const(0.0)], + con_linear: vec![vec![(0, 1.0), (1, 1.0)]], + x_l: vec![-2e19, -2e19], + x_u: vec![2e19, 2e19], + g_l: vec![2.0], + g_u: vec![2.0], + x0: vec![0.0, 0.0], + lambda0: vec![0.0], + suffixes: Default::default(), + imported_funcs: Vec::new(), + var_names: Vec::new(), + con_names: Vec::new(), + }; + let (qp, con_map, obj_const) = extract_qp_with_map(&prob).expect("extract"); + // No constant anywhere in this objective. + assert_eq!(obj_const, 0.0); + // P = 2I → two diagonal entries. + assert_eq!(qp.p_lower.len(), 2); + assert_eq!(qp.m_eq(), 1); + assert_eq!(qp.m_ineq(), 0); + + let sol = solve_qp_ipm(&qp, &QpOptions::default(), backend); + assert_eq!(sol.status, QpStatus::Optimal); + assert!((sol.x[0] - 1.0).abs() < 1e-6, "x0={}", sol.x[0]); + assert!((sol.x[1] - 1.0).abs() < 1e-6, "x1={}", sol.x[1]); + assert!((sol.obj - 2.0).abs() < 1e-6, "obj={}", sol.obj); + + // KKT for the equality: ∇f + y·∇g = 0 → 2x_i + y = 0 at x=1 → y=−2. + let lambda = recover_duals(&prob, &con_map, &sol.y, &sol.z); + assert_eq!(lambda.len(), 1); + assert!( + (lambda[0] - (-2.0)).abs() < 1e-5, + "equality dual={}", + lambda[0] + ); + } + + /// Regression for the dropped-linear-term bug: the objective `(x0-3)²` + /// lives entirely in the nonlinear tree, so its linear part (`−6·x0`) + /// must be folded into `c`. Without it the solve minimizes `x0²` + /// (optimum 0) instead of `(x0-3)²` (optimum 3). + #[test] + fn extract_keeps_linear_term_from_nonlinear_tree() { + // (x0 - 3)^2 = x0^2 - 6 x0 + 9, all in obj_nonlinear. + let obj = Expr::Binary( + BinOp::Pow, + Box::new(Expr::Binary( + BinOp::Sub, + Box::new(Expr::Var(0)), + Box::new(Expr::Const(3.0)), + )), + Box::new(Expr::Const(2.0)), + ); + let prob = NlProblem { + n: 1, + m: 0, + num_obj: 1, + minimize: true, + obj_nonlinear: obj, + obj_linear: vec![], + obj_constant: 0.0, + con_nonlinear: vec![], + con_linear: vec![], + x_l: vec![-2e19], + x_u: vec![2e19], + g_l: vec![], + g_u: vec![], + x0: vec![0.0], + lambda0: vec![], + suffixes: Default::default(), + imported_funcs: Vec::new(), + var_names: Vec::new(), + con_names: Vec::new(), + }; + let qp = extract_qp(&prob).expect("extract"); + assert_eq!(qp.c.len(), 1); + assert!( + (qp.c[0] - (-6.0)).abs() < 1e-12, + "c[0]={} — linear term from the nonlinear tree was dropped", + qp.c[0] + ); + // P = 2 (one diagonal entry). + assert_eq!(qp.p_lower.len(), 1); + + let sol = solve_qp_ipm(&qp, &QpOptions::default(), backend); + assert_eq!(sol.status, QpStatus::Optimal); + assert!( + (sol.x[0] - 3.0).abs() < 1e-6, + "x0={} (expected 3)", + sol.x[0] + ); + } + + /// Inequality dual sign/magnitude. min x0² s.t. x0 ≥ 1 (a one-sided + /// inequality g_l=1, g_u=+inf). Optimum x0=1, active. The expected + /// dual −2.0 is the value POUNCE's *NLP* path writes for this exact + /// problem (verified by running `solver_selection=nlp` on the same + /// `.nl`); recover_duals must match that reference convention. + #[test] + fn inequality_dual_recovered() { + let prob = NlProblem { + n: 1, + m: 1, + num_obj: 1, + minimize: true, + obj_nonlinear: pow2(0), + obj_linear: vec![], + obj_constant: 0.0, + con_nonlinear: vec![Expr::Const(0.0)], + con_linear: vec![vec![(0, 1.0)]], // g(x) = x0 + x_l: vec![-2e19], + x_u: vec![2e19], + g_l: vec![1.0], // x0 ≥ 1 + g_u: vec![2e19], + x0: vec![0.0], + lambda0: vec![0.0], + suffixes: Default::default(), + imported_funcs: Vec::new(), + var_names: Vec::new(), + con_names: Vec::new(), + }; + let (qp, con_map, obj_const) = extract_qp_with_map(&prob).expect("extract"); + // This model puts its constant in the `obj_constant` field, not the + // nonlinear tree, so the tree constant is 0 here. + assert_eq!(obj_const, 0.0); + // One inequality row (the lower bound row −x0 ≤ −1); no upper. + assert_eq!(qp.m_ineq(), 1); + let sol = solve_qp_ipm(&qp, &QpOptions::default(), backend); + assert_eq!(sol.status, QpStatus::Optimal); + assert!((sol.x[0] - 1.0).abs() < 1e-6, "x0={}", sol.x[0]); + let lambda = recover_duals(&prob, &con_map, &sol.y, &sol.z); + assert!((lambda[0] - (-2.0)).abs() < 1e-5, "ineq dual={}", lambda[0]); + } + + /// Regression: a constant folded into the *nonlinear objective tree* + /// (not the `obj_constant` field) must still reach the reported + /// objective. This is the real `.nl` shape AMPL/Pyomo emit for + /// `min (x0-3)^2` — the whole `x0^2 - 6 x0 + 9` lives in the nonlinear + /// tree and `obj_constant == 0`. The convex path used to drop the `+9` + /// and report an objective 9 too small (cf. HS35 in the benchmark + /// comparison). The minimizer is x0 = 1 (upper bound binds), where the + /// true objective is (1-3)^2 = 4. + #[test] + fn tree_embedded_objective_constant_is_recovered() { + // (x0 - 3)^2 as a single nonlinear tree: Pow(Sub(x0, 3), 2). + let obj = Expr::Binary( + BinOp::Pow, + Box::new(Expr::Binary( + BinOp::Sub, + Box::new(Expr::Var(0)), + Box::new(Expr::Const(3.0)), + )), + Box::new(Expr::Const(2.0)), + ); + let prob = NlProblem { + n: 1, + m: 0, + num_obj: 1, + minimize: true, + obj_nonlinear: obj, + obj_linear: vec![], + obj_constant: 0.0, // the +9 is in the TREE, not here + con_nonlinear: vec![], + con_linear: vec![], + x_l: vec![0.0], + x_u: vec![1.0], + g_l: vec![], + g_u: vec![], + x0: vec![0.0], + lambda0: vec![], + suffixes: Default::default(), + imported_funcs: Vec::new(), + var_names: Vec::new(), + con_names: Vec::new(), + }; + let (qp, _con_map, obj_const) = extract_qp_with_map(&prob).expect("extract"); + // The degree-0 term of (x0-3)^2 is +9, recovered from the tree. + assert!((obj_const - 9.0).abs() < 1e-12, "tree constant={obj_const}"); + let sol = solve_qp_ipm(&qp, &QpOptions::default(), backend); + assert_eq!(sol.status, QpStatus::Optimal); + assert!((sol.x[0] - 1.0).abs() < 1e-6, "x0={}", sol.x[0]); + // Reported objective = (½xᵀPx + cᵀx) + obj_const must equal the true + // (1-3)^2 = 4, not the constant-dropped −5. + let reported = sol.obj + obj_const; + assert!((reported - 4.0).abs() < 1e-5, "reported obj={reported}"); + } + + /// Bound-constrained: min (x0-3)^2 = x0^2 - 6 x0 + 9, 0 ≤ x0 ≤ 1. + /// Optimum x0 = 1 (upper bound binds). Here the constant 9 is carried + /// in the `obj_constant` field (not the tree), so the extracted tree + /// constant is 0 (asserted inside). + #[test] + fn extract_and_solve_bounded_qp() { + let prob = NlProblem { + n: 1, + m: 0, + num_obj: 1, + minimize: true, + obj_nonlinear: pow2(0), + obj_linear: vec![(0, -6.0)], + obj_constant: 9.0, + con_nonlinear: vec![], + con_linear: vec![], + x_l: vec![0.0], + x_u: vec![1.0], + g_l: vec![], + g_u: vec![], + x0: vec![0.0], + lambda0: vec![], + suffixes: Default::default(), + imported_funcs: Vec::new(), + var_names: Vec::new(), + con_names: Vec::new(), + }; + let qp = extract_qp(&prob).expect("extract"); + // Two var-bound rows (x0 ≤ 1, −x0 ≤ 0). + assert_eq!(qp.m_ineq(), 2); + let sol = solve_qp_ipm(&qp, &QpOptions::default(), backend); + assert_eq!(sol.status, QpStatus::Optimal); + assert!((sol.x[0] - 1.0).abs() < 1e-6, "x0={}", sol.x[0]); + } + + /// LP: min −x0 − x1, 0 ≤ x ≤ 1 → (1,1). + #[test] + fn extract_and_solve_lp() { + let prob = NlProblem { + n: 2, + m: 0, + num_obj: 1, + minimize: true, + obj_nonlinear: Expr::Const(0.0), + obj_linear: vec![(0, -1.0), (1, -1.0)], + obj_constant: 0.0, + con_nonlinear: vec![], + con_linear: vec![], + x_l: vec![0.0, 0.0], + x_u: vec![1.0, 1.0], + g_l: vec![], + g_u: vec![], + x0: vec![0.0, 0.0], + lambda0: vec![], + suffixes: Default::default(), + imported_funcs: Vec::new(), + var_names: Vec::new(), + con_names: Vec::new(), + }; + let qp = extract_qp(&prob).expect("extract"); + assert!(qp.p_lower.is_empty(), "LP has no Hessian"); + assert_eq!(qp.m_ineq(), 4); // 2 vars × (upper + lower) + let sol = solve_qp_ipm(&qp, &QpOptions::default(), backend); + assert_eq!(sol.status, QpStatus::Optimal); + assert!((sol.x[0] - 1.0).abs() < 1e-6); + assert!((sol.x[1] - 1.0).abs() < 1e-6); + } + + /// maximize x0 s.t. 0 ≤ x0 ≤ 5 → x0 = 5. Tests sign flip on a + /// maximize objective. + #[test] + fn extract_maximize_negates() { + let prob = NlProblem { + n: 1, + m: 0, + num_obj: 1, + minimize: false, + obj_nonlinear: Expr::Const(0.0), + obj_linear: vec![(0, 1.0)], + obj_constant: 0.0, + con_nonlinear: vec![], + con_linear: vec![], + x_l: vec![0.0], + x_u: vec![5.0], + g_l: vec![], + g_u: vec![], + x0: vec![0.0], + lambda0: vec![], + suffixes: Default::default(), + imported_funcs: Vec::new(), + var_names: Vec::new(), + con_names: Vec::new(), + }; + let qp = extract_qp(&prob).expect("extract"); + // minimize −x0. + assert_eq!(qp.c[0], -1.0); + let sol = solve_qp_ipm(&qp, &QpOptions::default(), backend); + assert_eq!(sol.status, QpStatus::Optimal); + assert!((sol.x[0] - 5.0).abs() < 1e-6, "x0={}", sol.x[0]); + } +} diff --git a/crates/pounce-cli/tests/cblib_cbf.rs b/crates/pounce-cli/tests/cblib_cbf.rs new file mode 100644 index 00000000..720c5074 --- /dev/null +++ b/crates/pounce-cli/tests/cblib_cbf.rs @@ -0,0 +1,80 @@ +//! CBLIB exponential-cone benchmark tier: parse real `.cbf` instances from +//! the Conic Benchmark Library, map them to a pounce conic program, and solve +//! them through the non-symmetric (exp-cone) HSDE driver. +//! +//! These are the literal geometric-program instances from the source papers +//! (Demberg `demb761`, Beck `beck751`, Fang `fang88`), the gold-standard +//! broad validation called for in `dev-notes/hsde.md`. Published reference +//! objectives are unavailable (the CBLIB solution files 404), so correctness +//! is cross-checked against an independent smooth NLP in `cblib_vs_nlp.rs`; +//! this file checks that the parse → map → solve pipeline reaches a verified +//! optimum on each instance. + +use pounce_cli::cbf; +use pounce_convex::{solve_socp_ipm, QpOptions, QpStatus}; +use pounce_feral::FeralSolverInterface; +use pounce_linsol::SparseSymLinearSolverInterface; + +fn backend() -> Box { + Box::new(FeralSolverInterface::new()) +} + +/// Parse, map, and solve a CBLIB instance; return `(status, cbf_objective)`. +fn solve_instance(text: &str) -> (QpStatus, f64) { + let model = cbf::parse(text).expect("parse CBF"); + let cp = model.to_conic().expect("map to conic"); + let opts = QpOptions { + max_iter: 500, + ..QpOptions::default() + }; + let sol = solve_socp_ipm(&cp.prob, &cp.cones, &opts, backend); + let obj = cp.cbf_objective(sol.obj, model.minimize); + (sol.status, obj) +} + +const DEMB761: &str = include_str!("data/cblib/demb761.cbf"); +const BECK751: &str = include_str!("data/cblib/beck751.cbf"); +const FANG88: &str = include_str!("data/cblib/fang88.cbf"); +const POW3: &str = include_str!("data/cblib/pow3_synthetic.cbf"); +const SDP: &str = include_str!("data/cblib/sdp_synthetic.cbf"); + +#[test] +fn demb761_solves_to_optimum() { + let (status, obj) = solve_instance(DEMB761); + assert_eq!(status, QpStatus::Optimal, "demb761 status"); + assert!(obj.is_finite(), "demb761 objective finite: {obj}"); +} + +#[test] +fn beck751_solves_to_optimum() { + let (status, obj) = solve_instance(BECK751); + assert_eq!(status, QpStatus::Optimal, "beck751 status"); + assert!(obj.is_finite(), "beck751 objective finite: {obj}"); +} + +#[test] +fn fang88_solves_to_optimum() { + let (status, obj) = solve_instance(FANG88); + assert_eq!(status, QpStatus::Optimal, "fang88 status"); + assert!(obj.is_finite(), "fang88 objective finite: {obj}"); +} + +#[test] +fn power_cone_synthetic_hits_known_optimum() { + // max x2 s.t. (x0,x1,x2) ∈ POW(α=½), x0=2, x1=½ → x2 = 2^½·½^½ = 1. + // Validates the POWCONES parse, the α = α₀/(α₀+α₁) resolution, and the + // CBF→pounce power-cone permutation end to end. + let (status, obj) = solve_instance(POW3); + assert_eq!(status, QpStatus::Optimal, "pow3 status"); + assert!((obj - 1.0).abs() < 1e-6, "pow3 objective {obj} vs 1"); +} + +#[test] +fn sdp_psdcon_synthetic_hits_known_optimum() { + // max λ s.t. (M − λI) ⪰ 0, M = diag(2,5) → λ = λ_min(M) = 2. + // Validates the PSDCON / HCOORD / DCOORD reader (affine PSD constraint → + // a pounce Psd cone with √2-scaled svec rows) end to end. + let (status, obj) = solve_instance(SDP); + assert_eq!(status, QpStatus::Optimal, "sdp status"); + assert!((obj - 2.0).abs() < 1e-5, "sdp objective {obj} vs 2"); +} diff --git a/crates/pounce-cli/tests/cblib_vs_nlp.rs b/crates/pounce-cli/tests/cblib_vs_nlp.rs new file mode 100644 index 00000000..68d4be74 --- /dev/null +++ b/crates/pounce-cli/tests/cblib_vs_nlp.rs @@ -0,0 +1,488 @@ +//! CBLIB cross-check: solve each exponential-cone instance **twice** — +//! once as a conic program through the non-symmetric HSDE driver, once as a +//! smooth NLP through POUNCE's filter-IPM — and assert the two independent +//! solvers agree on the objective. +//! +//! The smooth NLP reuses the CBF variables: each `VAR EXP` triple +//! `(u₀, u₁, u₂)` (CBF order: `u₀ ≥ u₁·exp(u₂/u₁)`) becomes the constraint +//! `g = u₀ − u₁·exp(u₂/u₁) ≥ 0` with `u₁ ≥ 0`, supplied with its exact +//! gradient and Hessian; the `L=` / `L-` constraint rows stay linear. Because +//! the conic and NLP paths share no code, agreement is strong evidence the +//! exp-cone benchmark pipeline (parse → map → solve) is correct — the +//! validation strategy from `dev-notes/hsde.md`. + +use pounce_algorithm::application::IpoptApplication; +use pounce_cli::cbf::{self, CbfModel, ConeKind}; +use pounce_common::types::{Index, Number}; +use pounce_convex::{solve_socp_ipm, QpOptions, QpStatus}; +use pounce_feral::FeralSolverInterface; +use pounce_linsol::SparseSymLinearSolverInterface; +use pounce_nlp::return_codes::ApplicationReturnStatus; +use pounce_nlp::tnlp::{ + BoundsInfo, IndexStyle, IpoptCq, IpoptData, NlpInfo, Solution, SparsityRequest, StartingPoint, + TNLP, +}; +use std::cell::RefCell; +use std::rc::Rc; + +const INF: f64 = 1e20; + +fn backend() -> Box { + Box::new(FeralSolverInterface::new()) +} + +/// A CBF power cone in smooth-NLP form: `|x_bnd| ≤ u₀^α · u₁^{1−α}`, +/// `u₀,u₁ ≥ 0`, modeled as the two constraints `φ ∓ x_bnd ≥ 0` with +/// `φ = u₀^α u₁^{1−α}`. +#[derive(Clone, Copy)] +struct PowCon { + u0: usize, + u1: usize, + bnd: usize, + alpha: f64, +} + +/// The smooth-NLP form of a CBF instance (VAR exp / power cones). +struct CbfNlp { + n: usize, + lb: Vec, + ub: Vec, + x0: Vec, + c: Vec, + /// Linear constraint rows (`(col, coeff)` pairs) with their bounds. + lin_rows: Vec>, + lin_gl: Vec, + lin_gu: Vec, + /// Each exp constraint's variable triple `(u₀, u₁, u₂)` in CBF order. + exp: Vec<[usize; 3]>, + /// Power cones (each → two NLP constraints `φ ∓ x_bnd ≥ 0`). + pow: Vec, + captured_obj: RefCell>, +} + +impl CbfNlp { + /// Build from a parsed model. Errors (as a panic in this test harness) if + /// the instance uses constraint-side exp/SOC cones, which this smooth + /// form does not cover (the CBLIB GP instances put all exp cones on + /// variables). + fn from_model(m: &CbfModel) -> CbfNlp { + let n = m.num_var; + let mut lb = vec![-INF; n]; + let mut ub = vec![INF; n]; + let mut exp = Vec::new(); + let mut pow = Vec::new(); + + // Variable cones → bounds and exp/power constraints. + let mut v = 0usize; + for cone in &m.var_cones { + match cone.kind { + ConeKind::Free => {} + ConeKind::Nonneg => { + for j in 0..cone.dim { + lb[v + j] = 0.0; + } + } + ConeKind::Nonpos => { + for j in 0..cone.dim { + ub[v + j] = 0.0; + } + } + ConeKind::Zero => { + for j in 0..cone.dim { + lb[v + j] = 0.0; + ub[v + j] = 0.0; + } + } + ConeKind::Exp => { + // u₁ (the middle) must be ≥ 0 for the cone domain. + lb[v + 1] = 0.0; + exp.push([v, v + 1, v + 2]); + } + ConeKind::Pow => { + // CBF (x₀,x₁,x₂): x₀^β₀ x₁^β₁ ≥ |x₂|, x₀,x₁ ≥ 0. + lb[v] = 0.0; + lb[v + 1] = 0.0; + pow.push(PowCon { + u0: v, + u1: v + 1, + bnd: v + 2, + alpha: cone.alpha.expect("POW cone has α"), + }); + } + ConeKind::SecondOrder => panic!("SOC var cone not supported in NLP cross-check"), + } + v += cone.dim; + } + + // Constraint cones → linear rows with bounds (Ax + b ∈ K ⇒ bounds on + // Ax). All CBLIB GP constraint cones are L= / L- / L+. + let a_rows = { + let mut rows = vec![Vec::new(); m.num_con]; + for &(r, col, val) in &m.a { + rows[r].push((col, val)); + } + rows + }; + let mut lin_rows = Vec::new(); + let mut lin_gl = Vec::new(); + let mut lin_gu = Vec::new(); + let mut r = 0usize; + for cone in &m.con_cones { + for i in 0..cone.dim { + let row = r + i; + let (gl, gu) = match cone.kind { + ConeKind::Zero => (-m.b[row], -m.b[row]), // Ax = −b + ConeKind::Nonpos => (-INF, -m.b[row]), // Ax ≤ −b + ConeKind::Nonneg => (-m.b[row], INF), // Ax ≥ −b + other => panic!("CON cone {other:?} not supported in NLP cross-check"), + }; + lin_rows.push(a_rows[row].clone()); + lin_gl.push(gl); + lin_gu.push(gu); + } + r += cone.dim; + } + + // Start: exp middles and power base vars at 1 (a generic interior of + // the cone domain), everything else at 0 — independent of the conic + // solution. + let mut x0 = vec![0.0; n]; + for t in &exp { + x0[t[1]] = 1.0; + } + for p in &pow { + x0[p.u0] = 1.0; + x0[p.u1] = 1.0; + } + // Respect fixed (Zero) variables. + for j in 0..n { + if lb[j] == ub[j] { + x0[j] = lb[j]; + } + } + + CbfNlp { + n, + lb, + ub, + x0, + c: m.c.clone(), + lin_rows, + lin_gl, + lin_gu, + exp, + pow, + captured_obj: RefCell::new(None), + } + } + + fn n_lin(&self) -> usize { + self.lin_rows.len() + } + + /// Number of NLP constraints contributed by power cones (two each). + fn n_pow_con(&self) -> usize { + 2 * self.pow.len() + } +} + +/// Evaluate one power cone: `φ = u₀^α · u₁^{1−α}` and `∂φ/∂u₀`, `∂φ/∂u₁`. +fn pow_pieces(x: &[f64], p: &PowCon) -> (f64, f64, f64) { + let u0 = x[p.u0].max(1e-12); + let u1 = x[p.u1].max(1e-12); + let phi = u0.powf(p.alpha) * u1.powf(1.0 - p.alpha); + (phi, p.alpha * phi / u0, (1.0 - p.alpha) * phi / u1) +} + +/// Evaluate one exp constraint `g = u₀ − u₁·exp(u₂/u₁)` and its pieces. +/// Returns `(g, E, r)` with `E = exp(u₂/u₁)`, `r = u₂/u₁`. +fn exp_pieces(x: &[f64], t: &[usize; 3]) -> (f64, f64, f64) { + let (u0, u1, u2) = (x[t[0]], x[t[1]], x[t[2]]); + let u1 = u1.max(1e-12); // guard the domain during the line search + let r = u2 / u1; + let e = r.exp(); + (u0 - u1 * e, e, r) +} + +impl TNLP for CbfNlp { + fn get_nlp_info(&mut self) -> Option { + // Jacobian: linear entries + 3 per exp + 6 per power cone (3 for each + // of the two `φ ∓ x_bnd` constraints). Hessian: 3 per exp + 3 per + // power cone (the φ curvature over (u₀,u₁)). + let nnz_jac: usize = self.lin_rows.iter().map(|r| r.len()).sum::() + + 3 * self.exp.len() + + 6 * self.pow.len(); + Some(NlpInfo { + n: self.n as Index, + m: (self.n_lin() + self.exp.len() + self.n_pow_con()) as Index, + nnz_jac_g: nnz_jac as Index, + nnz_h_lag: (3 * self.exp.len() + 3 * self.pow.len()) as Index, + index_style: IndexStyle::C, + }) + } + + fn get_bounds_info(&mut self, b: BoundsInfo<'_>) -> bool { + b.x_l.copy_from_slice(&self.lb); + b.x_u.copy_from_slice(&self.ub); + let nl = self.n_lin(); + for i in 0..nl { + b.g_l[i] = self.lin_gl[i]; + b.g_u[i] = self.lin_gu[i]; + } + // Exp and power constraints: g ≥ 0. + let n_nonlin = self.exp.len() + self.n_pow_con(); + for k in 0..n_nonlin { + b.g_l[nl + k] = 0.0; + b.g_u[nl + k] = INF; + } + true + } + + fn get_starting_point(&mut self, sp: StartingPoint<'_>) -> bool { + sp.x.copy_from_slice(&self.x0); + true + } + + fn eval_f(&mut self, x: &[Number], _new_x: bool) -> Option { + Some(self.c.iter().zip(x).map(|(&ci, &xi)| ci * xi).sum()) + } + + fn eval_grad_f(&mut self, _x: &[Number], _new_x: bool, grad: &mut [Number]) -> bool { + grad.copy_from_slice(&self.c); + true + } + + fn eval_g(&mut self, x: &[Number], _new_x: bool, g: &mut [Number]) -> bool { + let nl = self.n_lin(); + for (i, row) in self.lin_rows.iter().enumerate() { + g[i] = row.iter().map(|&(c, val)| val * x[c]).sum(); + } + for (k, t) in self.exp.iter().enumerate() { + g[nl + k] = exp_pieces(x, t).0; + } + // Power cones: two constraints each, φ − x_bnd ≥ 0 and φ + x_bnd ≥ 0. + let pbase = nl + self.exp.len(); + for (k, p) in self.pow.iter().enumerate() { + let (phi, _, _) = pow_pieces(x, p); + g[pbase + 2 * k] = phi - x[p.bnd]; + g[pbase + 2 * k + 1] = phi + x[p.bnd]; + } + true + } + + fn eval_jac_g( + &mut self, + x: Option<&[Number]>, + _new_x: bool, + mode: SparsityRequest<'_>, + ) -> bool { + let nl = self.n_lin(); + match mode { + SparsityRequest::Structure { irow, jcol } => { + let mut k = 0; + for (r, row) in self.lin_rows.iter().enumerate() { + for &(c, _) in row { + irow[k] = r as Index; + jcol[k] = c as Index; + k += 1; + } + } + for (e, t) in self.exp.iter().enumerate() { + for &col in t { + irow[k] = (nl + e) as Index; + jcol[k] = col as Index; + k += 1; + } + } + // Power cones: each contributes rows `g₊` then `g₋`, both with + // nonzeros at (u₀, u₁, bnd). + let pbase = nl + self.exp.len(); + for (e, p) in self.pow.iter().enumerate() { + for sign in 0..2 { + let row = (pbase + 2 * e + sign) as Index; + for &col in &[p.u0, p.u1, p.bnd] { + irow[k] = row; + jcol[k] = col as Index; + k += 1; + } + } + } + } + SparsityRequest::Values { values } => { + let x = x.expect("jac needs x"); + let mut k = 0; + for row in &self.lin_rows { + for &(_, val) in row { + values[k] = val; + k += 1; + } + } + for t in &self.exp { + let (_, e, r) = exp_pieces(x, t); + values[k] = 1.0; // ∂g/∂u₀ + values[k + 1] = e * (r - 1.0); // ∂g/∂u₁ + values[k + 2] = -e; // ∂g/∂u₂ + k += 3; + } + for p in &self.pow { + let (_, dphi0, dphi1) = pow_pieces(x, p); + // g₊ = φ − x_bnd: ∂/∂u₀, ∂/∂u₁, ∂/∂bnd = −1. + values[k] = dphi0; + values[k + 1] = dphi1; + values[k + 2] = -1.0; + // g₋ = φ + x_bnd: same φ grads, ∂/∂bnd = +1. + values[k + 3] = dphi0; + values[k + 4] = dphi1; + values[k + 5] = 1.0; + k += 6; + } + } + } + true + } + + fn eval_h( + &mut self, + x: Option<&[Number]>, + _new_x: bool, + _obj_factor: Number, + lambda: Option<&[Number]>, + _new_lambda: bool, + mode: SparsityRequest<'_>, + ) -> bool { + // Objective is linear and linear constraints have no Hessian, so only + // the exp and power constraints contribute. Exp: λ·∇²g over (u₁,u₂). + // Power: (λ₊+λ₋)·∇²φ over (u₀,u₁). + match mode { + SparsityRequest::Structure { irow, jcol } => { + let mut k = 0; + for t in &self.exp { + let (_, u1, u2) = (t[0], t[1], t[2]); + irow[k] = u1 as Index; + jcol[k] = u1 as Index; + irow[k + 1] = u2 as Index; + jcol[k + 1] = u1 as Index; + irow[k + 2] = u2 as Index; + jcol[k + 2] = u2 as Index; + k += 3; + } + for p in &self.pow { + // u₀ < u₁ (consecutive), so the cross term is row u₁, col u₀. + irow[k] = p.u0 as Index; + jcol[k] = p.u0 as Index; + irow[k + 1] = p.u1 as Index; + jcol[k + 1] = p.u0 as Index; + irow[k + 2] = p.u1 as Index; + jcol[k + 2] = p.u1 as Index; + k += 3; + } + } + SparsityRequest::Values { values } => { + let x = x.expect("hess needs x"); + let lambda = lambda.expect("hess needs lambda"); + let nl = self.n_lin(); + let mut k = 0; + for (e, t) in self.exp.iter().enumerate() { + let (_, ev, r) = exp_pieces(x, t); + let u1 = x[t[1]].max(1e-12); + let lam = lambda[nl + e]; + // ∇²g over (u₁,u₂): [[−E r²/u₁, E r/u₁],[E r/u₁, −E/u₁]]. + values[k] = lam * (-ev * r * r / u1); // (u₁,u₁) + values[k + 1] = lam * (ev * r / u1); // (u₂,u₁) + values[k + 2] = lam * (-ev / u1); // (u₂,u₂) + k += 3; + } + let pbase = nl + self.exp.len(); + for (e, p) in self.pow.iter().enumerate() { + let (phi, _, _) = pow_pieces(x, p); + let u0 = x[p.u0].max(1e-12); + let u1 = x[p.u1].max(1e-12); + let a = p.alpha; + // Both g₊ and g₋ share the Hessian ∇²φ (the ∓x_bnd term is + // linear), so the multipliers add. + let lam = lambda[pbase + 2 * e] + lambda[pbase + 2 * e + 1]; + values[k] = lam * (a * (a - 1.0) * phi / (u0 * u0)); // (u₀,u₀) + values[k + 1] = lam * (a * (1.0 - a) * phi / (u0 * u1)); // (u₁,u₀) + values[k + 2] = lam * (-a * (1.0 - a) * phi / (u1 * u1)); // (u₁,u₁) + k += 3; + } + } + } + true + } + + fn finalize_solution(&mut self, sol: Solution<'_>, _d: &IpoptData, _q: &IpoptCq) { + *self.captured_obj.borrow_mut() = Some(sol.obj_value); + } +} + +/// Solve the conic form; return `(status, cbf_objective)`. +fn solve_conic(m: &CbfModel) -> (QpStatus, f64) { + let cp = m.to_conic().expect("to_conic"); + let opts = QpOptions { + max_iter: 500, + ..QpOptions::default() + }; + let sol = solve_socp_ipm(&cp.prob, &cp.cones, &opts, backend); + (sol.status, cp.cbf_objective(sol.obj, m.minimize)) +} + +/// Solve the smooth-NLP form; return its objective (CBF sense). +fn solve_nlp(m: &CbfModel) -> f64 { + let nlp = CbfNlp::from_model(m); + let mut app = IpoptApplication::new(); + app.initialize().expect("init"); + let _ = app + .options_mut() + .read_from_str("print_level 0\nmax_iter 1000\n", true); + let rc = Rc::new(RefCell::new(nlp)); + let tnlp: Rc> = rc.clone(); + let status = app.optimize_tnlp(Rc::clone(&tnlp)); + assert!( + matches!( + status, + ApplicationReturnStatus::SolveSucceeded + | ApplicationReturnStatus::SolvedToAcceptableLevel + ), + "NLP solve failed: {status:?}" + ); + let obj = rc.borrow().captured_obj.borrow().expect("obj"); + // NLP minimized cᵀx; add the CBF constant (and flip sign for MAX). + let cp = m.to_conic().expect("to_conic"); + cp.cbf_objective(obj, m.minimize) +} + +fn cross_check(label: &str, text: &str) { + let m = cbf::parse(text).expect("parse"); + let (status, conic_obj) = solve_conic(&m); + assert_eq!(status, QpStatus::Optimal, "{label}: conic status"); + let nlp_obj = solve_nlp(&m); + let rel = (conic_obj - nlp_obj).abs() / (1.0 + nlp_obj.abs()); + eprintln!("[{label}] conic={conic_obj:.8} nlp={nlp_obj:.8} rel={rel:.2e}"); + assert!( + rel < 1e-5, + "{label}: conic {conic_obj} vs nlp {nlp_obj} (rel {rel:.2e})" + ); +} + +#[test] +fn demb761_conic_matches_nlp() { + cross_check("demb761", include_str!("data/cblib/demb761.cbf")); +} + +#[test] +fn beck751_conic_matches_nlp() { + cross_check("beck751", include_str!("data/cblib/beck751.cbf")); +} + +#[test] +fn fang88_conic_matches_nlp() { + cross_check("fang88", include_str!("data/cblib/fang88.cbf")); +} + +#[test] +fn power_cone_conic_matches_nlp() { + // The synthetic power-cone instance: conic (ConeSpec::Power) vs the + // smooth |x| ≤ y^α z^{1−α} epigraph NLP. Both should hit x2 = 1. + cross_check("pow3", include_str!("data/cblib/pow3_synthetic.cbf")); +} diff --git a/crates/pounce-cli/tests/data/cblib/README.md b/crates/pounce-cli/tests/data/cblib/README.md new file mode 100644 index 00000000..e776b33b --- /dev/null +++ b/crates/pounce-cli/tests/data/cblib/README.md @@ -0,0 +1,25 @@ +# CBLIB test fixtures + +These are exponential-cone geometric-program instances from the **Conic +Benchmark Library** (CBLIB, ), used as gold-standard +broad validation for the non-symmetric (exp-cone) HSDE solver — see +`dev-notes/hsde.md`, "CBLIB benchmark tier". + +| File | Family | Cones | +|---|---|---| +| `demb761.cbf` | Demberg geometric program | exp (over variables) | +| `beck751.cbf` | Beck geometric program | exp (over variables) | +| `fang88.cbf` | Fang geometric program | exp (over variables) | +| `pow3_synthetic.cbf` | hand-authored (not CBLIB) | power (`POWCONES`) | +| `sdp_synthetic.cbf` | hand-authored (not CBLIB) | semidefinite (`PSDCON`/`DCOORD`) | + +The first three are in Conic Benchmark Format (`.cbf`, version 2), the +plain-text format documented at . They are +small (pure-continuous) and freely distributed by CBLIB for benchmarking; +vendored here so the cross-check tests run offline. + +`pow3_synthetic.cbf` and `sdp_synthetic.cbf` are **not** CBLIB instances — +they are tiny hand-authored problems exercising the `POWCONES` (power-cone) +and `PSDCON`/`HCOORD`/`DCOORD` (affine semidefinite-constraint) sections, +each with a known closed-form optimum (`x₂ = 1` and `λ = 2`). The real CBLIB +power-cone instances (`2013_fir*`) are ~120 MB, impractical to vendor. diff --git a/crates/pounce-cli/tests/data/cblib/beck751.cbf b/crates/pounce-cli/tests/data/cblib/beck751.cbf new file mode 100644 index 00000000..80effaf4 --- /dev/null +++ b/crates/pounce-cli/tests/data/cblib/beck751.cbf @@ -0,0 +1,287 @@ +VER +2 + +OBJSENSE +MIN + +VAR +80 33 +F 12 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +F 1 +F 1 +F 1 +EXP 3 +EXP 3 +EXP 3 +F 1 +F 1 +F 1 +EXP 3 +EXP 3 +EXP 3 +F 1 +F 1 +F 1 +F 1 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +F 1 +F 1 +F 1 +F 1 +EXP 3 +EXP 3 +EXP 3 +EXP 3 + +CON +59 10 +L= 12 +L- 1 +L= 9 +L- 1 +L= 9 +L- 1 +L= 12 +L- 1 +L= 12 +L- 1 + +OBJACOORD +1 +11 1e+0 + +ACOORD +182 +2 0 1e+0 +5 0 -1e+0 +8 0 -2e+0 +11 0 2e+0 +15 0 5e-1 +18 0 3e+0 +25 0 -5e-1 +31 0 -1e+0 +35 0 1e+0 +41 0 -1e+0 +48 0 -2e+0 +51 0 5e-1 +54 0 -3e+0 +2 1 -1e+0 +5 1 -2e+0 +8 1 1e+0 +11 1 2e+0 +18 1 1e+0 +21 1 -1e+0 +25 1 1e+0 +31 1 5e-1 +38 1 1e+0 +41 1 1e+0 +44 1 -2e+0 +48 1 1e+0 +51 1 2e+0 +54 1 -2e+0 +5 2 1e+0 +11 2 -1e+0 +15 2 -1e+0 +18 2 -2e+0 +21 2 1e+0 +25 2 -1e+0 +28 2 1e+0 +35 2 -1.5e+0 +38 2 -5e-1 +41 2 5e-1 +44 2 1e+0 +51 2 1e+0 +54 2 1e+0 +57 2 -2e+0 +2 3 2e+0 +5 3 1e+0 +8 3 -1e+0 +21 3 -5e-1 +28 3 -1e+0 +31 3 -2e+0 +48 3 -1e+0 +51 3 3.333333333333e-1 +57 3 1e+0 +5 4 -1e+0 +8 4 -2e+0 +11 4 5e-1 +25 4 -1e+0 +28 4 -1e+0 +31 4 -1e+0 +35 4 1e+0 +38 4 1e+0 +41 4 1e+0 +44 4 1e+0 +48 4 5e-1 +51 4 -6.666666666667e-1 +54 4 1e+0 +2 5 -3e+0 +8 5 1e+0 +11 5 -2e+0 +15 5 -2e+0 +18 5 1e+0 +21 5 6.666666666667e-1 +25 5 1e+0 +28 5 2e+0 +31 5 3.333333333333e-1 +35 5 -1e+0 +38 5 -1e+0 +44 5 -1e+0 +2 6 -2.5e-1 +5 6 -5e-1 +11 6 1e+0 +15 6 1e+0 +18 6 5e-1 +21 6 2.5e-1 +35 6 3.333333333333e-1 +38 6 -5e-1 +44 6 1e+0 +48 6 3.333333333333e-1 +51 6 2.5e-1 +54 6 7.5e-1 +57 6 5e-1 +0 7 1e+0 +12 7 1e+0 +3 8 1e+0 +12 8 1e+0 +6 9 1e+0 +12 9 1e+0 +9 10 1e+0 +12 10 1e+0 +2 11 -1e+0 +5 11 -1e+0 +8 11 -1e+0 +11 11 -1e+0 +0 12 -1e+0 +1 13 -1e+0 +2 14 -1e+0 +3 15 -1e+0 +4 16 -1e+0 +5 17 -1e+0 +6 18 -1e+0 +7 19 -1e+0 +8 20 -1e+0 +9 21 -1e+0 +10 22 -1e+0 +11 23 -1e+0 +13 24 1e+0 +22 24 1e+0 +16 25 1e+0 +22 25 1e+0 +19 26 1e+0 +22 26 1e+0 +13 27 -1e+0 +14 28 -1e+0 +15 29 -1e+0 +16 30 -1e+0 +17 31 -1e+0 +18 32 -1e+0 +19 33 -1e+0 +20 34 -1e+0 +21 35 -1e+0 +23 36 1e+0 +32 36 1e+0 +26 37 1e+0 +32 37 1e+0 +29 38 1e+0 +32 38 1e+0 +23 39 -1e+0 +24 40 -1e+0 +25 41 -1e+0 +26 42 -1e+0 +27 43 -1e+0 +28 44 -1e+0 +29 45 -1e+0 +30 46 -1e+0 +31 47 -1e+0 +33 48 1e+0 +45 48 1e+0 +36 49 1e+0 +45 49 1e+0 +39 50 1e+0 +45 50 1e+0 +42 51 1e+0 +45 51 1e+0 +33 52 -1e+0 +34 53 -1e+0 +35 54 -1e+0 +36 55 -1e+0 +37 56 -1e+0 +38 57 -1e+0 +39 58 -1e+0 +40 59 -1e+0 +41 60 -1e+0 +42 61 -1e+0 +43 62 -1e+0 +44 63 -1e+0 +46 64 1e+0 +58 64 1e+0 +49 65 1e+0 +58 65 1e+0 +52 66 1e+0 +58 66 1e+0 +55 67 1e+0 +58 67 1e+0 +46 68 -1e+0 +47 69 -1e+0 +48 70 -1e+0 +49 71 -1e+0 +50 72 -1e+0 +51 73 -1e+0 +52 74 -1e+0 +53 75 -1e+0 +54 76 -1e+0 +55 77 -1e+0 +56 78 -1e+0 +57 79 -1e+0 + +BCOORD +40 +1 1e+0 +2 2.302585092994046e+0 +4 1e+0 +5 2.70805020110221e+0 +7 1e+0 +8 2.995732273553991e+0 +10 1e+0 +11 3.218875824868201e+0 +12 -1e+0 +14 1e+0 +15 -6.931471805599453e-1 +17 1e+0 +18 -3.566749439387324e-1 +20 1e+0 +21 -1.6094379124341e+0 +22 -1e+0 +24 1e+0 +25 2.623642644674911e-1 +27 1e+0 +28 -2.231435513142097e-1 +30 1e+0 +31 1.131402111491101e+0 +32 -1e+0 +34 1e+0 +35 6.931471805599453e-1 +37 1e+0 +38 -2.302585092994045e+0 +40 1e+0 +43 1e+0 +44 -4.307829160924542e-1 +45 -1e+0 +47 1e+0 +48 -1.6094379124341e+0 +50 1e+0 +51 -1.203972804325936e+0 +53 1e+0 +54 -9.16290731874155e-1 +56 1e+0 +57 -6.931471805599453e-1 +58 -1e+0 + diff --git a/crates/pounce-cli/tests/data/cblib/demb761.cbf b/crates/pounce-cli/tests/data/cblib/demb761.cbf new file mode 100644 index 00000000..57a734d3 --- /dev/null +++ b/crates/pounce-cli/tests/data/cblib/demb761.cbf @@ -0,0 +1,336 @@ +VER +2 + +OBJSENSE +MIN + +VAR +131 57 +F 15 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +F 1 +F 1 +F 1 +F 1 +F 1 +F 1 +F 1 +F 1 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +F 1 +F 1 +F 1 +F 1 +F 1 +F 1 +F 1 +F 1 +F 1 +F 1 +F 1 +F 1 +F 1 +F 1 +F 1 +F 1 +F 1 +F 1 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +EXP 3 + +CON +93 6 +L= 12 +L- 1 +L= 24 +L- 1 +L= 54 +L- 1 + +OBJACOORD +11 +0 -1.33172e-3 +1 -2.270927e-3 +2 -2.48546e-3 +3 -4.67e+0 +4 -4.671973e+0 +5 -8.14e-3 +6 -8.092e-3 +7 -5e-3 +8 -9.09e-4 +9 -8.8e-4 +10 -1.19e-3 + +OBJBCOORD +-1.611809565095832e+2 + +ACOORD +194 +2 0 1e+0 +15 0 1e+0 +52 0 1e+0 +88 0 1e+0 +5 1 1e+0 +18 1 1e+0 +40 1 1e+0 +43 1 1e+0 +46 1 1e+0 +55 1 1e+0 +76 1 1e+0 +79 1 1e+0 +82 1 1e+0 +8 2 1e+0 +21 2 1e+0 +58 2 1e+0 +11 3 1e+0 +24 3 1e+0 +36 3 1e+0 +43 3 1e+0 +46 3 -1e+0 +61 3 1e+0 +73 3 1e+0 +79 3 1e+0 +82 3 -1e+0 +11 4 1e+0 +27 4 1e+0 +36 4 1e+0 +40 4 1e+0 +43 4 1e+0 +46 4 1e+0 +64 4 1e+0 +73 4 1e+0 +76 4 1e+0 +79 4 1e+0 +82 4 1e+0 +30 5 1e+0 +67 5 1e+0 +33 6 1e+0 +70 7 1e+0 +85 8 1e+0 +88 8 1e+0 +49 9 1e+0 +91 10 1e+0 +0 11 1e+0 +12 11 1e+0 +3 12 1e+0 +12 12 1e+0 +6 13 1e+0 +12 13 1e+0 +9 14 1e+0 +12 14 1e+0 +0 15 -1e+0 +1 16 -1e+0 +2 17 -1e+0 +3 18 -1e+0 +4 19 -1e+0 +5 20 -1e+0 +6 21 -1e+0 +7 22 -1e+0 +8 23 -1e+0 +9 24 -1e+0 +10 25 -1e+0 +11 26 -1e+0 +13 27 1e+0 +37 27 1e+0 +16 28 1e+0 +37 28 1e+0 +19 29 1e+0 +37 29 1e+0 +22 30 1e+0 +37 30 1e+0 +25 31 1e+0 +37 31 1e+0 +28 32 1e+0 +37 32 1e+0 +31 33 1e+0 +37 33 1e+0 +34 34 1e+0 +37 34 1e+0 +13 35 -1e+0 +14 36 -1e+0 +15 37 -1e+0 +16 38 -1e+0 +17 39 -1e+0 +18 40 -1e+0 +19 41 -1e+0 +20 42 -1e+0 +21 43 -1e+0 +22 44 -1e+0 +23 45 -1e+0 +24 46 -1e+0 +25 47 -1e+0 +26 48 -1e+0 +27 49 -1e+0 +28 50 -1e+0 +29 51 -1e+0 +30 52 -1e+0 +31 53 -1e+0 +32 54 -1e+0 +33 55 -1e+0 +34 56 -1e+0 +35 57 -1e+0 +36 58 -1e+0 +38 59 1e+0 +92 59 1e+0 +41 60 1e+0 +92 60 1e+0 +44 61 1e+0 +92 61 1e+0 +47 62 1e+0 +92 62 1e+0 +50 63 1e+0 +92 63 1e+0 +53 64 1e+0 +92 64 1e+0 +56 65 1e+0 +92 65 1e+0 +59 66 1e+0 +92 66 1e+0 +62 67 1e+0 +92 67 1e+0 +65 68 1e+0 +92 68 1e+0 +68 69 1e+0 +92 69 1e+0 +71 70 1e+0 +92 70 1e+0 +74 71 1e+0 +92 71 1e+0 +77 72 1e+0 +92 72 1e+0 +80 73 1e+0 +92 73 1e+0 +83 74 1e+0 +92 74 1e+0 +86 75 1e+0 +92 75 1e+0 +89 76 1e+0 +92 76 1e+0 +38 77 -1e+0 +39 78 -1e+0 +40 79 -1e+0 +41 80 -1e+0 +42 81 -1e+0 +43 82 -1e+0 +44 83 -1e+0 +45 84 -1e+0 +46 85 -1e+0 +47 86 -1e+0 +48 87 -1e+0 +49 88 -1e+0 +50 89 -1e+0 +51 90 -1e+0 +52 91 -1e+0 +53 92 -1e+0 +54 93 -1e+0 +55 94 -1e+0 +56 95 -1e+0 +57 96 -1e+0 +58 97 -1e+0 +59 98 -1e+0 +60 99 -1e+0 +61 100 -1e+0 +62 101 -1e+0 +63 102 -1e+0 +64 103 -1e+0 +65 104 -1e+0 +66 105 -1e+0 +67 106 -1e+0 +68 107 -1e+0 +69 108 -1e+0 +70 109 -1e+0 +71 110 -1e+0 +72 111 -1e+0 +73 112 -1e+0 +74 113 -1e+0 +75 114 -1e+0 +76 115 -1e+0 +77 116 -1e+0 +78 117 -1e+0 +79 118 -1e+0 +80 119 -1e+0 +81 120 -1e+0 +82 121 -1e+0 +83 122 -1e+0 +84 123 -1e+0 +85 124 -1e+0 +86 125 -1e+0 +87 126 -1e+0 +88 127 -1e+0 +89 128 -1e+0 +90 129 -1e+0 +91 130 -1e+0 + +BCOORD +44 +1 1e+0 +2 1.089000000055827e+1 +4 1e+0 +5 7.690000017275978e+0 +7 1e+0 +8 1.149000000329852e+1 +10 1e+0 +11 3.643999999344502e+1 +12 -1e+0 +14 1e+0 +17 1e+0 +20 1e+0 +23 1e+0 +26 1e+0 +29 1e+0 +32 1e+0 +35 1e+0 +36 3.922999995748586e+1 +37 -1e+0 +39 1e+0 +40 2.120000001514595e+1 +42 1e+0 +45 1e+0 +46 -6.250000018766418e+0 +48 1e+0 +51 1e+0 +54 1e+0 +57 1e+0 +60 1e+0 +63 1e+0 +66 1e+0 +69 1e+0 +72 1e+0 +73 3.922999995748586e+1 +75 1e+0 +76 2.120000001514595e+1 +78 1e+0 +81 1e+0 +82 -6.250000018766418e+0 +84 1e+0 +87 1e+0 +88 1.623000001883523e+1 +90 1e+0 +92 -1e+0 + diff --git a/crates/pounce-cli/tests/data/cblib/fang88.cbf b/crates/pounce-cli/tests/data/cblib/fang88.cbf new file mode 100644 index 00000000..b46d7b05 --- /dev/null +++ b/crates/pounce-cli/tests/data/cblib/fang88.cbf @@ -0,0 +1,317 @@ +VER +2 + +OBJSENSE +MIN + +VAR +119 51 +F 15 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +F 1 +F 1 +F 1 +F 1 +F 1 +F 1 +F 1 +F 1 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +F 1 +F 1 +F 1 +F 1 +F 1 +F 1 +F 1 +F 1 +F 1 +F 1 +F 1 +F 1 +F 1 +F 1 +F 1 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +EXP 3 +EXP 3 + +CON +84 6 +L= 12 +L- 1 +L= 24 +L- 1 +L= 45 +L- 1 + +OBJACOORD +11 +0 -1.33172e-3 +1 -2.270927e-3 +2 -2.48546e-3 +3 -4.67e+0 +4 -4.671973e+0 +5 -8.14e-3 +6 -8.092e-3 +7 -5e-3 +8 -9.090000000000001e-3 +9 -8.8e-4 +10 -1.9e-3 + +ACOORD +171 +2 0 1e+0 +15 0 1e+0 +49 0 1e+0 +79 0 1e+0 +5 1 1e+0 +18 1 1e+0 +40 1 1e+0 +43 1 1e+0 +52 1 1e+0 +73 1 1e+0 +8 2 1e+0 +21 2 1e+0 +55 2 1e+0 +11 3 1e+0 +24 3 1e+0 +36 3 1e+0 +43 3 -1e+0 +58 3 1e+0 +70 3 1e+0 +73 3 1e+0 +11 4 1e+0 +27 4 1e+0 +36 4 1e+0 +40 4 1e+0 +43 4 1e+0 +61 4 1e+0 +70 4 1e+0 +73 4 1e+0 +30 5 1e+0 +64 5 1e+0 +33 6 1e+0 +67 7 1e+0 +76 8 1e+0 +79 8 1e+0 +46 9 1e+0 +82 10 1e+0 +0 11 1e+0 +12 11 1e+0 +3 12 1e+0 +12 12 1e+0 +6 13 1e+0 +12 13 1e+0 +9 14 1e+0 +12 14 1e+0 +0 15 -1e+0 +1 16 -1e+0 +2 17 -1e+0 +3 18 -1e+0 +4 19 -1e+0 +5 20 -1e+0 +6 21 -1e+0 +7 22 -1e+0 +8 23 -1e+0 +9 24 -1e+0 +10 25 -1e+0 +11 26 -1e+0 +13 27 1e+0 +37 27 1e+0 +16 28 1e+0 +37 28 1e+0 +19 29 1e+0 +37 29 1e+0 +22 30 1e+0 +37 30 1e+0 +25 31 1e+0 +37 31 1e+0 +28 32 1e+0 +37 32 1e+0 +31 33 1e+0 +37 33 1e+0 +34 34 1e+0 +37 34 1e+0 +13 35 -1e+0 +14 36 -1e+0 +15 37 -1e+0 +16 38 -1e+0 +17 39 -1e+0 +18 40 -1e+0 +19 41 -1e+0 +20 42 -1e+0 +21 43 -1e+0 +22 44 -1e+0 +23 45 -1e+0 +24 46 -1e+0 +25 47 -1e+0 +26 48 -1e+0 +27 49 -1e+0 +28 50 -1e+0 +29 51 -1e+0 +30 52 -1e+0 +31 53 -1e+0 +32 54 -1e+0 +33 55 -1e+0 +34 56 -1e+0 +35 57 -1e+0 +36 58 -1e+0 +38 59 1e+0 +83 59 1e+0 +41 60 1e+0 +83 60 1e+0 +44 61 1e+0 +83 61 1e+0 +47 62 1e+0 +83 62 1e+0 +50 63 1e+0 +83 63 1e+0 +53 64 1e+0 +83 64 1e+0 +56 65 1e+0 +83 65 1e+0 +59 66 1e+0 +83 66 1e+0 +62 67 1e+0 +83 67 1e+0 +65 68 1e+0 +83 68 1e+0 +68 69 1e+0 +83 69 1e+0 +71 70 1e+0 +83 70 1e+0 +74 71 1e+0 +83 71 1e+0 +77 72 1e+0 +83 72 1e+0 +80 73 1e+0 +83 73 1e+0 +38 74 -1e+0 +39 75 -1e+0 +40 76 -1e+0 +41 77 -1e+0 +42 78 -1e+0 +43 79 -1e+0 +44 80 -1e+0 +45 81 -1e+0 +46 82 -1e+0 +47 83 -1e+0 +48 84 -1e+0 +49 85 -1e+0 +50 86 -1e+0 +51 87 -1e+0 +52 88 -1e+0 +53 89 -1e+0 +54 90 -1e+0 +55 91 -1e+0 +56 92 -1e+0 +57 93 -1e+0 +58 94 -1e+0 +59 95 -1e+0 +60 96 -1e+0 +61 97 -1e+0 +62 98 -1e+0 +63 99 -1e+0 +64 100 -1e+0 +65 101 -1e+0 +66 102 -1e+0 +67 103 -1e+0 +68 104 -1e+0 +69 105 -1e+0 +70 106 -1e+0 +71 107 -1e+0 +72 108 -1e+0 +73 109 -1e+0 +74 110 -1e+0 +75 111 -1e+0 +76 112 -1e+0 +77 113 -1e+0 +78 114 -1e+0 +79 115 -1e+0 +80 116 -1e+0 +81 117 -1e+0 +82 118 -1e+0 + +BCOORD +57 +1 1e+0 +2 -2.961137628789492e+0 +4 1e+0 +5 -3.82291383035637e+0 +7 1e+0 +8 -2.325516008284767e+0 +10 1e+0 +11 -5.006531725263518e+0 +12 -1e+0 +14 1e+0 +15 -1.381551055796427e+1 +17 1e+0 +18 -1.151292546497023e+1 +20 1e+0 +21 -1.381551055796427e+1 +23 1e+0 +24 -2.302585092994046e+1 +26 1e+0 +27 -1.842068074395237e+1 +29 1e+0 +30 -6.907755278982137e+0 +32 1e+0 +33 -6.907755278982137e+0 +35 1e+0 +36 -2.216531257634445e+0 +37 -1e+0 +39 1e+0 +40 -1.905864776027476e+0 +42 1e+0 +43 -3.255099035025725e+0 +45 1e+0 +46 -9.210340371976182e+0 +48 1e+0 +49 -1.381551055796427e+1 +51 1e+0 +52 -1.151292546497023e+1 +54 1e+0 +55 -1.381551055796427e+1 +57 1e+0 +58 -2.302585092994046e+1 +60 1e+0 +61 -1.842068074395237e+1 +63 1e+0 +64 -6.907755278982137e+0 +66 1e+0 +67 -6.907755278982137e+0 +69 1e+0 +70 -2.216590899827185e+0 +72 1e+0 +73 -2.763102111592855e+1 +75 1e+0 +76 -1.151292546497023e+1 +78 1e+0 +79 -9.04144183551437e+0 +81 1e+0 +82 -9.210340371976182e+0 +83 -1e+0 + diff --git a/crates/pounce-cli/tests/data/cblib/pow3_synthetic.cbf b/crates/pounce-cli/tests/data/cblib/pow3_synthetic.cbf new file mode 100644 index 00000000..1092af70 --- /dev/null +++ b/crates/pounce-cli/tests/data/cblib/pow3_synthetic.cbf @@ -0,0 +1,42 @@ +# Synthetic 3-D power-cone instance (hand-authored, valid CBF v2) used to +# exercise the POWCONES section of the reader. Not from CBLIB — the real +# power-cone instances (2013_fir*) are ~120 MB, impractical to vendor. +# +# max x2 s.t. (x0, x1, x2) in POW(alpha = 1/2), x0 = 2, x1 = 1/2 +# +# The 3-D power cone is x0^a x1^(1-a) >= |x2| with a = alpha0/(alpha0+alpha1). +# With alpha = (1, 1) -> a = 1/2 and x0 = 2, x1 = 1/2 the bound is +# 2^(1/2) * (1/2)^(1/2) = 1, so the optimum is x2 = 1. +VER +2 + +OBJSENSE +MAX + +POWCONES +1 2 +2 +1.0 +1.0 + +VAR +3 1 +@0:POW 3 + +CON +2 1 +L= 2 + +OBJACOORD +1 +2 1.0 + +ACOORD +2 +0 0 1.0 +1 1 1.0 + +BCOORD +2 +0 -2.0 +1 -5e-1 diff --git a/crates/pounce-cli/tests/data/cblib/sdp_synthetic.cbf b/crates/pounce-cli/tests/data/cblib/sdp_synthetic.cbf new file mode 100644 index 00000000..91669324 --- /dev/null +++ b/crates/pounce-cli/tests/data/cblib/sdp_synthetic.cbf @@ -0,0 +1,35 @@ +# Synthetic SDP via an affine PSD constraint (PSDCON), hand-authored valid +# CBF v2 — exercises the DCOORD/HCOORD reader. Not from CBLIB. +# +# max λ s.t. M − λ·I ⪰ 0, M = diag(2, 5) +# +# As a PSDCON: D = M (DCOORD), H_{con=0, var=0} = −I (HCOORD, the coefficient +# of λ). The constraint D + λ·H = diag(2−λ, 5−λ) ⪰ 0 forces λ ≤ 2, so the +# optimum is λ = λ_min(M) = 2. +VER +2 + +OBJSENSE +MAX + +VAR +1 1 +F 1 + +PSDCON +1 +2 + +OBJACOORD +1 +0 1.0 + +HCOORD +2 +0 0 0 0 -1.0 +0 0 1 1 -1.0 + +DCOORD +2 +0 0 0 2.0 +0 1 1 5.0 diff --git a/crates/pounce-cli/tests/dispatch_routing.rs b/crates/pounce-cli/tests/dispatch_routing.rs new file mode 100644 index 00000000..4fabe16c --- /dev/null +++ b/crates/pounce-cli/tests/dispatch_routing.rs @@ -0,0 +1,109 @@ +//! Integration tests for the LP/QP dispatch routing (Phase 1). +//! +//! See `dev-notes/lp-qp-routing.md`. Phase 1 wires the `solver_selection` +//! option and the classifier but routes everything to the existing NLP +//! solver, so the only externally observable behavior is: +//! +//! * `auto` / `nlp` solve exactly as before (no regression); +//! * an unknown `solver_selection` value is rejected; +//! * a forced specialized solver that does not match the detected +//! problem class errors with a clear message (the plan's integration +//! test: `--solver=lp` on an NLP should error). +//! +//! These use the `rosenbrock` builtin so they are hermetic — no `.nl` +//! fixture or fetched benchmark cache required. + +use std::path::PathBuf; +use std::process::Command; + +fn pounce_exe() -> PathBuf { + PathBuf::from(env!("CARGO_BIN_EXE_pounce")) +} + +#[test] +fn auto_solves_builtin_unchanged() { + let output = Command::new(pounce_exe()) + .arg("--problem") + .arg("rosenbrock") + .arg("solver_selection=auto") + .output() + .expect("spawn pounce"); + assert_eq!( + output.status.code(), + Some(0), + "auto should solve rosenbrock; stderr={}", + String::from_utf8_lossy(&output.stderr) + ); +} + +#[test] +fn default_has_no_solver_selection_regression() { + // Omitting solver_selection entirely must behave exactly as before. + let output = Command::new(pounce_exe()) + .arg("--problem") + .arg("rosenbrock") + .output() + .expect("spawn pounce"); + assert_eq!(output.status.code(), Some(0)); +} + +#[test] +fn forced_lp_on_nlp_errors() { + // The plan's named integration test: forcing an LP solver on a + // general NLP must error, naming both the detected class and the + // forced solver. + let output = Command::new(pounce_exe()) + .arg("--problem") + .arg("rosenbrock") + .arg("solver_selection=lp-ipm") + .output() + .expect("spawn pounce"); + assert_eq!( + output.status.code(), + Some(2), + "forced mismatch should exit 2" + ); + let stderr = String::from_utf8_lossy(&output.stderr); + assert!( + stderr.contains("NLP") && stderr.contains("lp-ipm"), + "error should name detected class and forced solver: {stderr}" + ); +} + +#[test] +fn forced_qp_solvers_on_nlp_error() { + // The qp-family entry points (qp-ipm, qp-active-set) forced onto a + // general NLP must error just like lp-ipm does — never fall through to + // a wrong solve. The error names the detected class and forced solver. + for sel in ["qp-ipm", "qp-active-set"] { + let output = Command::new(pounce_exe()) + .arg("--problem") + .arg("rosenbrock") + .arg(format!("solver_selection={sel}")) + .output() + .expect("spawn pounce"); + assert_eq!( + output.status.code(), + Some(2), + "{sel} on an NLP should exit 2" + ); + let stderr = String::from_utf8_lossy(&output.stderr); + assert!( + stderr.contains("NLP") && stderr.contains(sel), + "{sel}: error should name detected class and forced solver: {stderr}" + ); + } +} + +#[test] +fn unknown_solver_selection_rejected() { + // `lp-simplex` was removed from scope; it must be rejected, not + // silently accepted. + let output = Command::new(pounce_exe()) + .arg("--problem") + .arg("rosenbrock") + .arg("solver_selection=lp-simplex") + .output() + .expect("spawn pounce"); + assert_eq!(output.status.code(), Some(2)); +} diff --git a/crates/pounce-cli/tests/exp_cone_vs_nlp.rs b/crates/pounce-cli/tests/exp_cone_vs_nlp.rs new file mode 100644 index 00000000..1f5354b1 --- /dev/null +++ b/crates/pounce-cli/tests/exp_cone_vs_nlp.rs @@ -0,0 +1,666 @@ +//! Cross-check: the **non-symmetric exponential-cone** HSDE solver in +//! `pounce-convex` vs. POUNCE's general **NLP** filter-IPM on the *same* +//! problems, solved in two genuinely independent ways. +//! +//! Each problem is posed twice: +//! 1. as an exponential-cone conic program (`ConeSpec::Exponential`, +//! routed to `hsde_nonsym`), and +//! 2. as a smooth nonlinear program (a `TNLP` for `IpoptApplication`). +//! The two optima must agree. Because a conic IPM and a general NLP IPM share +//! no code on these paths, agreement is strong evidence the exp-cone driver is +//! correct — exactly the intrinsic validation called for in `dev-notes/hsde.md` +//! (entropy / log-sum-exp / geometric program with known optima). + +use pounce_algorithm::application::IpoptApplication; +use pounce_common::types::{Index, Number}; +use pounce_convex::{solve_socp_ipm, ConeSpec, QpOptions, QpProblem, QpStatus, Triplet}; +use pounce_feral::FeralSolverInterface; +use pounce_linsol::SparseSymLinearSolverInterface; +use pounce_nlp::return_codes::ApplicationReturnStatus; +use pounce_nlp::tnlp::{ + BoundsInfo, IndexStyle, IpoptCq, IpoptData, NlpInfo, Solution, SparsityRequest, StartingPoint, + TNLP, +}; +use std::cell::RefCell; +use std::rc::Rc; + +fn backend() -> Box { + Box::new(FeralSolverInterface::new()) +} + +fn opts() -> QpOptions { + QpOptions { + max_iter: 200, + ..QpOptions::default() + } +} + +/// A small smooth NLP defined by closures: minimize `f(x)` subject to optional +/// **linear equality** constraints `Aₖ·x = bₖ` and variable bounds. Supplies +/// `f`, `∇f`, and the (objective) Hessian; since the constraints are linear, +/// the Lagrangian Hessian is just `obj_factor·∇²f`. +struct ClosureNlp { + n: usize, + lb: Vec, + ub: Vec, + x0: Vec, + /// Each equality row as `(col, coeff)` pairs; the row equals `b[r]`. + a_rows: Vec>, + b: Vec, + f: Box f64>, + grad: Box, + /// Lower-triangle sparsity of the objective Hessian (constraints linear, + /// so the Lagrangian Hessian is `obj_factor·∇²f`). + hess_pattern: Vec<(usize, usize)>, + /// Fills the Hessian values at `x` (already multiplied by `obj_factor`). + hess: Box, + captured_obj: RefCell>, + captured_x: RefCell>>, +} + +impl TNLP for ClosureNlp { + fn get_nlp_info(&mut self) -> Option { + let nnz_jac: usize = self.a_rows.iter().map(|r| r.len()).sum(); + Some(NlpInfo { + n: self.n as Index, + m: self.a_rows.len() as Index, + nnz_jac_g: nnz_jac as Index, + nnz_h_lag: self.hess_pattern.len() as Index, + index_style: IndexStyle::C, + }) + } + + fn get_bounds_info(&mut self, b: BoundsInfo<'_>) -> bool { + b.x_l.copy_from_slice(&self.lb); + b.x_u.copy_from_slice(&self.ub); + // Equalities: g_l = g_u = b. + for (i, &bi) in self.b.iter().enumerate() { + b.g_l[i] = bi; + b.g_u[i] = bi; + } + true + } + + fn get_starting_point(&mut self, sp: StartingPoint<'_>) -> bool { + sp.x.copy_from_slice(&self.x0); + true + } + + fn eval_f(&mut self, x: &[Number], _new_x: bool) -> Option { + Some((self.f)(x)) + } + + fn eval_grad_f(&mut self, x: &[Number], _new_x: bool, grad: &mut [Number]) -> bool { + (self.grad)(x, grad); + true + } + + fn eval_g(&mut self, x: &[Number], _new_x: bool, g: &mut [Number]) -> bool { + for (r, row) in self.a_rows.iter().enumerate() { + g[r] = row.iter().map(|&(c, v)| v * x[c]).sum(); + } + true + } + + fn eval_jac_g( + &mut self, + _x: Option<&[Number]>, + _new_x: bool, + mode: SparsityRequest<'_>, + ) -> bool { + match mode { + SparsityRequest::Structure { irow, jcol } => { + let mut k = 0; + for (r, row) in self.a_rows.iter().enumerate() { + for &(c, _) in row { + irow[k] = r as Index; + jcol[k] = c as Index; + k += 1; + } + } + } + SparsityRequest::Values { values } => { + let mut k = 0; + for row in &self.a_rows { + for &(_, v) in row { + values[k] = v; + k += 1; + } + } + } + } + true + } + + fn eval_h( + &mut self, + x: Option<&[Number]>, + _new_x: bool, + obj_factor: Number, + _lambda: Option<&[Number]>, + _new_lambda: bool, + mode: SparsityRequest<'_>, + ) -> bool { + match mode { + SparsityRequest::Structure { irow, jcol } => { + for (k, &(r, c)) in self.hess_pattern.iter().enumerate() { + irow[k] = r as Index; + jcol[k] = c as Index; + } + } + SparsityRequest::Values { values } => { + (self.hess)(x.expect("eval_h needs x"), obj_factor, values); + } + } + true + } + + fn finalize_solution(&mut self, sol: Solution<'_>, _d: &IpoptData, _q: &IpoptCq) { + *self.captured_obj.borrow_mut() = Some(sol.obj_value); + *self.captured_x.borrow_mut() = Some(sol.x.to_vec()); + } +} + +/// Solve a `ClosureNlp`, returning `(objective, x*)`. Prints iteration count +/// and wall-clock for the performance comparison. +fn solve_nlp(label: &str, nlp: ClosureNlp) -> (f64, Vec) { + let mut app = IpoptApplication::new(); + app.initialize().expect("init"); + let _ = app.options_mut().read_from_str("print_level 0\n", true); + let rc = Rc::new(RefCell::new(nlp)); + let tnlp: Rc> = rc.clone(); + let t0 = std::time::Instant::now(); + let status = app.optimize_tnlp(Rc::clone(&tnlp)); + let dt = t0.elapsed(); + assert!( + matches!( + status, + ApplicationReturnStatus::SolveSucceeded + | ApplicationReturnStatus::SolvedToAcceptableLevel + ), + "NLP solve failed: {status:?}" + ); + eprintln!( + " [{label}] NLP: iters={}, time={:.1}µs", + app.statistics().iteration_count, + dt.as_secs_f64() * 1e6 + ); + let obj = rc.borrow().captured_obj.borrow().expect("obj"); + let x = rc.borrow().captured_x.borrow().clone().expect("x"); + (obj, x) +} + +/// Time a conic solve and print iters + wall-clock. +fn timed_conic(label: &str, prob: &QpProblem, specs: &[ConeSpec]) -> pounce_convex::QpSolution { + let t0 = std::time::Instant::now(); + let sol = solve_socp_ipm(prob, specs, &opts(), backend); + let dt = t0.elapsed(); + eprintln!( + " [{label}] conic: iters={}, time={:.1}µs", + sol.iters, + dt.as_secs_f64() * 1e6 + ); + sol +} + +// -------------------------------------------------------------------------- +// 1. Geometric program: min x + 1/x (= min_u e^u + e^{−u}), optimum 2. +// -------------------------------------------------------------------------- + +#[test] +fn geometric_program_conic_matches_nlp() { + // Conic: min t1 + t2 s.t. (u,1,t1)∈Kexp, (−u,1,t2)∈Kexp. + let prob = QpProblem { + n: 3, // (u, t1, t2) + p_lower: vec![], + c: vec![0.0, 1.0, 1.0], + a: vec![], + b: vec![], + g: vec![ + Triplet::new(0, 0, -1.0), // s0 = u + Triplet::new(2, 1, -1.0), // s2 = t1 + Triplet::new(3, 0, 1.0), // s3 = −u + Triplet::new(5, 2, -1.0), // s5 = t2 + ], + h: vec![0.0, 1.0, 0.0, 0.0, 1.0, 0.0], + lb: vec![], + ub: vec![], + }; + let conic = timed_conic("GP", &prob, &[ConeSpec::Exponential, ConeSpec::Exponential]); + assert_eq!(conic.status, QpStatus::Optimal, "conic: {:?}", conic.status); + + // NLP: min_u e^u + e^{−u}, optimum u=0, obj=2. + let nlp = ClosureNlp { + n: 1, + // Modest bounds: wide-open ±1e19 lets the line search overflow e^u. + lb: vec![-30.0], + ub: vec![30.0], + x0: vec![0.5], + a_rows: vec![], + b: vec![], + f: Box::new(|x| x[0].exp() + (-x[0]).exp()), + grad: Box::new(|x, g| g[0] = x[0].exp() - (-x[0]).exp()), + hess_pattern: vec![(0, 0)], + hess: Box::new(|x, of, v| v[0] = of * (x[0].exp() + (-x[0]).exp())), + captured_obj: RefCell::new(None), + captured_x: RefCell::new(None), + }; + let (nlp_obj, _) = solve_nlp("GP", nlp); + + assert!( + (conic.obj - nlp_obj).abs() < 1e-5, + "GP objectives disagree: conic={}, nlp={nlp_obj}", + conic.obj + ); + assert!((conic.obj - 2.0).abs() < 1e-5, "GP obj {} vs 2", conic.obj); + eprintln!("GP: conic obj={:.8}, nlp obj={:.8}", conic.obj, nlp_obj); +} + +// -------------------------------------------------------------------------- +// 2. Entropy maximization: min Σ xᵢ log xᵢ s.t. Σ xᵢ = 1, x ≥ 0. +// Optimum at the uniform distribution xᵢ = 1/n, objective −log n. +// -------------------------------------------------------------------------- + +#[test] +fn entropy_maximization_conic_matches_nlp() { + let n = 3usize; + let want_obj = -(n as f64).ln(); + + // Conic: variables v = (a₀..a₂, x₀..x₂); min −Σaᵢ s.t. Σxᵢ = 1 and + // (aᵢ, xᵢ, 1) ∈ Kexp (⇔ aᵢ ≤ −xᵢ log xᵢ). At the optimum aᵢ = −xᵢ log xᵢ, + // so −Σaᵢ = −(max entropy) = −log n. + let mut g = Vec::new(); + let mut h = Vec::new(); + for i in 0..n { + let base = 3 * i; + g.push(Triplet::new(base, i, -1.0)); // slack0 = aᵢ + h.push(0.0); + g.push(Triplet::new(base + 1, n + i, -1.0)); // slack1 = xᵢ + h.push(0.0); + h.push(1.0); // slack2 = 1 (no G row) + } + // Equality Σ xᵢ = 1. + let a: Vec = (0..n).map(|i| Triplet::new(0, n + i, 1.0)).collect(); + let mut c = vec![0.0; 2 * n]; + for ci in c.iter_mut().take(n) { + *ci = -1.0; // min −Σaᵢ + } + let prob = QpProblem { + n: 2 * n, + p_lower: vec![], + c, + a, + b: vec![1.0], + g, + h, + lb: vec![], + ub: vec![], + }; + let specs = vec![ConeSpec::Exponential; n]; + let conic = timed_conic("entropy", &prob, &specs); + assert_eq!(conic.status, QpStatus::Optimal, "conic: {:?}", conic.status); + + // NLP: min Σ xᵢ log xᵢ s.t. Σ xᵢ = 1, xᵢ ≥ 1e-9. + let nlp = ClosureNlp { + n, + lb: vec![1e-9; n], + ub: vec![1e19; n], + x0: vec![1.0 / n as f64; n], + a_rows: vec![(0..n).map(|i| (i, 1.0)).collect()], + b: vec![1.0], + f: Box::new(|x| x.iter().map(|&xi| xi * xi.ln()).sum()), + grad: Box::new(|x, g| { + for (gi, &xi) in g.iter_mut().zip(x) { + *gi = xi.ln() + 1.0; + } + }), + hess_pattern: (0..n).map(|i| (i, i)).collect(), + hess: Box::new(|x, of, v| { + for (vi, &xi) in v.iter_mut().zip(x) { + *vi = of / xi; // ∂²(x log x)/∂x² = 1/x + } + }), + captured_obj: RefCell::new(None), + captured_x: RefCell::new(None), + }; + let (nlp_obj, nlp_x) = solve_nlp("entropy", nlp); + + assert!( + (conic.obj - nlp_obj).abs() < 1e-5, + "entropy objectives disagree: conic={}, nlp={nlp_obj}", + conic.obj + ); + assert!( + (conic.obj - want_obj).abs() < 1e-5, + "entropy obj {} vs −log {n} = {want_obj}", + conic.obj + ); + // The conic primal recovers the uniform distribution in v[n..2n]. + for i in 0..n { + assert!( + (conic.x[n + i] - 1.0 / n as f64).abs() < 1e-4, + "conic x[{i}] = {} vs 1/{n}", + conic.x[n + i] + ); + assert!((nlp_x[i] - 1.0 / n as f64).abs() < 1e-4, "nlp x[{i}]"); + } + eprintln!( + "entropy(n={n}): conic obj={:.8}, nlp obj={:.8}, want={want_obj:.8}", + conic.obj, nlp_obj + ); +} + +// -------------------------------------------------------------------------- +// 3. Log-sum-exp: min log(e^{x₁} + e^{x₂}) s.t. x₁ + x₂ = 0. Optimum log 2 +// at x = 0. +// -------------------------------------------------------------------------- + +#[test] +fn log_sum_exp_conic_matches_nlp() { + // Conic: v = (t, x1, x2); min t s.t. x1+x2=0, (xᵢ−t, 1, uᵢ)∈Kexp, + // u₁+u₂ ≤ 1. Rows: exp1 (0..3), exp2 (3..6), orthant (6). + let prob = QpProblem { + n: 5, // (t, x1, x2, u1, u2) + p_lower: vec![], + c: vec![1.0, 0.0, 0.0, 0.0, 0.0], + a: vec![Triplet::new(0, 1, 1.0), Triplet::new(0, 2, 1.0)], // x1+x2=0 + b: vec![0.0], + g: vec![ + // exp1 slack = (x1 − t, 1, u1) + Triplet::new(0, 1, -1.0), // s0 = x1 ... + Triplet::new(0, 0, 1.0), // − t + Triplet::new(2, 3, -1.0), // s2 = u1 + // exp2 slack = (x2 − t, 1, u2) + Triplet::new(3, 2, -1.0), // s3 = x2 ... + Triplet::new(3, 0, 1.0), // − t + Triplet::new(5, 4, -1.0), // s5 = u2 + // orthant: s6 = 1 − u1 − u2 + Triplet::new(6, 3, 1.0), + Triplet::new(6, 4, 1.0), + ], + h: vec![0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], + lb: vec![], + ub: vec![], + }; + let specs = [ + ConeSpec::Exponential, + ConeSpec::Exponential, + ConeSpec::Nonneg(1), + ]; + let conic = timed_conic("lse", &prob, &specs); + assert_eq!(conic.status, QpStatus::Optimal, "conic: {:?}", conic.status); + + // NLP: min log(e^{x1}+e^{x2}) s.t. x1+x2=0. + let nlp = ClosureNlp { + n: 2, + lb: vec![-1e19; 2], + ub: vec![1e19; 2], + x0: vec![0.5, -0.5], + a_rows: vec![vec![(0, 1.0), (1, 1.0)]], + b: vec![0.0], + f: Box::new(|x| (x[0].exp() + x[1].exp()).ln()), + grad: Box::new(|x, g| { + let (e0, e1) = (x[0].exp(), x[1].exp()); + let s = e0 + e1; + g[0] = e0 / s; + g[1] = e1 / s; + }), + // H = diag(p) − p pᵀ with pᵢ = e^{xᵢ}/Σe^{xⱼ}; lower triangle. + hess_pattern: vec![(0, 0), (1, 0), (1, 1)], + hess: Box::new(|x, of, v| { + let (e0, e1) = (x[0].exp(), x[1].exp()); + let s = e0 + e1; + let (p0, p1) = (e0 / s, e1 / s); + v[0] = of * p0 * (1.0 - p0); + v[1] = -of * p0 * p1; + v[2] = of * p1 * (1.0 - p1); + }), + captured_obj: RefCell::new(None), + captured_x: RefCell::new(None), + }; + let (nlp_obj, _) = solve_nlp("lse", nlp); + + let want = 2.0_f64.ln(); + assert!( + (conic.obj - nlp_obj).abs() < 1e-5, + "lse objectives disagree: conic={}, nlp={nlp_obj}", + conic.obj + ); + assert!( + (conic.obj - want).abs() < 1e-5, + "lse obj {} vs log2", + conic.obj + ); + eprintln!("lse: conic obj={:.8}, nlp obj={:.8}", conic.obj, nlp_obj); +} + +// -------------------------------------------------------------------------- +// 4. Power cone (PR70 item D). K_α = {(x,y,z): |x| ≤ y^α z^{1−α}, y,z ≥ 0}. +// Maximizing x with y, z pinned gives the weighted geometric mean +// x* = y^α z^{1−α}. The exp-cone tests never exercise `ConeSpec::Power`, +// which routes through the *same* non-symmetric HSDE driver. +// -------------------------------------------------------------------------- + +#[test] +fn power_cone_geometric_mean_matches_nlp() { + // max x s.t. y = 2, z = 8, (x, y, z) ∈ K_{1/2}. + // x* = 2^{1/2} · 8^{1/2} = √16 = 4. + let prob = QpProblem { + n: 3, // (x, y, z) + p_lower: vec![], + c: vec![-1.0, 0.0, 0.0], // min −x + a: vec![ + Triplet::new(0, 1, 1.0), // y = 2 + Triplet::new(1, 2, 1.0), // z = 8 + ], + b: vec![2.0, 8.0], + g: vec![ + Triplet::new(0, 0, -1.0), // s0 = x + Triplet::new(1, 1, -1.0), // s1 = y + Triplet::new(2, 2, -1.0), // s2 = z + ], + h: vec![0.0, 0.0, 0.0], + lb: vec![], + ub: vec![], + }; + let conic = timed_conic("power-gm", &prob, &[ConeSpec::Power(0.5)]); + assert_eq!(conic.status, QpStatus::Optimal, "conic: {:?}", conic.status); + + // NLP: max x s.t. x ≤ √(y·z), y=2, z=8 ⇔ min −x with x² ≤ y·z. + // Pose directly as max of √(2·8): the closed form is 4. Cross-check with a + // 1-var NLP min −x s.t. x ≤ √16 (the binding monomial), i.e. x* = 4. + let nlp = ClosureNlp { + n: 1, + lb: vec![0.0], + ub: vec![10.0], + x0: vec![1.0], + // x ≤ √(2·8) = 4 written as the equality-free bound via a linear row + // x ≤ 4 (the monomial value); the geometric-mean optimum is at equality. + a_rows: vec![], + b: vec![], + f: Box::new(|x| -x[0]), + grad: Box::new(|_x, g| g[0] = -1.0), + hess_pattern: vec![(0, 0)], + hess: Box::new(|_x, _of, v| v[0] = 0.0), + captured_obj: RefCell::new(None), + captured_x: RefCell::new(None), + }; + // Replace the ub with the monomial value so the NLP optimum is the same 4. + let mut nlp = nlp; + nlp.ub = vec![(2.0_f64 * 8.0).sqrt()]; + let (nlp_obj, _) = solve_nlp("power-gm", nlp); + + // Objective is `min −x`, so the optimal value is −4 (x* = 4 = √(2·8)). + assert!( + (-conic.obj - 4.0).abs() < 1e-5, + "conic x* = {} vs geometric mean 4", + -conic.obj + ); + assert!( + (conic.obj - nlp_obj).abs() < 1e-5, + "power objectives disagree: conic={}, nlp={nlp_obj}", + conic.obj + ); + // The conic primal recovers (x, y, z) = (4, 2, 8) on the cone boundary. + assert!((conic.x[0] - 4.0).abs() < 1e-4, "x = {}", conic.x[0]); + assert!((conic.x[1] - 2.0).abs() < 1e-4, "y = {}", conic.x[1]); + assert!((conic.x[2] - 8.0).abs() < 1e-4, "z = {}", conic.x[2]); + eprintln!("power-gm: conic x*={:.8}", -conic.obj); +} + +// -------------------------------------------------------------------------- +// 5. Larger / near-boundary exp-cone instances (PR70 item D adversarial set). +// -------------------------------------------------------------------------- + +/// Larger entropy instance (n = 16): the non-symmetric driver must stay +/// accurate as the exp-cone count grows. Optimum is the uniform distribution +/// with objective −log 16. +#[test] +fn entropy_maximization_larger_instance() { + let n = 16usize; + let want_obj = -(n as f64).ln(); + + let mut g = Vec::new(); + let mut h = Vec::new(); + for i in 0..n { + let base = 3 * i; + g.push(Triplet::new(base, i, -1.0)); // slack0 = aᵢ + h.push(0.0); + g.push(Triplet::new(base + 1, n + i, -1.0)); // slack1 = xᵢ + h.push(0.0); + h.push(1.0); // slack2 = 1 + } + let a: Vec = (0..n).map(|i| Triplet::new(0, n + i, 1.0)).collect(); + let mut c = vec![0.0; 2 * n]; + for ci in c.iter_mut().take(n) { + *ci = -1.0; + } + let prob = QpProblem { + n: 2 * n, + p_lower: vec![], + c, + a, + b: vec![1.0], + g, + h, + lb: vec![], + ub: vec![], + }; + let specs = vec![ConeSpec::Exponential; n]; + let conic = timed_conic("entropy16", &prob, &specs); + assert_eq!(conic.status, QpStatus::Optimal, "conic: {:?}", conic.status); + assert!( + (conic.obj - want_obj).abs() < 1e-4, + "entropy(n=16) obj {} vs −log 16 = {want_obj}", + conic.obj + ); + for i in 0..n { + assert!( + (conic.x[n + i] - 1.0 / n as f64).abs() < 1e-3, + "x[{i}] = {} vs 1/16", + conic.x[n + i] + ); + } +} + +/// Near-boundary geometric program, swept over increasing |u|: for each pinned +/// `u`, `min t1 + t2 s.t. (u,1,t1)∈Kexp, (−u,1,t2)∈Kexp`, whose closed form is +/// `t1 = e^u`, `t2 = e^{−u}` (the second slack rides ever closer to the cone +/// boundary as `u` grows). This is the regime most likely to break the +/// non-symmetric exp-cone scaling, so it both (a) gives positive vs-NLP coverage +/// where the driver converges and (b) maps the point at which it stops. +/// +/// LIMITATION (PR70 item D finding): at large `u` (≈3 on this machine) the +/// non-symmetric HSDE driver returns `NumericalFailure` on this *feasible* +/// program rather than the optimum — a real robustness gap in the deep +/// near-boundary regime, not just an infeasibility-certification weakness. +/// The safety-critical property still holds (it never reports a wrong `Optimal`), +/// which is what we assert unconditionally; where it does converge we check the +/// objective against the closed form and the NLP. Tighten to "Optimal at every +/// `u`" once the exp-cone scaling is hardened near the boundary. +#[test] +fn near_boundary_gp_matches_nlp() { + let mut solved_any = false; + for &u in &[1.0_f64, 1.5, 2.0, 2.5, 3.0] { + // Conic: min t1 + t2 s.t. (u,1,t1)∈Kexp, (−u,1,t2)∈Kexp, u pinned. + let prob = QpProblem { + n: 3, // (u, t1, t2) + p_lower: vec![], + c: vec![0.0, 1.0, 1.0], + a: vec![Triplet::new(0, 0, 1.0)], // u = + b: vec![u], + g: vec![ + Triplet::new(0, 0, -1.0), // s0 = u + Triplet::new(2, 1, -1.0), // s2 = t1 + Triplet::new(3, 0, 1.0), // s3 = −u + Triplet::new(5, 2, -1.0), // s5 = t2 + ], + h: vec![0.0, 1.0, 0.0, 0.0, 1.0, 0.0], + lb: vec![], + ub: vec![], + }; + let conic = timed_conic( + "gp-boundary", + &prob, + &[ConeSpec::Exponential, ConeSpec::Exponential], + ); + + // Safety property: must NEVER report a wrong/premature Optimal. Either it + // converges (Optimal, checked below) or it fails honestly. + assert!( + matches!( + conic.status, + QpStatus::Optimal | QpStatus::NumericalFailure | QpStatus::IterationLimit + ), + "u={u}: unexpected status {:?}", + conic.status + ); + if conic.status != QpStatus::Optimal { + eprintln!( + "gp-boundary: u={u} -> {:?} (documented near-boundary gap)", + conic.status + ); + continue; + } + solved_any = true; + + let want = u.exp() + (-u).exp(); + // NLP: min e^u + e^{−u} with u pinned (so it just evaluates the value). + let nlp = ClosureNlp { + n: 1, + lb: vec![u], + ub: vec![u], + x0: vec![u], + a_rows: vec![], + b: vec![], + f: Box::new(|x| x[0].exp() + (-x[0]).exp()), + grad: Box::new(|x, g| g[0] = x[0].exp() - (-x[0]).exp()), + hess_pattern: vec![(0, 0)], + hess: Box::new(|x, of, v| v[0] = of * (x[0].exp() + (-x[0]).exp())), + captured_obj: RefCell::new(None), + captured_x: RefCell::new(None), + }; + let (nlp_obj, _) = solve_nlp("gp-boundary", nlp); + + assert!( + (conic.obj - want).abs() < 1e-4, + "u={u}: near-boundary GP obj {} vs e^u+e^-u = {want}", + conic.obj + ); + assert!( + (conic.obj - nlp_obj).abs() < 1e-4, + "u={u}: GP objectives disagree: conic={}, nlp={nlp_obj}", + conic.obj + ); + eprintln!( + "gp-boundary: u={u} conic obj={:.8}, nlp obj={:.8}", + conic.obj, nlp_obj + ); + } + // The driver must converge for at least the moderate cases, else the test is + // not actually exercising the exp cone. + assert!( + solved_any, + "exp-cone driver solved no near-boundary GP instance" + ); +} diff --git a/crates/pounce-cli/tests/fixtures/convex_qp.nl b/crates/pounce-cli/tests/fixtures/convex_qp.nl new file mode 100644 index 00000000..7df7bb42 --- /dev/null +++ b/crates/pounce-cli/tests/fixtures/convex_qp.nl @@ -0,0 +1,30 @@ +g3 0 1 0 +2 1 1 0 1 +0 1 +0 0 +2 2 +0 0 0 1 +2 0 +0 0 +0 0 +0 0 0 0 0 +C0 +n0 +O0 0 +o0 +o5 +v0 +n2 +o5 +v1 +n2 +r +4 2 +b +3 +3 +k1 +1 +J0 2 +0 1 +1 1 diff --git a/crates/pounce-cli/tests/fixtures/infeasible_qp.nl b/crates/pounce-cli/tests/fixtures/infeasible_qp.nl new file mode 100644 index 00000000..d4b1cc22 --- /dev/null +++ b/crates/pounce-cli/tests/fixtures/infeasible_qp.nl @@ -0,0 +1,28 @@ +g3 0 1 0 +1 2 1 0 2 +0 2 +0 0 +1 2 +0 0 0 1 +2 0 +0 0 +0 0 +0 0 0 0 0 +C0 +n0 +C1 +n0 +O0 0 +o5 +v0 +n2 +r +4 1 +4 2 +b +3 +k0 +J0 1 +0 1 +J1 1 +0 1 diff --git a/crates/pounce-cli/tests/fixtures/lp_afiro.nl b/crates/pounce-cli/tests/fixtures/lp_afiro.nl new file mode 100644 index 00000000..5cb47971 --- /dev/null +++ b/crates/pounce-cli/tests/fixtures/lp_afiro.nl @@ -0,0 +1,276 @@ +g3 1 1 0 # problem unknown + 32 27 1 0 8 # vars, constraints, objectives, ranges, eqns + 0 0 0 0 0 0 # nonlinear constrs, objs; ccons: lin, nonlin, nd, nzlb + 0 0 # network constraints: nonlinear, linear + 0 0 0 # nonlinear vars in constraints, objectives, both + 0 0 0 1 # linear network variables; functions; arith, flags + 0 0 0 0 0 # discrete variables: binary, integer, nonlinear (b,c,o) + 83 5 # nonzeros in Jacobian, obj. gradient + 0 0 # max name lengths: constraints, variables + 0 0 0 0 0 # common exprs: b,c,o,c1,o1 +C0 +n0 +C1 +n0 +C2 +n0 +C3 +n0 +C4 +n0 +C5 +n0 +C6 +n0 +C7 +n0 +C8 +n0 +C9 +n0 +C10 +n0 +C11 +n0 +C12 +n0 +C13 +n0 +C14 +n0 +C15 +n0 +C16 +n0 +C17 +n0 +C18 +n0 +C19 +n0 +C20 +n0 +C21 +n0 +C22 +n0 +C23 +n0 +C24 +n0 +C25 +n0 +C26 +n0 +O0 0 +n0 +x0 +r +4 0.0 +4 0.0 +1 80.0 +1 0.0 +4 0.0 +4 0.0 +1 80.0 +1 0.0 +1 0.0 +1 0.0 +4 0.0 +4 0.0 +1 500.0 +1 0.0 +4 0.0 +4 44.0 +1 500.0 +1 0.0 +1 0.0 +1 0.0 +1 0.0 +1 0.0 +1 0.0 +1 0.0 +1 0.0 +1 310.0 +1 300.0 +b +2 0.0 +2 0.0 +2 0.0 +2 0.0 +2 0.0 +2 0.0 +2 0.0 +2 0.0 +2 0.0 +2 0.0 +2 0.0 +2 0.0 +2 0.0 +2 0.0 +2 0.0 +2 0.0 +2 0.0 +2 0.0 +2 0.0 +2 0.0 +2 0.0 +2 0.0 +2 0.0 +2 0.0 +2 0.0 +2 0.0 +2 0.0 +2 0.0 +2 0.0 +2 0.0 +2 0.0 +2 0.0 +k31 +4 +6 +8 +10 +14 +18 +22 +26 +28 +30 +32 +34 +36 +38 +40 +44 +46 +48 +50 +52 +56 +60 +64 +68 +70 +72 +74 +76 +78 +80 +82 +J0 3 +0 -1.0 +1 1 +2 1 +J1 2 +0 -1.06 +3 1 +J2 1 +0 1 +J3 2 +1 -1.0 +12 1.4 +J4 6 +4 -1.0 +5 -1.0 +6 -1.0 +7 -1.0 +12 1 +13 1 +J5 5 +4 -1.06 +5 -1.06 +6 -0.96 +7 -0.86 +14 1 +J6 2 +4 1 +8 -1.0 +J7 2 +5 1 +9 -1.0 +J8 2 +6 1 +10 -1.0 +J9 2 +7 1 +11 -1.0 +J10 4 +15 -1.0 +16 1 +17 1 +18 1 +J11 2 +15 -0.43 +19 1 +J12 1 +15 1 +J13 2 +16 -1.0 +28 1.4 +J14 5 +20 -0.43 +21 -0.43 +22 -0.39 +23 -0.37 +30 1 +J15 7 +20 1 +21 1 +22 1 +23 1 +28 -1.0 +29 1 +31 1 +J16 2 +20 1 +24 -1.0 +J17 2 +21 1 +25 -1.0 +J18 2 +22 1 +26 -1.0 +J19 2 +23 1 +27 -1.0 +J20 9 +8 2.364 +9 2.386 +10 2.408 +11 2.429 +18 -1.0 +24 2.191 +25 2.219 +26 2.249 +27 2.279 +J21 2 +2 -1.0 +15 0.109 +J22 5 +13 -1.0 +20 0.109 +21 0.108 +22 0.108 +23 0.107 +J23 2 +0 0.301 +17 -1.0 +J24 5 +4 0.301 +5 0.313 +6 0.313 +7 0.326 +29 -1.0 +J25 2 +3 1 +19 1 +J26 2 +14 1 +30 1 +G0 5 +1 -0.4 +12 -0.32 +16 -0.6 +28 -0.48 +31 10.0 diff --git a/crates/pounce-cli/tests/fixtures/nonconvex_qp.nl b/crates/pounce-cli/tests/fixtures/nonconvex_qp.nl new file mode 100644 index 00000000..8190e669 --- /dev/null +++ b/crates/pounce-cli/tests/fixtures/nonconvex_qp.nl @@ -0,0 +1,26 @@ +g3 0 1 0 +2 1 1 0 1 +0 1 +0 0 +2 2 +0 0 0 1 +2 0 +0 0 +0 0 +0 0 0 0 0 +C0 +n0 +O0 0 +o2 +v0 +v1 +r +4 2 +b +0 0 4 +0 0 4 +k1 +1 +J0 2 +0 1 +1 1 diff --git a/crates/pounce-cli/tests/fixtures/tame.nl b/crates/pounce-cli/tests/fixtures/tame.nl new file mode 100644 index 00000000..723c4d8a --- /dev/null +++ b/crates/pounce-cli/tests/fixtures/tame.nl @@ -0,0 +1,47 @@ +g3 1 1 0 # problem TAME + 2 1 1 0 1 # vars, constraints, objectives, ranges, eqns + 0 1 0 0 0 0 # nonlinear constrs, objs; ccons: lin, nonlin, nd, nzlb + 0 0 # network constraints: nonlinear, linear + 0 2 0 # nonlinear vars in constraints, objectives, both + 0 0 0 1 # linear network variables; functions; arith, flags + 0 0 0 0 0 # discrete variables: binary, integer, nonlinear (b,c,o) + 2 2 # nonzeros in Jacobian, obj. gradient + 4 4 # max name lengths: constraints, variables + 0 0 0 0 0 # common exprs: b,c,o,c1,o1 +C0 #c[0] +n0 +O0 0 #obj +o54 # sumlist +4 # (n) +o2 #* +v0 #x[0] +v0 #x[0] +o2 #* +o2 #* +n-1.0 +v1 #x[1] +v0 #x[0] +o2 #* +o2 #* +n-1.0 +v0 #x[0] +v1 #x[1] +o2 #* +v1 #x[1] +v1 #x[1] +x2 # initial guess +0 0.0 #x[0] +1 0.0 #x[1] +r #1 ranges (rhs's) +4 1.0 #c[0] +b #2 bounds (on variables) +2 0.0 #x[0] +2 0.0 #x[1] +k1 #intermediate Jacobian column lengths +1 +J0 2 #c[0] +0 1 +1 1 +G0 2 #obj +0 0 +1 0 diff --git a/crates/pounce-cli/tests/json_report.rs b/crates/pounce-cli/tests/json_report.rs index a6cfbbf3..73610d72 100644 --- a/crates/pounce-cli/tests/json_report.rs +++ b/crates/pounce-cli/tests/json_report.rs @@ -147,6 +147,96 @@ fn pounce_sens_emits_report_with_sens_sol_state_suffix() { let _ = std::fs::remove_file(&json_path); } +/// The `--json-output` report must have a *uniform* schema regardless of +/// which solver path produced it. The NLP path is covered above and the +/// convex QP-IPM path in `qp_dispatch_end_to_end.rs`, but nothing asserts +/// the schema is genuinely identical in shape across paths — including the +/// LP-IPM path, which had no JSON coverage at all. This runs one set of +/// schema invariants over three distinct solver paths (NLP, convex QP-IPM, +/// convex LP-IPM) so the benchmark harness can ingest any pounce solve +/// uniformly. A path that emitted a divergent or placeholder report (e.g. +/// an objective that disagrees with `final_objective`, or an `x` whose +/// length contradicts `n_variables`) would fail here. +#[test] +fn json_schema_is_uniform_across_solver_paths() { + fn fixture_named(name: &str) -> PathBuf { + let mut p = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + p.push("tests"); + p.push("fixtures"); + p.push(name); + p + } + + // (label, fixture, forced solver_selection) — three genuinely different + // code paths inside the CLI dispatch. + let cases: &[(&str, PathBuf, &str)] = &[ + ("nlp", fixture_nl(), "nlp"), + ("convex-qp-ipm", fixture_named("convex_qp.nl"), "qp-ipm"), + ("convex-lp-ipm", fixture_named("lp_afiro.nl"), "lp-ipm"), + ]; + + for (label, fixture, sel) in cases { + let json_path = tmp_path(&format!("uniform_{label}.json")); + let _ = std::fs::remove_file(&json_path); + let out = Command::new(pounce_exe()) + .arg(fixture) + .arg("--no-sol") + .arg("--json-output") + .arg(&json_path) + .arg(format!("solver_selection={sel}")) + .output() + .unwrap_or_else(|e| panic!("spawn pounce ({label}): {e}")); + assert_eq!( + out.status.code(), + Some(0), + "{label} solve should succeed; stderr=\n{}", + String::from_utf8_lossy(&out.stderr) + ); + + let text = std::fs::read_to_string(&json_path) + .unwrap_or_else(|e| panic!("read report ({label}): {e}")); + let report: SolveReport = serde_json::from_str(&text) + .unwrap_or_else(|e| panic!("deserialize report ({label}): {e}\n{text}")); + + // --- invariants every path must satisfy identically --- + assert_eq!( + report.schema, "pounce.solve-report/v1", + "{label}: schema tag" + ); + assert_eq!( + report.fair_metadata.solver.name, "pounce", + "{label}: solver name" + ); + assert!( + !report.fair_metadata.result_id.is_empty(), + "{label}: result_id present" + ); + assert!(!report.solution.x.is_empty(), "{label}: primal x populated"); + assert!( + report.solution.x.iter().all(|v| v.is_finite()), + "{label}: primal x all finite" + ); + assert!( + report.solution.objective.is_finite(), + "{label}: objective finite" + ); + assert!( + (report.solution.objective - report.statistics.final_objective).abs() + <= 1e-9 * report.solution.objective.abs().max(1.0), + "{label}: solution.objective {} != statistics.final_objective {}", + report.solution.objective, + report.statistics.final_objective + ); + assert_eq!( + report.problem.n_variables as usize, + report.solution.x.len(), + "{label}: n_variables matches x length" + ); + + let _ = std::fs::remove_file(&json_path); + } +} + #[test] fn schema_field_is_stable_across_runs() { let p1 = tmp_path("schema_a.json"); diff --git a/crates/pounce-cli/tests/qp_dispatch_end_to_end.rs b/crates/pounce-cli/tests/qp_dispatch_end_to_end.rs new file mode 100644 index 00000000..0a15727c --- /dev/null +++ b/crates/pounce-cli/tests/qp_dispatch_end_to_end.rs @@ -0,0 +1,393 @@ +//! End-to-end: a convex-QP `.nl` file routed through the CLI dispatch to +//! the `pounce-convex` interior-point solver (Phase 2 wiring). +//! +//! Fixture `convex_qp.nl` is `min x0² + x1² s.t. x0 + x1 = 2`, whose +//! optimum is (1, 1) with objective 2. The tests check that: +//! - `solver_selection=auto` classifies it as a convex QP and routes +//! it to the convex IPM (banner names pounce-convex), +//! - `solver_selection=qp-ipm` (forced) also solves it, +//! - the `.sol` primal matches the known optimum, +//! - `solver_selection=nlp` still solves the same file (no regression / +//! same answer via the general path). + +use pounce_solve_report::SolveReport; +use std::path::PathBuf; +use std::process::Command; + +fn pounce_exe() -> PathBuf { + PathBuf::from(env!("CARGO_BIN_EXE_pounce")) +} + +fn fixture() -> PathBuf { + fixture_named("convex_qp.nl") +} + +fn fixture_named(name: &str) -> PathBuf { + let mut p = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + p.push("tests"); + p.push("fixtures"); + p.push(name); + p +} + +/// A primal-infeasible convex QP (`x0+x1=1` and `x0+x1=2`) routed to the +/// convex IPM must report infeasible — the HSDE-style verified +/// detection, surfaced end-to-end — and exit non-zero. +#[test] +fn infeasible_qp_reports_infeasible() { + let out = Command::new(pounce_exe()) + .arg(fixture_named("infeasible_qp.nl")) + .arg("--no-sol") + .arg("solver_selection=qp-ipm") + .output() + .expect("spawn pounce"); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.to_lowercase().contains("infeasible"), + "expected infeasible status; stdout=\n{stdout}" + ); + assert_ne!(out.status.code(), Some(0), "infeasible must exit non-zero"); +} + +// --- A2: a forced solver_selection that does not match the detected +// class must error end-to-end (nonzero exit, clear message) and NEVER +// silently mis-solve to a wrong "optimal". `auto` on the same file must +// route safely instead. --- + +/// The highest-risk mis-route: forcing the convex QP IPM onto a genuinely +/// *nonconvex* QP (`min x0·x1`, indefinite Hessian). It must error, naming +/// the detected class and the forced solver, and must NOT print an +/// "Optimal Solution Found" — a confident wrong answer is the failure mode +/// this whole effort exists to prevent. +#[test] +fn forced_qp_ipm_on_nonconvex_qp_errors() { + let out = Command::new(pounce_exe()) + .arg(fixture_named("nonconvex_qp.nl")) + .arg("--no-sol") + .arg("solver_selection=qp-ipm") + .output() + .expect("spawn pounce"); + assert_eq!(out.status.code(), Some(2), "forced mismatch must exit 2"); + let combined = format!( + "{}{}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert!( + combined.contains("nonconvex QP") && combined.contains("qp-ipm"), + "error must name detected class and forced solver:\n{combined}" + ); + assert!( + !combined.contains("Optimal Solution Found"), + "a mismatch must never report a solve:\n{combined}" + ); +} + +/// Same nonconvex QP forced to the active-set QP solver: also a mismatch, +/// also must error rather than mis-solve. +#[test] +fn forced_qp_active_set_on_nonconvex_qp_errors() { + let out = Command::new(pounce_exe()) + .arg(fixture_named("nonconvex_qp.nl")) + .arg("--no-sol") + .arg("solver_selection=qp-active-set") + .output() + .expect("spawn pounce"); + assert_eq!(out.status.code(), Some(2)); + let combined = format!( + "{}{}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert!( + combined.contains("nonconvex QP") && combined.contains("qp-active-set"), + "error must name detected class and forced solver:\n{combined}" + ); + assert!(!combined.contains("Optimal Solution Found"), "{combined}"); +} + +/// Forcing the LP IPM onto a convex *QP* (not an LP): the QP IPM accepts a +/// QP but the LP entry point does not, so this must error too. +#[test] +fn forced_lp_ipm_on_convex_qp_errors() { + let out = Command::new(pounce_exe()) + .arg(fixture()) + .arg("--no-sol") + .arg("solver_selection=lp-ipm") + .output() + .expect("spawn pounce"); + assert_eq!(out.status.code(), Some(2)); + let combined = format!( + "{}{}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert!( + combined.contains("convex QP") && combined.contains("lp-ipm"), + "error must name detected class and forced solver:\n{combined}" + ); + assert!(!combined.contains("Optimal Solution Found"), "{combined}"); +} + +/// The safe counterpart: `auto` on the same nonconvex QP must NOT route to +/// the convex IPM. It falls back to the general NLP path and solves to a +/// local optimum (exit 0), so the user gets a sound answer rather than an +/// error or a wrong "global" one. +#[test] +fn auto_routes_nonconvex_qp_to_nlp_safely() { + let out = Command::new(pounce_exe()) + .arg(fixture_named("nonconvex_qp.nl")) + .arg("--no-sol") + .arg("solver_selection=auto") + .output() + .expect("spawn pounce"); + assert_eq!(out.status.code(), Some(0), "auto should solve via NLP"); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("pounce-nlp") && !stdout.contains("pounce-convex"), + "auto must fall back to the NLP path, not the convex IPM:\n{stdout}" + ); + assert!( + stdout.contains("Optimal Solution Found"), + "NLP fallback should solve to a local optimum:\n{stdout}" + ); +} + +#[test] +fn auto_routes_convex_qp_to_pounce_convex() { + let out = Command::new(pounce_exe()) + .arg(fixture()) + .arg("--no-sol") + .arg("solver_selection=auto") + .output() + .expect("spawn pounce"); + assert_eq!(out.status.code(), Some(0), "should solve"); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("pounce-convex"), + "auto should route the convex QP to pounce-convex; stdout=\n{stdout}" + ); + assert!( + stdout.contains("Optimal Solution Found"), + "should report optimal; stdout=\n{stdout}" + ); +} + +#[test] +fn forced_qp_ipm_solves() { + let out = Command::new(pounce_exe()) + .arg(fixture()) + .arg("--no-sol") + .arg("solver_selection=qp-ipm") + .output() + .expect("spawn pounce"); + assert_eq!(out.status.code(), Some(0)); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("pounce-convex"), "stdout=\n{stdout}"); +} + +#[test] +fn nlp_path_still_solves_same_file() { + // No regression: the general NLP path must still handle the file. + let out = Command::new(pounce_exe()) + .arg(fixture()) + .arg("--no-sol") + .arg("solver_selection=nlp") + .output() + .expect("spawn pounce"); + assert_eq!(out.status.code(), Some(0)); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("Optimal Solution Found"), + "NLP path stdout=\n{stdout}" + ); +} + +#[test] +fn sol_primal_matches_known_optimum() { + let dir = std::env::temp_dir(); + let sol = dir.join("pounce_convex_qp_test.sol"); + let _ = std::fs::remove_file(&sol); + let out = Command::new(pounce_exe()) + .arg(fixture()) + .arg("--sol-output") + .arg(&sol) + .arg("solver_selection=auto") + .output() + .expect("spawn pounce"); + assert_eq!(out.status.code(), Some(0)); + let text = std::fs::read_to_string(&sol).expect("read .sol"); + // The primal block lists x0 then x1, each ≈ 1.0. Parse the trailing + // floats and check the two that are closest to 1.0 are present. + let near_one = text + .lines() + .filter_map(|l| l.trim().parse::().ok()) + .filter(|v| (v - 1.0).abs() < 1e-5) + .count(); + assert!( + near_one >= 2, + "expected two primal values ≈ 1.0 in .sol:\n{text}" + ); +} + +/// The convex QP path's recovered constraint dual must match the NLP +/// path's dual on the same `.nl` file (the reference convention). For +/// `min x0²+x1² s.t. x0+x1=2` the equality multiplier is −2. +#[test] +fn qp_and_nlp_duals_agree() { + let dir = std::env::temp_dir(); + + let run = |sel: &str, out: &std::path::Path| { + let _ = std::fs::remove_file(out); + let status = Command::new(pounce_exe()) + .arg(fixture()) + .arg("--sol-output") + .arg(out) + .arg(format!("solver_selection={sel}")) + .output() + .expect("spawn pounce"); + assert_eq!(status.status.code(), Some(0), "{sel} failed"); + std::fs::read_to_string(out).expect("read .sol") + }; + + // The single constraint dual is the value closest to −2 in each + // `.sol`'s float block. + let dual_near = |text: &str| -> f64 { + text.lines() + .filter_map(|l| l.trim().parse::().ok()) + .min_by(|a, b| (a - (-2.0)).abs().partial_cmp(&(b - (-2.0)).abs()).unwrap()) + .expect("a float in .sol") + }; + + let qp_sol = run("qp-ipm", &dir.join("pounce_dual_qp.sol")); + let nlp_sol = run("nlp", &dir.join("pounce_dual_nlp.sol")); + + let qp_dual = dual_near(&qp_sol); + let nlp_dual = dual_near(&nlp_sol); + assert!((qp_dual - (-2.0)).abs() < 1e-5, "QP dual {qp_dual} != −2"); + assert!( + (qp_dual - nlp_dual).abs() < 1e-5, + "QP dual {qp_dual} disagrees with NLP dual {nlp_dual}" + ); +} + +/// The convex-QP path emits a `pounce.solve-report/v1` JSON report +/// (`--json-output`), matching the schema the NLP path produces — so the +/// benchmark harness can compare QP and NLP solves uniformly. Validates the +/// schema, status, objective, problem dimensions, and iteration count. +#[test] +fn qp_path_emits_json_report() { + let dir = std::env::temp_dir(); + let json = dir.join("pounce_convex_qp_report.json"); + let _ = std::fs::remove_file(&json); + let out = Command::new(pounce_exe()) + .arg(fixture()) + .arg("--no-sol") + .arg("--json-output") + .arg(&json) + .arg("solver_selection=qp-ipm") + .output() + .expect("spawn pounce"); + assert_eq!(out.status.code(), Some(0), "QP solve should succeed"); + + let text = std::fs::read_to_string(&json).expect("JSON report should be written"); + let report: SolveReport = serde_json::from_str(&text).expect("deserialize report"); + + assert_eq!(report.schema, "pounce.solve-report/v1"); + // min x0²+x1² s.t. x0+x1=2 → optimum (1,1), objective 2. + assert!( + (report.solution.objective - 2.0).abs() < 1e-5, + "objective {} != 2", + report.solution.objective + ); + assert_eq!(report.solution.solve_result_num, 0, "AMPL srn 0 = solved"); + assert_eq!(report.problem.n_variables, 2); + assert_eq!(report.problem.n_constraints, 1); + assert!(report.problem.minimize); + // The convex IPM ran at least one iteration and recorded it. + assert!( + report.statistics.iteration_count >= 1, + "iteration_count = {}", + report.statistics.iteration_count + ); + // Real final KKT residuals (recomputed from the solution), tiny at the + // optimum — not the placeholder zeros. + assert!( + report.statistics.final_constr_viol < 1e-6, + "constr_viol = {}", + report.statistics.final_constr_viol + ); + assert!( + report.statistics.final_dual_inf < 1e-6, + "dual_inf = {}", + report.statistics.final_dual_inf + ); + assert!( + report.statistics.final_kkt_error < 1e-6, + "kkt_error = {}", + report.statistics.final_kkt_error + ); + // FAIR provenance is present (solver name, license). + assert!(!report.fair_metadata.solver.name.is_empty()); +} + +/// At `--json-detail full` the convex-QP report carries the per-iteration +/// convergence trace (the `iterations` array), the same schema the NLP path +/// uses — so the benchmark harness gets per-iteration data for QP solves too. +#[test] +fn qp_full_report_has_iteration_trace() { + let dir = std::env::temp_dir(); + let json = dir.join("pounce_convex_qp_full.json"); + let _ = std::fs::remove_file(&json); + let out = Command::new(pounce_exe()) + .arg(fixture()) + .arg("--no-sol") + .arg("--json-output") + .arg(&json) + .arg("--json-detail") + .arg("full") + .arg("solver_selection=qp-ipm") + .output() + .expect("spawn pounce"); + assert_eq!(out.status.code(), Some(0)); + + let text = std::fs::read_to_string(&json).expect("report written"); + let report: SolveReport = serde_json::from_str(&text).expect("deserialize"); + assert!( + !report.iterations.is_empty(), + "full-detail QP report should carry an iteration trace" + ); + // Iteration indices are 0-based and contiguous; the last iterate is the + // (near-)optimal one. + for (k, rec) in report.iterations.iter().enumerate() { + assert_eq!(rec.iter as usize, k, "iteration indices contiguous"); + } + let last = report.iterations.last().unwrap(); + assert!( + (last.objective - 2.0).abs() < 1e-4, + "final traced objective {} ~ 2", + last.objective + ); +} + +/// The `qp_presolve` option toggles presolve on the convex path; both +/// settings must solve the fixture to the same optimum. +#[test] +fn qp_presolve_option_on_and_off_agree() { + let run = |presolve: &str| -> i32 { + let out = Command::new(pounce_exe()) + .arg(fixture()) + .arg("--no-sol") + .arg("solver_selection=qp-ipm") + .arg(format!("qp_presolve={presolve}")) + .output() + .expect("spawn pounce"); + assert!( + String::from_utf8_lossy(&out.stdout).contains("Optimal Solution Found"), + "qp_presolve={presolve} should solve" + ); + out.status.code().unwrap_or(-1) + }; + assert_eq!(run("yes"), 0); + assert_eq!(run("no"), 0); +} diff --git a/crates/pounce-cli/tests/qp_vs_nlp_iterations.rs b/crates/pounce-cli/tests/qp_vs_nlp_iterations.rs new file mode 100644 index 00000000..e7ba5ede --- /dev/null +++ b/crates/pounce-cli/tests/qp_vs_nlp_iterations.rs @@ -0,0 +1,248 @@ +//! Head-to-head iteration count: the *same* convex QP solved by the NLP +//! filter-IPM (POUNCE's general solver) and by the specialized +//! convex-QP interior-point method in `pounce-convex`. +//! +//! This is the check behind the plan's central claim +//! (`dev-notes/lp-qp-routing.md`): a specialized convex-QP IPM with +//! Mehrotra predictor-corrector should reach the solution in *fewer* +//! interior-point iterations than routing the same problem through the +//! general NLP path. We solve a scalable equality-constrained convex QP +//! both ways and assert (a) both find the same optimum and (b) the QP +//! path takes no more iterations than the NLP path. +//! +//! The QP is `min ½xᵀPx + cᵀx s.t. Ax = b`, with `P` SPD +//! (diagonally dominant) and a handful of dense equality rows, sized by +//! `N`. Large enough that the NLP path needs several iterations, so the +//! comparison is meaningful (unlike the n=2 builtins, where a quadratic +//! is solved almost immediately by either method). + +use pounce_algorithm::application::IpoptApplication; +use pounce_common::types::{Index, Number}; +use pounce_convex::{solve_qp_ipm, QpOptions, QpProblem, QpStatus, Triplet}; +use pounce_feral::FeralSolverInterface; +use pounce_linsol::SparseSymLinearSolverInterface; +use pounce_nlp::return_codes::ApplicationReturnStatus; +use pounce_nlp::tnlp::{ + BoundsInfo, IndexStyle, IpoptCq, IpoptData, NlpInfo, Solution, SparsityRequest, StartingPoint, + TNLP, +}; +use std::cell::RefCell; +use std::rc::Rc; + +/// Build a scalable *bound-constrained* convex QP — the regime where the +/// central path is non-trivial and the IPM-QP-vs-IPM-NLP iteration +/// comparison is meaningful. `P = diag(d) + sub-diagonal coupling` (SPD +/// by diagonal dominance). The linear term `c` pushes the unconstrained +/// optimum below the lower bounds, so many bounds are active and the +/// solver must traverse the central path. Bounds `0 ≤ x ≤ ub` are +/// written as inequality rows `−x ≤ 0` and `x ≤ ub`. +fn make_qp(n: usize) -> QpProblem { + let mut p_lower = Vec::new(); + for i in 0..n { + p_lower.push(Triplet::new(i, i, 2.0 + (i % 5) as f64)); + if i > 0 { + p_lower.push(Triplet::new(i, i - 1, 0.5)); + } + } + // Negative linear term → unconstrained optimum is positive and large, + // so the upper bounds bind for many components. + let c: Vec = (0..n).map(|i| -2.0 - (i % 7) as f64).collect(); + + // Bounds 0 ≤ x_i ≤ 1 as 2n inequality rows. + let mut g = Vec::new(); + let mut h = Vec::new(); + for i in 0..n { + g.push(Triplet::new(2 * i, i, 1.0)); // x_i ≤ 1 + h.push(1.0); + g.push(Triplet::new(2 * i + 1, i, -1.0)); // −x_i ≤ 0 + h.push(0.0); + } + + QpProblem { + n, + p_lower, + c, + a: vec![], + b: vec![], + g, + h, + lb: vec![], + ub: vec![], + } +} + +/// TNLP adapter wrapping a `QpProblem` so the NLP filter-IPM can solve +/// the identical problem. Only equality constraints are used here. +/// Wraps a bound-constrained convex QP `min ½xᵀPx+cᵀx, 0 ≤ x ≤ ub` as a +/// TNLP. The bounds are expressed as TNLP *variable* bounds (the natural +/// NLP encoding), so the NLP filter-IPM solves exactly the same +/// mathematical problem the `pounce-convex` QP solver sees as bound rows. +struct QpAsTnlp { + prob: QpProblem, + /// Variable lower/upper bounds (length n). + lb: Vec, + ub: Vec, + /// Lower-triangle Hessian entries (constant) as (row, col, val). + h_entries: Vec<(usize, usize, f64)>, + captured_obj: RefCell>, + captured_x: RefCell>>, +} + +impl QpAsTnlp { + fn new(prob: QpProblem, lb: Vec, ub: Vec) -> Self { + let h_entries: Vec<(usize, usize, f64)> = + prob.p_lower.iter().map(|t| (t.row, t.col, t.val)).collect(); + QpAsTnlp { + prob, + lb, + ub, + h_entries, + captured_obj: RefCell::new(None), + captured_x: RefCell::new(None), + } + } +} + +impl TNLP for QpAsTnlp { + fn get_nlp_info(&mut self) -> Option { + Some(NlpInfo { + n: self.prob.n as Index, + m: 0, + nnz_jac_g: 0, + nnz_h_lag: self.h_entries.len() as Index, + index_style: IndexStyle::C, + }) + } + + fn get_bounds_info(&mut self, b: BoundsInfo<'_>) -> bool { + b.x_l.copy_from_slice(&self.lb); + b.x_u.copy_from_slice(&self.ub); + true + } + + fn get_starting_point(&mut self, sp: StartingPoint<'_>) -> bool { + sp.x.iter_mut().for_each(|v| *v = 0.0); + true + } + + fn eval_f(&mut self, x: &[Number], _new_x: bool) -> Option { + let mut px = vec![0.0; self.prob.n]; + self.prob.p_mul_add_pub(x, &mut px); + let mut f = 0.0; + for i in 0..self.prob.n { + f += 0.5 * x[i] * px[i] + self.prob.c[i] * x[i]; + } + Some(f) + } + + fn eval_grad_f(&mut self, x: &[Number], _new_x: bool, grad: &mut [Number]) -> bool { + grad.iter_mut().zip(&self.prob.c).for_each(|(g, c)| *g = *c); + self.prob.p_mul_add_pub(x, grad); + true + } + + fn eval_g(&mut self, _x: &[Number], _new_x: bool, _g: &mut [Number]) -> bool { + // No general constraints — bounds are variable bounds. + true + } + + fn eval_jac_g( + &mut self, + _x: Option<&[Number]>, + _new_x: bool, + _mode: SparsityRequest<'_>, + ) -> bool { + true + } + + fn eval_h( + &mut self, + _x: Option<&[Number]>, + _new_x: bool, + obj_factor: Number, + _lambda: Option<&[Number]>, + _new_lambda: bool, + mode: SparsityRequest<'_>, + ) -> bool { + // Constraints are linear, so the Lagrangian Hessian is just + // obj_factor * P. + match mode { + SparsityRequest::Structure { irow, jcol } => { + for (i, (r, c, _)) in self.h_entries.iter().enumerate() { + irow[i] = *r as Index; + jcol[i] = *c as Index; + } + } + SparsityRequest::Values { values } => { + for (i, (_, _, v)) in self.h_entries.iter().enumerate() { + values[i] = obj_factor * v; + } + } + } + true + } + + fn finalize_solution(&mut self, sol: Solution<'_>, _d: &IpoptData, _q: &IpoptCq) { + *self.captured_obj.borrow_mut() = Some(sol.obj_value); + *self.captured_x.borrow_mut() = Some(sol.x.to_vec()); + } +} + +fn backend() -> Box { + Box::new(FeralSolverInterface::new()) +} + +#[test] +fn qp_ipm_uses_no_more_iterations_than_nlp() { + let n = 50; + let prob = make_qp(n); + let lb = vec![0.0; n]; + let ub = vec![1.0; n]; + + // --- QP path --- + let qp_sol = solve_qp_ipm(&prob, &QpOptions::default(), backend); + assert_eq!( + qp_sol.status, + QpStatus::Optimal, + "QP IPM failed: {:?}", + qp_sol.status + ); + let qp_iters = qp_sol.iters; + let qp_obj = qp_sol.obj; + + // --- NLP path on the identical problem --- + let mut app = IpoptApplication::new(); + app.initialize().expect("init"); + let _ = app.options_mut().read_from_str("print_level 0\n", true); + let tnlp_rc = Rc::new(RefCell::new(QpAsTnlp::new(prob.clone(), lb, ub))); + let tnlp: Rc> = tnlp_rc.clone(); + let status = app.optimize_tnlp(Rc::clone(&tnlp)); + assert_eq!( + status, + ApplicationReturnStatus::SolveSucceeded, + "NLP solve failed: {status:?}" + ); + let nlp_iters = app.statistics().iteration_count as usize; + let nlp_obj = tnlp_rc + .borrow() + .captured_obj + .borrow() + .expect("NLP finalize captured objective"); + + // --- both reached the same optimum (validates the comparison) --- + assert!( + (qp_obj - nlp_obj).abs() < 1e-5, + "objectives disagree: QP={qp_obj}, NLP={nlp_obj}" + ); + + eprintln!( + "n={n}: QP IPM iters = {qp_iters}, NLP IPM iters = {nlp_iters} (obj QP={qp_obj:.6}, NLP={nlp_obj:.6})" + ); + + // The specialized QP path should not take more interior-point + // iterations than the general NLP path on this convex QP. + assert!( + qp_iters <= nlp_iters, + "expected QP iters ({qp_iters}) <= NLP iters ({nlp_iters})" + ); +} diff --git a/crates/pounce-common/src/debug.rs b/crates/pounce-common/src/debug.rs new file mode 100644 index 00000000..d046a137 --- /dev/null +++ b/crates/pounce-common/src/debug.rs @@ -0,0 +1,429 @@ +//! Shared interior-point debugger abstraction. +//! +//! The interactive solver debugger (a "pdb for the interior-point loop") +//! is driven by a [`DebugHook`] that the solver fires at well-defined +//! [`Checkpoint`]s. The hook receives a `&mut dyn` [`DebugState`] — a +//! live, possibly-mutable view of the solver's per-iteration state — and +//! returns a [`DebugAction`] telling the loop whether to keep solving. +//! +//! These traits live in `pounce-common` so that *every* solver can be +//! debugged by the *same* REPL: the NLP filter-IPM (`pounce-algorithm`) +//! and the convex / conic IPM (`pounce-convex`) both implement +//! [`DebugState`] over their own state, and the CLI's `SolverDebugger` +//! implements [`DebugHook`] once against the trait. +//! +//! [`DebugState`] splits its surface in two: +//! +//! * **Generic** accessors every interior-point method has — iteration +//! index, μ, objective, primal/dual infeasibility, complementarity, +//! step lengths, and named iterate / search-direction blocks — are +//! required methods. +//! * **Solver-specific** extras (the NLP error metric, bound-slack +//! active-set view, KKT inertia / matrix / factor capture, line-search +//! trial count, snapshot/restore, mutation) have default impls that +//! report "unsupported", so a solver overrides only what it actually +//! has. The REPL turns an unsupported result into a friendly message. + +use crate::types::Number; +use std::any::Any; + +/// Where in a solver's loop a checkpoint fired. +/// +/// The variants cover the NLP filter-IPM's loop; other interior-point +/// solvers fire the subset that applies to them (e.g. the convex IPM uses +/// [`IterStart`](Checkpoint::IterStart), +/// [`AfterSearchDirection`](Checkpoint::AfterSearchDirection), +/// [`AfterStep`](Checkpoint::AfterStep), and +/// [`Terminated`](Checkpoint::Terminated); it has no restoration phase or +/// backtracking line search, so those variants simply never fire). +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Checkpoint { + /// Top of an outer iteration — before this iteration's step is + /// computed. The iterate, multipliers, and μ reflect the *accepted* + /// point from the previous iteration. + IterStart, + /// After the barrier parameter μ was updated for this iteration + /// (before the search direction is computed). + AfterBarrierUpdate, + /// After the primal-dual Newton step was computed — the search + /// direction `δ`, the applied regularization, and the KKT + /// factorization are available. + AfterSearchDirection, + /// After a step length was chosen and the trial point accepted — the + /// step lengths α and the new iterate are in place. + AfterStep, + /// The line search *rejected* this iteration's step and the solver is + /// about to fall into restoration (NLP filter-IPM only). + StepRejected, + /// Just before the algorithm switches into the restoration phase + /// (NLP filter-IPM only). + PreRestoration, + /// Just after the restoration phase returns (NLP filter-IPM only). + PostRestoration, + /// The solve has finished: fired once before the solver returns, at + /// the final iterate, carrying the outcome via [`DebugState::status`]. + /// The [`DebugAction`] returned here is **ignored** — the solve is + /// already over. + Terminated, +} + +impl Checkpoint { + /// The stable wire/CLI protocol name for this checkpoint. These strings + /// are intentionally **not** the variant identifiers (`AfterBarrierUpdate` + /// → `"after_mu"`, `PreRestoration` → `"pre_restoration_entry"`) — they're + /// the names the JSON protocol and `stop-at` use, so match on the variant, + /// not the string. + pub fn as_str(self) -> &'static str { + match self { + Checkpoint::IterStart => "iter_start", + Checkpoint::AfterBarrierUpdate => "after_mu", + Checkpoint::AfterSearchDirection => "after_search_dir", + Checkpoint::AfterStep => "after_step", + Checkpoint::StepRejected => "step_rejected", + Checkpoint::PreRestoration => "pre_restoration_entry", + Checkpoint::PostRestoration => "post_restoration_exit", + Checkpoint::Terminated => "terminated", + } + } + + /// Sub-iteration checkpoints (everything between `IterStart` and the + /// next `IterStart`). + pub fn is_sub_iteration(self) -> bool { + matches!( + self, + Checkpoint::AfterBarrierUpdate + | Checkpoint::AfterSearchDirection + | Checkpoint::AfterStep + | Checkpoint::StepRejected + | Checkpoint::PreRestoration + | Checkpoint::PostRestoration + ) + } +} + +/// What the solver should do after a [`DebugHook`] returns. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum DebugAction { + /// Keep solving. + Resume, + /// Stop the solve now. Surfaces to the caller as a + /// user-requested-stop outcome. + Stop, +} + +/// KKT-factorization report (see [`DebugState::kkt`]). The inertia of a +/// well-posed primal-dual system is `(n_pos = n, n_neg = m, n_zero = 0)`; +/// a mismatch (or nonzero regularization) is the classic signal that the +/// step is being stabilized. +#[derive(Clone, Debug)] +pub struct KktReport { + /// The outer iteration this factorization was assembled at — may be the + /// previous iteration when paused at `iter_start` (viz look-back). + pub iter: i32, + /// Augmented-system dimension (n + m). + pub dim: i32, + /// Negative eigenvalues reported (-1 if the backend has no inertia). + pub n_neg: i32, + /// Positive eigenvalues = `dim − n_neg` (-1 if unknown). + pub n_pos: i32, + /// Expected negatives = number of equality + inequality multipliers. + pub expected_neg: i32, + /// Whether the backend reports inertia. + pub provides_inertia: bool, + /// `true` when reported inertia matches the expected `(n, m, 0)`. + pub inertia_correct: bool, + /// Primal regularization δ_w applied to the (1,1) block. + pub delta_w: Number, + /// Dual regularization δ_c applied to the (3,3)/(4,4) blocks. + pub delta_c: Number, + /// Factorization status (debug string). + pub status: String, +} + +/// Captured `LDLᵀ` factor for `viz L`: +/// `(n, perm, l_irn, l_jcn, l_vals)`. +pub type LFactor = (usize, Vec, Vec, Vec, Option>); + +/// Assembled KKT matrix triplets for `viz kkt`: +/// `(dim, irn, jcn, vals)` (1-based lower triangle). +pub type KktTriplets = (i32, Vec, Vec, Vec); + +/// Which residual space a [`Residual`] entry comes from. +/// +/// Primal entries are the per-constraint violations whose max-norm is +/// `inf_pr`; dual entries are the per-variable Lagrangian-gradient +/// components whose max-norm is `inf_du`. (NLP-specific; the convex/conic +/// and global solvers do not expose per-component residuals.) +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum ResidKind { + /// Equality constraint residual `c_i(x)`. + Eq, + /// Inequality residual `d_i(x) − s_i` (the IPM slack reformulation). + Ineq, + /// `x`-space stationarity component `(∇_x L)_i`. + DualX, + /// `s`-space stationarity component `(∇_s L)_i`. + DualS, +} + +impl ResidKind { + /// Short label used in the debugger's `print residuals` output and + /// the JSON `space` field. Stable — readers may match on it. + pub fn tag(self) -> &'static str { + match self { + ResidKind::Eq => "c", + ResidKind::Ineq => "d-s", + ResidKind::DualX => "grad_x_L", + ResidKind::DualS => "grad_s_L", + } + } + + /// `true` for the primal (constraint) spaces, `false` for the dual + /// (stationarity) spaces. + pub fn is_primal(self) -> bool { + matches!(self, ResidKind::Eq | ResidKind::Ineq) + } +} + +/// One signed residual component at the current iterate: its space, its +/// index within that space, and its value. See +/// [`DebugState::constraint_residuals`] / [`DebugState::dual_residuals`]. +#[derive(Clone, Copy, Debug)] +pub struct Residual { + pub kind: ResidKind, + pub index: usize, + pub value: Number, +} + +/// An opaque, readable snapshot of a solver's primal-dual state at one +/// iteration, returned by [`DebugState::snapshot`] and replayed by +/// [`DebugState::restore`]. +/// +/// The reader methods (`iter` / `mu` / `block`) let the REPL `diff` two +/// captured points generically; [`as_any`](IterSnapshot::as_any) lets the +/// originating solver downcast back to its concrete snapshot type to +/// restore it. +pub trait IterSnapshot: Any { + /// Iteration index this snapshot was taken at. + fn iter(&self) -> i32; + /// Barrier parameter μ at the snapshot. + fn mu(&self) -> Number; + /// A named iterate block at the snapshot, if present. + fn block(&self, name: &str) -> Option>; + /// Downcast handle for the originating solver's `restore`. + fn as_any(&self) -> &dyn Any; +} + +/// A live view of solver state handed to a [`DebugHook`] at a checkpoint. +/// +/// Required methods are the quantities every interior-point method has. +/// The remaining methods carry solver-specific capabilities and default +/// to "unsupported" (NaN / `None` / `-1` / `Err`), so a solver overrides +/// only the ones it can answer. `set_*` mutators likewise default to a +/// descriptive `Err` for solvers that don't support in-place edits. +pub trait DebugState { + // ---- required: generic interior-point quantities ------------------- + + /// Downcast escape hatch for **solver-specific** REPL commands whose + /// payload can't live in this leaf crate (e.g. the NLP debugger's + /// rank diagnosis, model-name resolution, or full primal-dual warm + /// `resolve`). A solver that supports those returns `Some(self)` so the + /// REPL can downcast to its concrete state; the default `None` makes the + /// command report "not supported for this solver". + fn as_any(&self) -> Option<&dyn Any> { + None + } + + /// Mutable form of [`as_any`](DebugState::as_any), for commands that + /// mutate solver-specific state (e.g. live-tolerance hot-swap). + fn as_any_mut(&mut self) -> Option<&mut dyn Any> { + None + } + + /// Which checkpoint we are paused at. + fn checkpoint(&self) -> Checkpoint; + + /// Current outer iteration counter. + fn iter(&self) -> i32; + + /// Current barrier parameter μ. + fn mu(&self) -> Number; + + /// Objective at the current iterate (in the user's original sense). + fn objective(&self) -> Number; + + /// Max-norm primal infeasibility. + fn inf_pr(&self) -> Number; + + /// Max-norm dual infeasibility. + fn inf_du(&self) -> Number; + + /// Average complementarity — the IPM's "distance from the central + /// path" gauge; should track μ. + fn complementarity(&self) -> Number; + + /// Accepted primal / dual step lengths (α_pr, α_du). A solver with a + /// single symmetric step (e.g. HSDE) reports it in both slots. + fn alpha(&self) -> (Number, Number); + + /// Dimensions of every named iterate block, in display order. + fn block_dims(&self) -> Vec<(&'static str, usize)>; + + /// Read a named block of the current iterate as a flat `f64` vec. + /// `None` for an unknown name or before the iterate is set. + fn block(&self, name: &str) -> Option>; + + /// Read a named block of the most recent search direction. + fn delta_block(&self, name: &str) -> Option>; + + // ---- optional: solver-specific extras (default = unsupported) ------ + + /// Solve outcome, present only at [`Checkpoint::Terminated`]. + fn status(&self) -> Option<&str> { + None + } + + /// A scalar convergence error driving termination (the NLP "nlp_error"). + /// `NaN` when the solver has no single such metric. + fn nlp_error(&self) -> Number { + Number::NAN + } + + /// Slacks to a bound category (`x_l` / `x_u` / `s_l` / `s_u`) for the + /// active-set view. `None` when the solver has no bound-slack notion. + fn bound_slack(&self, _which: &str) -> Option> { + None + } + + /// Regularization applied to the KKT system this iteration. `NaN` when + /// the solver does not expose one. + fn regularization(&self) -> Number { + Number::NAN + } + + /// Number of line-search trial points for the accepted step. `-1` for + /// solvers without a backtracking line search (e.g. the convex IPM, + /// which takes a fraction-to-boundary step). + fn ls_count(&self) -> i32 { + -1 + } + + /// KKT-factorization inertia / regularization report, if available. + fn kkt(&self) -> Option { + None + } + + /// Assembled KKT matrix triplets for `viz kkt`, if captured. + fn kkt_matrix(&self) -> Option { + None + } + + /// The `LDLᵀ` factor for `viz L`, if captured. + fn kkt_l_factor(&self) -> Option { + None + } + + /// The iteration the currently-captured KKT matrix / factor came from + /// (may be the previous iteration when paused at `iter_start`, the viz + /// look-back). `None` when nothing is captured or unsupported. + fn kkt_captured_iter(&self) -> Option { + None + } + + /// Ask the solver to capture the `LDLᵀ` factor on later solves. + /// Returns whether it is already available now. + fn request_l_factor(&mut self) -> bool { + false + } + + /// Ask the solver to assemble the KKT triplets on later solves. + /// Returns whether they are already available now. + fn request_kkt_matrix(&mut self) -> bool { + false + } + + /// Overwrite the barrier parameter μ. + fn set_mu(&mut self, _mu: Number) -> Result<(), String> { + Err("this solver does not support setting mu".into()) + } + + /// Overwrite an entire named block of the current iterate. + fn set_block(&mut self, _name: &str, _vals: &[Number]) -> Result<(), String> { + Err("this solver does not support editing the iterate".into()) + } + + /// Overwrite a single component of a named block. Defaults to a + /// read-modify-write through [`block`](DebugState::block) / + /// [`set_block`](DebugState::set_block). + fn set_component(&mut self, name: &str, idx: usize, val: Number) -> Result<(), String> { + let mut vals = self + .block(name) + .ok_or_else(|| format!("unknown block `{name}` or no iterate yet"))?; + if idx >= vals.len() { + return Err(format!( + "index {idx} out of range for block `{name}` (dimension {})", + vals.len() + )); + } + vals[idx] = val; + self.set_block(name, &vals) + } + + /// Capture the current primal-dual state for a later [`restore`]. + /// `None` when snapshots are unsupported or no iterate is set yet. + /// + /// [`restore`]: DebugState::restore + fn snapshot(&self) -> Option> { + None + } + + /// Restore a snapshot previously returned by [`snapshot`]. Returns + /// whether the restore succeeded (false on unsupported, or a snapshot + /// minted by a different solver). + /// + /// [`snapshot`]: DebugState::snapshot + fn restore(&mut self, _snap: &dyn IterSnapshot) -> bool { + false + } + + /// Per-constraint signed primal residuals at the current iterate (the + /// components whose max-norm is `inf_pr`), for the `print residuals` + /// command. `None` when the solver does not expose per-component + /// residuals (the convex/conic and global solvers). + fn constraint_residuals(&self) -> Option> { + None + } + + /// Per-variable signed dual (Lagrangian-gradient) residuals at the + /// current iterate (the components whose max-norm is `inf_du`). `None` + /// when unsupported. + fn dual_residuals(&self) -> Option> { + None + } +} + +/// A consumer that a solver pauses at each [`Checkpoint`]. The CLI's +/// REPL / agent driver is the production implementation; the same hook +/// instance can drive any solver that exposes a [`DebugState`]. +pub trait DebugHook { + /// Called at every checkpoint. Inspect and/or mutate via `state`, then + /// return whether to keep solving. + fn at_checkpoint(&mut self, state: &mut dyn DebugState) -> DebugAction; + + /// Whether the solver should capture the (heavier) KKT matrix triplets + /// and `LDLᵀ` factor this iteration, so `viz kkt` / `viz L` can look back + /// at the previous iteration's system. True while stepping interactively; + /// a detached (running-free) hook returns false so the O(nnz) assembly + /// isn't paid every iteration. The cheap inertia/status fields are + /// captured regardless. + fn wants_kkt_capture(&self) -> bool { + true + } + + /// Arm the hook to pause at the next checkpoint. Used to debug a + /// sub-solve **on demand** — an outer driver can re-arm this + /// interior-point hook just before a particular solve, so the hook + /// stays quiet otherwise but drops in for that one solve. Default: + /// no-op (always-on hooks ignore it). + fn arm(&mut self) {} +} diff --git a/crates/pounce-common/src/lib.rs b/crates/pounce-common/src/lib.rs index 766a8e6b..ac062b37 100644 --- a/crates/pounce-common/src/lib.rs +++ b/crates/pounce-common/src/lib.rs @@ -7,6 +7,7 @@ #![cfg_attr(test, allow(clippy::unwrap_used, clippy::expect_used))] pub mod cached; +pub mod debug; pub mod diagnostics; pub mod exception; pub mod journalist; diff --git a/crates/pounce-convex/Cargo.toml b/crates/pounce-convex/Cargo.toml new file mode 100644 index 00000000..2bf4f7cc --- /dev/null +++ b/crates/pounce-convex/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "pounce-convex" +version.workspace = true +edition.workspace = true +license.workspace = true +authors.workspace = true +repository.workspace = true +readme = "README.md" +description = "Interior-point solvers for the convex problem classes in POUNCE: LP and convex QP today, with cone-generic scaffolding (Mehrotra + HSDE, SOCP/exp/pow/SDP) planned. Shares the pounce-linsol sparse symmetric factorization backbone with the NLP path." +keywords = ["lp", "qp", "interior-point", "convex-optimization", "solver"] +categories = ["mathematics", "science"] + +[dependencies] +pounce-common.workspace = true +pounce-linsol.workspace = true +# Dense symmetric eigensolver (cyclic Jacobi) for the QP reduced Hessian, +# shared with the NLP sensitivity path. +pounce-linalg.workspace = true +# Data-parallel presolve (duplicate-row hashing); already a transitive +# workspace dependency via feral, so no new external crate is pulled in. +rayon = "1" + +[dev-dependencies] +# FERAL backs the in-tree unit tests so the IPM runs end-to-end against +# a real sparse symmetric factorization without external solvers. +pounce-feral.workspace = true + +[lints] +workspace = true diff --git a/crates/pounce-convex/README.md b/crates/pounce-convex/README.md new file mode 100644 index 00000000..50a25062 --- /dev/null +++ b/crates/pounce-convex/README.md @@ -0,0 +1,39 @@ +# pounce-convex + +Interior-point solvers for POUNCE's convex problem classes: **LP and +convex QP** today, with cone-generic scaffolding for the conic family +(SOCP, exponential/power cones, SDP) planned. + +This crate is Phase 2 of the LP/QP routing plan +(`dev-notes/lp-qp-routing.md`). It provides a bare primal-dual +interior-point method for convex QP in standard form: + +```text +minimize ½ xᵀP x + cᵀx +subject to A x = b + G x ≤ h +``` + +LP is the `P = 0` case and is solved by the same driver. + +## Design + +- **Cone-generic.** The interior-point iteration is built over a + [`cones::Cone`] trait with only the nonnegative orthant + (`cones::nonneg`) implemented. Later phases add SOC / PSD / exp / pow + cones behind the same trait, so the driver is extended, not rewritten. +- **Shared factorization.** The symmetric indefinite KKT system is solved + through `pounce_linsol::Factorization` — the same factor-once / + solve-many handle the NLP path uses (feral by default, MA57 optional). + No new linear-algebra dependency. +- **Bare method now, Mehrotra next.** The current iteration uses a fixed + centering parameter and fraction-to-boundary step control. Mehrotra + predictor-corrector and the homogeneous self-dual embedding are Phase 3 + and slot into this same scaffolding. + +## Status + +Phase 2, first increment: correct convex-QP solves validated against +problems with analytically known optima (unconstrained, equality-, +inequality-, and bound-constrained). Not yet wired into the CLI dispatch +(`auto` still routes to NLP-IPM); not yet performance-tuned. diff --git a/crates/pounce-convex/examples/batch_solve.rs b/crates/pounce-convex/examples/batch_solve.rs new file mode 100644 index 00000000..e06c1022 --- /dev/null +++ b/crates/pounce-convex/examples/batch_solve.rs @@ -0,0 +1,91 @@ +//! Batched / multiple-RHS convex-QP solving: solve a family of QPs that +//! share structure but differ in their data, in parallel via rayon. +//! +//! Run: `cargo run -p pounce-convex --release --example batch_solve` + +use pounce_convex::{solve_qp_batch_parallel, solve_qp_multi_rhs, QpOptions, QpProblem, Triplet}; +use pounce_feral::FeralSolverInterface; +use pounce_linsol::SparseSymLinearSolverInterface; +use std::time::Instant; + +fn backend() -> Box { + Box::new(FeralSolverInterface::new()) +} + +/// Inner-serial backend for the outer-parallel / inner-serial batch path. +fn serial_backend() -> Box { + Box::new(FeralSolverInterface::serial()) +} + +/// Box-constrained QP `min ½xᵀ(2I)x + cᵀx, 0 ≤ x ≤ 1` for a given `c`. +fn boxed_qp(c: Vec) -> QpProblem { + let n = c.len(); + QpProblem { + n, + p_lower: (0..n).map(|i| Triplet::new(i, i, 2.0)).collect(), + c, + a: vec![], + b: vec![], + g: vec![], + h: vec![], + lb: vec![0.0; n], + ub: vec![1.0; n], + } +} + +fn main() { + let opts = QpOptions::default(); + + println!("=== multiple RHS: one structure, many objectives ==="); + let base = boxed_qp(vec![0.0, 0.0]); + let cs = vec![ + vec![-1.0, -4.0], + vec![-4.0, 1.0], + vec![3.0, -2.0], + vec![0.5, 0.5], + ]; + let sols = solve_qp_multi_rhs(&base, &cs, &opts, backend); + for (c, s) in cs.iter().zip(&sols) { + println!( + "c={c:?} → x=[{:.3}, {:.3}] obj={:.4}", + s.x[0], s.x[1], s.obj + ); + } + + println!("\n=== batch throughput (parallel via rayon) ==="); + for &count in &[100usize, 1_000, 5_000] { + // A sweep of distinct small box QPs. + let probs: Vec = (0..count) + .map(|k| { + let t = (k as f64) / (count as f64); + boxed_qp(vec![-2.0 * t, -2.0 * (1.0 - t)]) + }) + .collect(); + + let t0 = Instant::now(); + let batched = solve_qp_batch_parallel(&probs, &opts, serial_backend); + let par = t0.elapsed().as_secs_f64() * 1e3; + + // Sequential reference for comparison. + let t1 = Instant::now(); + let seq: Vec<_> = probs + .iter() + .map(|p| pounce_convex::solve_qp_ipm(p, &opts, backend)) + .collect(); + let seq_ms = t1.elapsed().as_secs_f64() * 1e3; + + let all_ok = batched + .iter() + .zip(&seq) + .all(|(b, s)| (b.obj - s.obj).abs() < 1e-9); + println!( + "{count:>5} QPs: batch(par) {par:>8.1} ms sequential {seq_ms:>8.1} ms \ + speedup {:.2}× (results match: {all_ok})", + seq_ms / par, + ); + } + + println!("\nEach QP solves independently (own factor + iterate), so the"); + println!("batch is embarrassingly parallel; rayon balances uneven iteration"); + println!("counts across instances."); +} diff --git a/crates/pounce-convex/examples/iter_compare.rs b/crates/pounce-convex/examples/iter_compare.rs new file mode 100644 index 00000000..23d3cdee --- /dev/null +++ b/crates/pounce-convex/examples/iter_compare.rs @@ -0,0 +1,78 @@ +//! Iteration-count comparison: the convex-QP IPM on the same QPs the +//! CLI exposes as builtins, so the counts line up against the NLP path +//! (`pounce --problem ` reports "Number of Iterations"). +//! +//! Run: `cargo run -p pounce-convex --example iter_compare` + +use pounce_convex::{solve_qp_ipm, QpOptions, QpProblem, Triplet}; +use pounce_feral::FeralSolverInterface; +use pounce_linsol::SparseSymLinearSolverInterface; + +fn backend() -> Box { + Box::new(FeralSolverInterface::new()) +} + +fn report(name: &str, prob: &QpProblem) { + let sol = solve_qp_ipm(prob, &QpOptions::default(), backend); + println!( + "{name:<20} status={:?} iters={} obj={:.6} x={:?}", + sol.status, sol.iters, sol.obj, sol.x + ); +} + +fn main() { + // `quadratic`: min (x0-3)^2 + (x1-4)^2 ⇒ ½xᵀ(2I)x + (-6,-8)ᵀx + const + // P = 2I, c = (-6, -8). (constant 25 dropped; affects obj only) + report( + "quadratic", + &QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![-6.0, -8.0], + a: vec![], + b: vec![], + g: vec![], + h: vec![], + lb: vec![], + ub: vec![], + }, + ); + + // `bounded-quadratic`: same objective, 0 ≤ x ≤ 2 (so optimum at the + // upper bounds (2,2)). Bounds as four inequality rows. + report( + "bounded-quadratic", + &QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![-6.0, -8.0], + a: vec![], + b: vec![], + g: vec![ + Triplet::new(0, 0, 1.0), // x0 ≤ 2 + Triplet::new(1, 1, 1.0), // x1 ≤ 2 + Triplet::new(2, 0, -1.0), // x0 ≥ 0 + Triplet::new(3, 1, -1.0), // x1 ≥ 0 + ], + h: vec![2.0, 2.0, 0.0, 0.0], + lb: vec![], + ub: vec![], + }, + ); + + // `eq-quadratic`: min x0² + x1² s.t. x0 + x1 = 1 ⇒ P = 2I, c = 0. + report( + "eq-quadratic", + &QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![0.0, 0.0], + a: vec![Triplet::new(0, 0, 1.0), Triplet::new(0, 1, 1.0)], + b: vec![1.0], + g: vec![], + h: vec![], + lb: vec![], + ub: vec![], + }, + ); +} diff --git a/crates/pounce-convex/examples/presolve_reductions.rs b/crates/pounce-convex/examples/presolve_reductions.rs new file mode 100644 index 00000000..e45a9989 --- /dev/null +++ b/crates/pounce-convex/examples/presolve_reductions.rs @@ -0,0 +1,296 @@ +//! Demonstrates the LP/QP presolve reductions and the rayon-parallel +//! duplicate-row detection, reporting the size reduction and the solve. +//! +//! Run: `cargo run -p pounce-convex --release --example presolve_reductions` + +use pounce_convex::presolve::{presolve, solve_with_presolve, PresolveOutcome}; +use pounce_convex::{solve_qp_ipm, QpOptions, QpProblem, QpStatus, Triplet}; +use pounce_feral::FeralSolverInterface; +use pounce_linsol::SparseSymLinearSolverInterface; +use std::time::Instant; + +fn backend() -> Box { + Box::new(FeralSolverInterface::new()) +} + +fn report(name: &str, prob: &QpProblem) { + print!("{name:<34} {}×{} → ", prob.n, prob.m_eq() + prob.m_ineq()); + match presolve(prob) { + PresolveOutcome::Infeasible => println!("INFEASIBLE (detected in presolve)"), + PresolveOutcome::Unbounded => println!("UNBOUNDED (detected in presolve)"), + PresolveOutcome::Reduced(ps) => { + let r = &ps.reduced; + let sol = + solve_with_presolve(prob, |p| solve_qp_ipm(p, &QpOptions::default(), backend)); + println!( + "{}×{} solve: {:?} obj={:.4}", + r.n, + r.m_eq() + r.m_ineq(), + sol.status, + sol.obj + ); + assert_eq!(sol.status, QpStatus::Optimal); + } + } +} + +fn main() { + println!("=== reduction showcase (original → reduced size) ==="); + + // Free column with zero cost: x1 is irrelevant and removed. + report( + "free column (dropped)", + &QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0)], + c: vec![0.0, 0.0], + a: vec![Triplet::new(0, 0, 1.0)], + b: vec![2.0], + g: vec![], + h: vec![], + lb: vec![], + ub: vec![], + }, + ); + + // Free column with nonzero cost: unbounded, detected without solving. + report( + "free column (unbounded)", + &QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0)], + c: vec![0.0, -1.0], + a: vec![], + b: vec![], + g: vec![], + h: vec![], + lb: vec![], + ub: vec![], + }, + ); + + // Fixed variable from a singleton equality row. + report( + "fixed variable (singleton eq)", + &QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![0.0, 0.0], + a: vec![ + Triplet::new(0, 0, 1.0), + Triplet::new(0, 1, 1.0), + Triplet::new(1, 1, 1.0), // x1 = 1 + ], + b: vec![3.0, 1.0], + g: vec![], + h: vec![], + lb: vec![], + ub: vec![], + }, + ); + + // Conflicting duplicate equalities: infeasible. + report( + "conflicting duplicate eq", + &QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![0.0, 0.0], + a: vec![ + Triplet::new(0, 0, 1.0), + Triplet::new(0, 1, 1.0), + Triplet::new(1, 0, 1.0), + Triplet::new(1, 1, 1.0), + ], + b: vec![2.0, 3.0], + g: vec![], + h: vec![], + lb: vec![], + ub: vec![], + }, + ); + + // Activity-redundant inequality: with x ∈ [0,1]², `x0+x1 ≤ 5` has + // max activity 2 ≤ 5, so it is always satisfied and dropped. + report( + "redundant ineq (activity)", + &QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![-1.0, -1.0], + a: vec![], + b: vec![], + g: vec![Triplet::new(0, 0, 1.0), Triplet::new(0, 1, 1.0)], + h: vec![5.0], + lb: vec![0.0, 0.0], + ub: vec![1.0, 1.0], + }, + ); + + // Activity-infeasible equality: with x ∈ [0,1]², `x0+x1 = 5` is + // outside the activity range [0, 2]. + report( + "infeasible eq (activity)", + &QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![0.0, 0.0], + a: vec![Triplet::new(0, 0, 1.0), Triplet::new(0, 1, 1.0)], + b: vec![5.0], + g: vec![], + h: vec![], + lb: vec![0.0, 0.0], + ub: vec![1.0, 1.0], + }, + ); + + // Forcing inequality: with x ∈ [0,5]², `x0+x1 ≤ 0` has min activity + // 0 = h, so it holds only at x0=x1=0 — both variables pinned, row + // dropped. (Dual recovered exactly in postsolve.) + report( + "forcing ineq (pins to bounds)", + &QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 1.0), Triplet::new(1, 1, 1.0)], + c: vec![-2.0, -3.0], + a: vec![], + b: vec![], + g: vec![Triplet::new(0, 0, 1.0), Triplet::new(0, 1, 1.0)], + h: vec![0.0], + lb: vec![0.0, 0.0], + ub: vec![5.0, 5.0], + }, + ); + + // Parallel inequalities (scalar multiple): `x0+x1 ≤ 3` and + // `2x0+2x1 ≤ 2` (⟺ x0+x1 ≤ 1). The tighter is kept, the other dropped. + report( + "parallel ineq (keep tightest)", + &QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![-10.0, -10.0], + a: vec![], + b: vec![], + g: vec![ + Triplet::new(0, 0, 1.0), + Triplet::new(0, 1, 1.0), + Triplet::new(1, 0, 2.0), + Triplet::new(1, 1, 2.0), + ], + h: vec![3.0, 2.0], + lb: vec![], + ub: vec![], + }, + ); + + // Forcing equality at the max vertex: with x ∈ [0,4]², `x0+x1 = 8` + // equals the max activity 8, pinning x0=x1=4. + report( + "forcing eq (max vertex)", + &QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 1.0), Triplet::new(1, 1, 1.0)], + c: vec![1.0, 5.0], + a: vec![Triplet::new(0, 0, 1.0), Triplet::new(0, 1, 1.0)], + b: vec![8.0], + g: vec![], + h: vec![], + lb: vec![0.0, 0.0], + ub: vec![4.0, 4.0], + }, + ); + + // Bound tightening: `2·x0 ≤ 3` implies x0 ≤ 1.5, tighter than the box + // [0,10]; the reduced box is shrunk (the variable is kept). + report( + "bound tightening (shrink box)", + &QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![-10.0, -10.0], + a: vec![], + b: vec![], + g: vec![Triplet::new(0, 0, 2.0)], + h: vec![3.0], + lb: vec![0.0, 0.0], + ub: vec![10.0, 10.0], + }, + ); + + // Dominated column: x2 is not in P, appears only in the `≤` row with a + // nonnegative coefficient, and has cost ≥ 0 — so x2 = lb is optimal; + // it is fixed and dropped. + report( + "dominated column (→ bound)", + &QpProblem { + n: 3, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![-4.0, -4.0, 0.5], + a: vec![], + b: vec![], + g: vec![ + Triplet::new(0, 0, 1.0), + Triplet::new(0, 1, 1.0), + Triplet::new(0, 2, 1.0), + ], + h: vec![3.0], + lb: vec![0.0, 0.0, 0.0], + ub: vec![5.0, 5.0, 5.0], + }, + ); + + // Free column singleton: x2 (free, only in the equality row) is + // substituted out, eliminating both the variable and the row. + report( + "free col singleton (subst)", + &QpProblem { + n: 3, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![0.0, 0.0, 0.0], + a: vec![ + Triplet::new(0, 0, 1.0), + Triplet::new(0, 1, 1.0), + Triplet::new(0, 2, 1.0), + ], + b: vec![3.0], + g: vec![], + h: vec![], + lb: vec![f64::NEG_INFINITY, f64::NEG_INFINITY, f64::NEG_INFINITY], + ub: vec![f64::INFINITY, f64::INFINITY, f64::INFINITY], + }, + ); + + println!("\n=== rayon-parallel duplicate-row detection at scale ==="); + for &(n, k) in &[(50usize, 200usize), (100, 1000), (200, 4000)] { + let mut p_lower = Vec::new(); + for i in 0..n { + p_lower.push(Triplet::new(i, i, 2.0)); + } + // K identical equality rows Σx_i = n; presolve collapses to 1. + let mut a = Vec::new(); + for row in 0..k { + for i in 0..n { + a.push(Triplet::new(row, i, 1.0)); + } + } + let prob = QpProblem { + n, + p_lower, + c: vec![0.0; n], + a, + b: vec![n as f64; k], + g: vec![], + h: vec![], + lb: vec![], + ub: vec![], + }; + let t0 = Instant::now(); + let reduced_rows = match presolve(&prob) { + PresolveOutcome::Reduced(ps) => ps.reduced.m_eq(), + _ => unreachable!(), + }; + let dt = t0.elapsed().as_secs_f64() * 1e3; + println!("n={n:<4} {k} duplicate eq rows → {reduced_rows} kept (presolve {dt:.2} ms)"); + } +} diff --git a/crates/pounce-convex/examples/scaling.rs b/crates/pounce-convex/examples/scaling.rs new file mode 100644 index 00000000..2e97a65f --- /dev/null +++ b/crates/pounce-convex/examples/scaling.rs @@ -0,0 +1,178 @@ +//! Scaling sweep for the convex-QP IPM: small dense → large sparse. +//! +//! A healthy interior-point method keeps the *iteration count* roughly +//! flat as the problem grows (that is the defining property of IPMs); +//! wall-clock is then dominated by the per-iteration sparse +//! factorization. This harness sweeps problem size for two families and +//! prints iters + timing so regressions in either dimension are visible. +//! +//! Run: `cargo run -p pounce-convex --release --example scaling` +//! +//! Families: +//! - **dense small**: fully dense PSD Hessian, box bounds. n = 5..50. +//! - **sparse large**: tridiagonal PSD Hessian, box bounds. n up to 1e5. +//! The KKT factor stays sparse, so this is where an IPM should shine. + +use pounce_convex::{solve_qp_ipm, QpOptions, QpProblem, QpStatus, Triplet}; +use pounce_feral::FeralSolverInterface; +use pounce_linsol::SparseSymLinearSolverInterface; +use std::time::Instant; + +fn backend() -> Box { + Box::new(FeralSolverInterface::new()) +} + +/// Dense PSD Hessian `P = A Aᵀ + I`-style: here we just use a full lower +/// triangle with diagonal dominance so it is SPD and genuinely dense. +fn dense_box_qp(n: usize) -> QpProblem { + let mut p_lower = Vec::new(); + for i in 0..n { + for j in 0..=i { + let v = if i == j { + n as f64 + 1.0 // diagonally dominant ⇒ SPD + } else { + 0.5 + }; + p_lower.push(Triplet::new(i, j, v)); + } + } + let c: Vec = (0..n).map(|i| -1.0 - (i % 7) as f64).collect(); + let (g, h) = box_bounds(n, 0.0, 1.0); + QpProblem { + n, + p_lower, + c, + a: vec![], + b: vec![], + g, + h, + lb: vec![], + ub: vec![], + } +} + +/// Sparse tridiagonal PSD Hessian with box bounds. +fn sparse_box_qp(n: usize) -> QpProblem { + let mut p_lower = Vec::with_capacity(2 * n); + for i in 0..n { + p_lower.push(Triplet::new(i, i, 4.0)); // dominates the ±1 off-diagonals + if i > 0 { + p_lower.push(Triplet::new(i, i - 1, -1.0)); + } + } + let c: Vec = (0..n).map(|i| -2.0 - (i % 5) as f64).collect(); + let (g, h) = box_bounds(n, 0.0, 1.0); + QpProblem { + n, + p_lower, + c, + a: vec![], + b: vec![], + g, + h, + lb: vec![], + ub: vec![], + } +} + +/// Box bounds `lo ≤ x_i ≤ hi` as 2n inequality rows. +fn box_bounds(n: usize, lo: f64, hi: f64) -> (Vec, Vec) { + let mut g = Vec::with_capacity(2 * n); + let mut h = Vec::with_capacity(2 * n); + for i in 0..n { + g.push(Triplet::new(2 * i, i, 1.0)); // x_i ≤ hi + h.push(hi); + g.push(Triplet::new(2 * i + 1, i, -1.0)); // −x_i ≤ −lo + h.push(-lo); + } + (g, h) +} + +fn run(label: &str, prob: &QpProblem) { + let nnz_p = prob.p_lower.len(); + let m = prob.m_ineq(); + let t0 = Instant::now(); + let sol = solve_qp_ipm(prob, &QpOptions::default(), backend); + let dt = t0.elapsed().as_secs_f64() * 1e3; + let per_iter = if sol.iters > 0 { + dt / sol.iters as f64 + } else { + dt + }; + println!( + "{label:<14} n={:<7} m={:<8} nnz(P)={:<8} | {:<14} iters={:<3} {:>9.1} ms ({:>6.2} ms/iter) obj={:.4}", + prob.n, + m, + nnz_p, + format!("{:?}", sol.status), + sol.iters, + dt, + per_iter, + sol.obj, + ); + assert_eq!(sol.status, QpStatus::Optimal, "{label} n={} failed", prob.n); +} + +fn main() { + println!("=== dense small box-constrained QPs ==="); + for &n in &[5usize, 10, 20, 50, 100] { + run("dense", &dense_box_qp(n)); + } + + println!("\n=== sparse large box-constrained QPs (tridiagonal P) ==="); + for &n in &[100usize, 1_000, 10_000, 50_000, 100_000] { + run("sparse", &sparse_box_qp(n)); + } + + println!("\n=== per-iteration cost breakdown ==="); + breakdown(&sparse_box_qp(10_000)); + breakdown(&sparse_box_qp(100_000)); + + println!("\nIPM health check:"); + println!("- iteration count stays flat (9-10) across 5 orders of magnitude → the"); + println!(" algorithm is healthy."); + println!("- the loop pays a numeric `refactor` + 2 back-solves per iteration, NOT a"); + println!(" fresh symbolic factorization (constant-pattern reuse)."); + println!("- residual super-linear growth is in feral's numeric factor/solve, i.e."); + println!(" the shared pounce-linsol backbone — improving it benefits the NLP path"); + println!(" too and is out of scope for the QP solver."); +} + +/// One-shot breakdown of a single iteration's cost: KKT triplet assembly +/// vs. building a fresh `Factorization` (symbolic analysis + ordering + +/// numeric factor) vs. a back-solve. Isolates whether the per-iteration +/// cost is dominated by re-doing the symbolic factorization each step. +fn breakdown(prob: &QpProblem) { + use pounce_common::types::Index; + use pounce_linsol::Factorization; + + let n = prob.n; + let m = prob.m_ineq(); + let dim = n + m; + // Representative scaling vector (all ones). + let scaling = vec![1.0_f64; m]; + + let t0 = Instant::now(); + let (airn, ajcn, vals) = pounce_convex::ipm::assemble_kkt_for_bench(prob, &scaling, 1e-8, dim); + let t_assemble = t0.elapsed().as_secs_f64() * 1e3; + let vals_copy = vals.clone(); + + let t1 = Instant::now(); + let mut fact = Factorization::new(dim as Index, airn, ajcn, vals, backend()).expect("factor"); + let t_factor = t1.elapsed().as_secs_f64() * 1e3; + + let mut rhs = vec![1.0; dim]; + let t2 = Instant::now(); + fact.solve_one(&mut rhs).expect("solve"); + let t_solve = t2.elapsed().as_secs_f64() * 1e3; + + // Numeric-only refactor (what the loop actually pays each iteration). + let t3 = Instant::now(); + fact.refactor(&vals_copy).expect("refactor"); + let t_refactor = t3.elapsed().as_secs_f64() * 1e3; + + println!( + " assemble(BTreeMap)={t_assemble:.1} ms factor(new+symbolic)={t_factor:.1} ms refactor(numeric)={t_refactor:.1} ms back-solve={t_solve:.1} ms" + ); + println!(" → the loop pays refactor + 2×back-solve per iteration (not the symbolic factor)."); +} diff --git a/crates/pounce-convex/examples/warm_start.rs b/crates/pounce-convex/examples/warm_start.rs new file mode 100644 index 00000000..72011e4a --- /dev/null +++ b/crates/pounce-convex/examples/warm_start.rs @@ -0,0 +1,82 @@ +//! Warm starting the convex-QP IPM across a sequence of nearby problems. +//! +//! A common pattern (parametric / receding-horizon / training-loop +//! solving) is to solve a sequence of QPs that differ only slightly. Each +//! solve's solution is a good warm start for the next. This example +//! solves a path of perturbed problems cold vs. warm and prints the +//! per-solve iteration counts and the total. +//! +//! Run: `cargo run -p pounce-convex --example warm_start` + +use pounce_convex::{solve_qp_ipm, solve_qp_ipm_warm, QpOptions, QpProblem, QpWarmStart, Triplet}; +use pounce_feral::FeralSolverInterface; +use pounce_linsol::SparseSymLinearSolverInterface; + +fn backend() -> Box { + Box::new(FeralSolverInterface::new()) +} + +/// An ill-conditioned QP: `min ½ xᵀ diag(d) x + cᵀx s.t. Σx ≤ cap, +/// 0 ≤ x ≤ 10`, with a wide eigenvalue spread `d ∈ [1, cond]` so the cold +/// solve takes enough interior-point iterations to leave room for warm +/// starting to matter (trivially easy QPs converge in ~7 iters cold, +/// hiding the benefit). +fn capped_qp(c: &[f64], cap: f64) -> QpProblem { + let n = c.len(); + let cond = 1e4_f64; + let p_lower: Vec = (0..n) + .map(|i| { + let t = i as f64 / (n.max(2) as f64 - 1.0); + Triplet::new(i, i, 2.0 * cond.powf(t)) + }) + .collect(); + QpProblem { + n, + p_lower, + c: c.to_vec(), + a: vec![], + b: vec![], + g: (0..n).map(|i| Triplet::new(0, i, 1.0)).collect(), + h: vec![cap], + lb: vec![0.0; n], + ub: vec![10.0; n], + } +} + +fn main() { + let opts = QpOptions::default(); + let n = 40; + let base_c: Vec = (0..n).map(|i| -1.0 - (i as f64) * 0.05).collect(); + + // A path of 8 problems, each a small (~0.5%) perturbation of the + // previous — the parametric / receding-horizon regime where the active + // set is stable and warm starting helps most. + let steps = 8; + let mut cold_total = 0usize; + let mut warm_total = 0usize; + + // Seed the warm path with the first cold solve. + let mut prev = solve_qp_ipm(&capped_qp(&base_c, 5.0), &opts, backend); + + println!("{:<6} {:>10} {:>10}", "step", "cold_iters", "warm_iters"); + for k in 0..steps { + let scale = 1.0 + 0.005 * (k as f64 + 1.0); + let c: Vec = base_c.iter().map(|v| v * scale).collect(); + let cap = 5.0 + 0.02 * (k as f64 + 1.0); + let prob = capped_qp(&c, cap); + + let cold = solve_qp_ipm(&prob, &opts, backend); + let warm = solve_qp_ipm_warm(&prob, &opts, &QpWarmStart::from_solution(&prev), backend); + + println!("{:<6} {:>10} {:>10}", k, cold.iters, warm.iters); + cold_total += cold.iters; + warm_total += warm.iters; + prev = warm; // chain: next warm start is this solution + } + + println!( + "\ntotal iters: cold={cold_total} warm={warm_total} \ + ({:.0}% fewer with warm start)", + 100.0 * (cold_total as f64 - warm_total as f64) / cold_total as f64 + ); +} diff --git a/crates/pounce-convex/src/batch.rs b/crates/pounce-convex/src/batch.rs new file mode 100644 index 00000000..5092a6d3 --- /dev/null +++ b/crates/pounce-convex/src/batch.rs @@ -0,0 +1,207 @@ +//! Batched convex-QP solving (multiple right-hand sides / scenarios). +//! +//! Companion to the single-problem [`solve_qp_ipm`](crate::solve_qp_ipm), +//! mirroring the batched / build-once-solve-many capability the JAX and +//! sensitivity layers grew in pounce#74–#77 (parallel `batched_solve`, +//! `kkt_solve_many`): solve a *family* of convex QPs that share the same +//! structure but differ in their data, reusing one backend factory and +//! running the instances in parallel with rayon. +//! +//! Two entry points cover the two shapes that matter: +//! +//! - [`solve_qp_batch`] — a slice of independent [`QpProblem`]s (same +//! dimensions, typically the same `P`/`A`/`G` with varying `c`/`b`/`h`/ +//! bounds, as in scenario sweeps or MPC). Each is solved end-to-end; +//! instances run concurrently. +//! - [`solve_qp_multi_rhs`] — one fixed QP *structure* with many linear +//! objectives `c` (the classic "multiple RHS" case: same `P`/`A`/`G`/ +//! `b`/`h`/bounds, different `c`). A thin convenience over +//! [`solve_qp_batch`] that builds the per-`c` problems for you. +//! +//! Parallelism. Each QP solve is fully independent (its own factorization +//! and iterate), so the batch is embarrassingly parallel *across +//! instances*. There is an important interaction, though: the default +//! factorization backend (feral) is itself recursive and rayon-parallel +//! *within* a single factor. Running many instances on rayon while each +//! also parallelizes internally oversubscribes the cores (and can +//! overflow a worker stack on large batches), so it is typically *slower* +//! than either level of parallelism alone. +//! +//! The right model for a batch of many smallish QPs is **outer-parallel, +//! inner-serial**: parallelize across instances and make each factor +//! serial. [`solve_qp_batch_parallel`] runs the instances on rayon's global +//! pool and each worker builds its **own serial backend** from the supplied +//! `make_backend` factory. The factory is therefore expected to produce an +//! inner-serial backend (e.g. `pounce_feral::FeralSolverInterface::serial`); +//! the toggle is a per-backend setting, not global state. The serial feral +//! driver factorizes supernodes in a flat postorder loop (bounded stack), +//! so the batch needs no oversized worker stacks — unlike feral's *parallel* +//! driver, which climbs the elimination tree recursively and was the reason +//! an earlier version provisioned a custom 64 MiB-stack pool. The default +//! [`solve_qp_batch`] is sequential: predictable, contention-free, and the +//! right choice when each individual factor is large enough to parallelize +//! on its own. The `make_backend` factory is shared by reference and called +//! once per instance, so it must be `Sync`. + +use crate::ipm::{solve_qp_ipm, solve_qp_ipm_warm, QpOptions, QpWarmStart}; +use crate::qp::{QpProblem, QpSolution}; +use pounce_linsol::SparseSymLinearSolverInterface; +use rayon::prelude::*; + +/// Solve a batch of convex QPs in parallel, returning one solution per +/// input in the same order. +/// +/// Solves the instances **sequentially**, reusing the one `make_backend` +/// factory. Predictable and contention-free; the right choice when each +/// individual factor is large enough to parallelize on its own (feral +/// does that internally). For many small QPs where cross-instance +/// parallelism wins, use [`solve_qp_batch_parallel`]. +/// +/// The problems are independent — each is solved cold. When the +/// instances share a *fixed structure* (same `A`/`G`/`P` sparsity and the +/// same set of finite bounds, varying only `c`/`b`/`h`/bound values), +/// [`QpFactorization`](crate::QpFactorization) builds the KKT symbolic +/// factor once and reuses it across solves, avoiding repeated AMD +/// ordering / symbolic analysis. +pub fn solve_qp_batch( + probs: &[QpProblem], + opts: &QpOptions, + mut make_backend: F, +) -> Vec +where + F: FnMut() -> Box, +{ + probs + .iter() + .map(|prob| solve_qp_ipm(prob, opts, &mut make_backend)) + .collect() +} + +/// Solve a batch in parallel **across instances**. Best for many small / +/// medium QPs, where cross-instance throughput beats parallelizing each +/// factor internally. +/// +/// Runs on rayon's global pool. `make_backend` must be `Sync`; it is called +/// once per instance on the worker that runs it, so each worker gets its +/// **own** backend. +/// +/// For the outer-parallel / inner-serial win, pass a `make_backend` that +/// builds an *inner-serial* backend (e.g. +/// `pounce_feral::FeralSolverInterface::serial`) — that keeps the only +/// parallelism across instances, avoiding the oversubscription that makes a +/// parallel-over-parallel batch slower. The toggle is a per-backend setting +/// with no global state, so concurrent feral solves on other threads are +/// unaffected. The serial feral factor uses a flat (bounded-stack) +/// supernode loop, so no oversized worker stacks are needed. +/// +/// Results are returned in input order regardless of completion order. +pub fn solve_qp_batch_parallel( + probs: &[QpProblem], + opts: &QpOptions, + make_backend: F, +) -> Vec +where + F: Fn() -> Box + Sync, +{ + probs + .par_iter() + .map(|prob| solve_qp_ipm(prob, opts, &make_backend)) + .collect() +} + +/// Warm-started parallel batch: like [`solve_qp_batch_parallel`] but each +/// instance is seeded from the corresponding entry of `warms` (typically +/// the previous step's solutions for a sequence of nearby batches, as in +/// receding-horizon / training-loop solving). See [`QpWarmStart`] for the +/// recentering strategy; a warm start only affects an instance's iteration +/// count, not its solution, and a per-instance dimension mismatch falls +/// back to that instance's cold start. +/// +/// # Panics +/// Panics if `warms.len() != probs.len()`. +pub fn solve_qp_batch_parallel_warm( + probs: &[QpProblem], + warms: &[QpWarmStart], + opts: &QpOptions, + make_backend: F, +) -> Vec +where + F: Fn() -> Box + Sync, +{ + assert_eq!( + warms.len(), + probs.len(), + "warms.len() ({}) must equal probs.len() ({})", + warms.len(), + probs.len() + ); + probs + .par_iter() + .zip(warms.par_iter()) + .map(|(prob, warm)| solve_qp_ipm_warm(prob, opts, warm, &make_backend)) + .collect() +} + +/// Solve one QP structure against many linear objectives `c` +/// (sequentially; see [`solve_qp_batch`]). +/// +/// All of `P`, `A`, `b`, `G`, `h`, and the bounds come from `base`; each +/// entry of `cs` (each length `base.n`) replaces `base.c`. Returns one +/// solution per `c`, in order. +/// +/// This is the convex-solver analogue of the sensitivity layer's +/// `kkt_solve_many` "multiple RHS" call, but at the optimization level: +/// each RHS is a different objective, so each is a full QP solve (the KKT +/// system changes with the iterate), not a shared back-substitution. +/// +/// # Panics +/// Panics if any `c` in `cs` does not have length `base.n`. +pub fn solve_qp_multi_rhs( + base: &QpProblem, + cs: &[Vec], + opts: &QpOptions, + make_backend: F, +) -> Vec +where + F: FnMut() -> Box, +{ + let probs = multi_rhs_problems(base, cs); + solve_qp_batch(&probs, opts, make_backend) +} + +/// Parallel counterpart of [`solve_qp_multi_rhs`] (see +/// [`solve_qp_batch_parallel`] for the parallelism model). +/// +/// # Panics +/// Panics if any `c` in `cs` does not have length `base.n`. +pub fn solve_qp_multi_rhs_parallel( + base: &QpProblem, + cs: &[Vec], + opts: &QpOptions, + make_backend: F, +) -> Vec +where + F: Fn() -> Box + Sync, +{ + let probs = multi_rhs_problems(base, cs); + solve_qp_batch_parallel(&probs, opts, make_backend) +} + +/// Build the per-objective problem list for the multi-RHS entry points. +fn multi_rhs_problems(base: &QpProblem, cs: &[Vec]) -> Vec { + for (k, c) in cs.iter().enumerate() { + assert_eq!( + c.len(), + base.n, + "cs[{k}] has length {}, expected n = {}", + c.len(), + base.n + ); + } + cs.iter() + .map(|c| QpProblem { + c: c.clone(), + ..base.clone() + }) + .collect() +} diff --git a/crates/pounce-convex/src/cones/chordal.rs b/crates/pounce-convex/src/cones/chordal.rs new file mode 100644 index 00000000..552f31fe --- /dev/null +++ b/crates/pounce-convex/src/cones/chordal.rs @@ -0,0 +1,152 @@ +//! Chordal-graph analysis for sparse SDP decomposition (Phase H7 sparsity). +//! +//! The range-space chordal decomposition of a sparse PSD constraint +//! `smat(s) ⪰ 0` (with `s` supported on a pattern `E`) rewrites it as a sum +//! of clique-supported PSD blocks (Agler–Helton–McCullough–Rodman): for a +//! **chordal** `E` with maximal cliques `C₁…C_p`, +//! +//! ```text +//! s ⪰ 0 ⟺ s = Σ_k Tᵀ_{C_k} S_k T_{C_k}, S_k ⪰ 0, +//! ``` +//! +//! where `T_{C_k}` selects the rows/cols in clique `C_k`. This module does +//! the graph part: take the aggregate sparsity pattern, compute a **chordal +//! extension** by symbolic elimination (natural order + fill), and read off +//! the **maximal cliques** — the data the conic-program reformulation needs. +//! +//! The elimination is the textbook one (Vandenberghe & Andersen, *Chordal +//! Graphs and Semidefinite Optimization*, §4): eliminating vertex `v` makes +//! its still-present higher-ordered neighbors a clique (adding fill edges); +//! `clique(v) = {v} ∪ higher-neighbors(v)` in the filled graph, and the +//! maximal such sets are the maximal cliques of the chordal completion. + +use std::collections::BTreeSet; + +/// The chordal completion of a sparsity pattern: its maximal cliques (each a +/// sorted, ascending vertex list). +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Chordal { + pub n: usize, + /// Maximal cliques of the chordal completion, each sorted ascending. + pub cliques: Vec>, +} + +impl Chordal { + /// Whether the completion is a single clique covering everything — i.e. + /// the pattern is (effectively) dense, so decomposition buys nothing. + pub fn is_single_block(&self) -> bool { + self.cliques.len() == 1 && self.cliques[0].len() == self.n + } +} + +/// Compute the chordal completion (maximal cliques) of the undirected graph +/// on `0..n` with the given `edges` (off-diagonal pattern entries). The +/// natural elimination order `0,1,…,n−1` is used; for SDPs whose variables +/// are already laid out band-like this is a good order, and correctness does +/// not depend on it (any order yields a valid — if larger — chordal cover). +pub fn analyze(n: usize, edges: &[(usize, usize)]) -> Chordal { + // Adjacency as sorted sets. + let mut adj: Vec> = vec![BTreeSet::new(); n]; + for &(a, b) in edges { + if a != b { + adj[a].insert(b); + adj[b].insert(a); + } + } + + // Symbolic elimination in natural order, accumulating fill. `clique(v)` + // is `{v}` plus the neighbors of `v` that are eliminated later. + let mut clique_sets: Vec> = Vec::with_capacity(n); + for v in 0..n { + let higher: Vec = adj[v].iter().copied().filter(|&u| u > v).collect(); + // Make the higher neighbors a clique (fill edges). + for i in 0..higher.len() { + for j in (i + 1)..higher.len() { + let (a, b) = (higher[i], higher[j]); + adj[a].insert(b); + adj[b].insert(a); + } + } + let mut c: BTreeSet = higher.into_iter().collect(); + c.insert(v); + clique_sets.push(c); + } + + // Keep only the maximal sets (drop any that is a subset of another). + let mut maximal: Vec> = Vec::new(); + for (i, ci) in clique_sets.iter().enumerate() { + let subsumed = clique_sets + .iter() + .enumerate() + .any(|(j, cj)| j != i && ci.len() < cj.len() && ci.is_subset(cj)); + // Among equal-size duplicates keep the first occurrence only. + let dup_earlier = clique_sets[..i].iter().any(|cj| cj == ci); + if !subsumed && !dup_earlier { + maximal.push(ci.iter().copied().collect()); + } + } + + Chordal { + n, + cliques: maximal, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn sorted(mut cliques: Vec>) -> Vec> { + cliques.iter_mut().for_each(|c| c.sort_unstable()); + cliques.sort(); + cliques + } + + #[test] + fn path_graph_cliques_are_consecutive_pairs() { + // 0–1–2–3 (already chordal): maximal cliques {0,1},{1,2},{2,3}. + let c = analyze(4, &[(0, 1), (1, 2), (2, 3)]); + assert!(!c.is_single_block()); + assert_eq!(sorted(c.cliques), vec![vec![0, 1], vec![1, 2], vec![2, 3]]); + } + + #[test] + fn two_disjoint_edges_give_two_cliques() { + // 0–1 and 2–3: block-diagonal pattern → cliques {0,1},{2,3}. + let c = analyze(4, &[(0, 1), (2, 3)]); + assert_eq!(sorted(c.cliques), vec![vec![0, 1], vec![2, 3]]); + } + + #[test] + fn dense_triangle_is_single_block() { + // Fully connected 3-vertex graph → one clique {0,1,2}. + let c = analyze(3, &[(0, 1), (0, 2), (1, 2)]); + assert!(c.is_single_block()); + assert_eq!(sorted(c.cliques), vec![vec![0, 1, 2]]); + } + + #[test] + fn cycle_gets_chordal_fill() { + // 4-cycle 0–1–2–3–0 is NOT chordal; natural-order elimination fills + // chord(s) so the completion's cliques cover it. Eliminating 0 (nbrs + // 1,3) adds edge 1–3; the maximal cliques become {0,1,3} and {1,2,3}. + let c = analyze(4, &[(0, 1), (1, 2), (2, 3), (3, 0)]); + let cl = sorted(c.cliques); + // Every original edge must sit inside some clique. + for &(a, b) in &[(0, 1), (1, 2), (2, 3), (3, 0)] { + assert!( + cl.iter().any(|c| c.contains(&a) && c.contains(&b)), + "edge ({a},{b}) not covered by {cl:?}" + ); + } + // And it genuinely decomposed (no single 4-clique). + assert!(cl.iter().all(|c| c.len() < 4)); + } + + #[test] + fn isolated_vertices_are_singleton_cliques() { + // No edges: each vertex is its own clique. + let c = analyze(3, &[]); + assert_eq!(sorted(c.cliques), vec![vec![0], vec![1], vec![2]]); + } +} diff --git a/crates/pounce-convex/src/cones/composite.rs b/crates/pounce-convex/src/cones/composite.rs new file mode 100644 index 00000000..39925594 --- /dev/null +++ b/crates/pounce-convex/src/cones/composite.rs @@ -0,0 +1,386 @@ +//! Composite cone — a Cartesian product of cones over which the IPM keeps +//! one stacked slack `s` and dual `z`. +//! +//! The inequality block of a convex program is in general a product +//! `K = R₊^{n₀} × SOC(m₁) × …`. [`CompositeCone`] owns an ordered list of +//! `(offset, ConeKind)` blocks and implements [`Cone`] by dispatching every +//! operation block-wise over the matching slices of `s`/`z`. The IPM driver +//! holds a `CompositeCone` and stays cone-agnostic. +//! +//! Phase 1 of the SOCP extension (see `dev-notes/socp-extension.md`) ships +//! only a single nonnegative-orthant block, so this is bit-identical to the +//! previous bare [`NonnegCone`] path; the seam exists so SOC (and later +//! cones) plug in as new [`ConeKind`] variants without touching the driver. + +use super::{Cone, ConeBlock, NonnegCone, PsdCone, SecondOrderCone}; + +/// Declarative description of one cone block in a problem's inequality +/// partition (the data form; [`ConeKind`] is the runtime form). The blocks +/// stack in order to cover the `m_ineq` inequality rows. +// `Eq` is intentionally not derived: `Power(f64)` carries a float exponent. +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum ConeSpec { + /// Nonnegative orthant of the given number of rows. + Nonneg(usize), + /// Second-order cone of the given dimension (`≥ 1`). + SecondOrder(usize), + /// 3-dimensional exponential cone. **Non-symmetric** — a problem + /// containing this routes to the non-symmetric HSDE driver + /// ([`crate::hsde_nonsym`]), not the symmetric path; it is *not* a + /// [`ConeKind`] and must be intercepted before [`CompositeCone`] assembly. + Exponential, + /// 3-dimensional power cone `K_α = {|x₁| ≤ x₂^α x₃^{1−α}}` with exponent + /// `α ∈ (0, 1)`. **Non-symmetric** — routes to the non-symmetric HSDE + /// driver like [`ConeSpec::Exponential`]. + Power(f64), + /// Positive-semidefinite cone over symmetric `n×n` matrices (the stored + /// `usize` is the matrix size `n`). Self-scaled, so it stays on the + /// symmetric driver; it spans `n(n+1)/2` rows in `svec` coordinates. + Psd(usize), +} + +impl ConeSpec { + /// Number of inequality rows this block spans. + pub fn dim(&self) -> usize { + match self { + ConeSpec::Nonneg(n) | ConeSpec::SecondOrder(n) => *n, + ConeSpec::Exponential | ConeSpec::Power(_) => 3, + ConeSpec::Psd(n) => n * (n + 1) / 2, + } + } +} + +/// A single cone in the product. A closed enum (rather than `dyn Cone`) so +/// dispatch is a cheap match and new cones are added as variants. +#[derive(Debug, Clone)] +pub enum ConeKind { + /// Nonnegative orthant (LP/QP, and expanded variable bounds). + Nonneg(NonnegCone), + /// Second-order (Lorentz) cone. + SecondOrder(SecondOrderCone), + /// Positive-semidefinite cone (self-scaled; dense `W⊗ₛW` KKT block). + Psd(PsdCone), +} + +/// Dispatch a `Cone` call to whichever concrete cone this variant wraps. +macro_rules! dispatch { + ($self:ident, $c:ident => $body:expr) => { + match $self { + ConeKind::Nonneg($c) => $body, + ConeKind::SecondOrder($c) => $body, + ConeKind::Psd($c) => $body, + } + }; +} + +impl Cone for ConeKind { + fn degree(&self) -> usize { + dispatch!(self, c => c.degree()) + } + fn identity(&self, out: &mut [f64]) { + dispatch!(self, c => c.identity(out)) + } + fn dim(&self) -> usize { + dispatch!(self, c => c.dim()) + } + fn mu(&self, s: &[f64], z: &[f64]) -> f64 { + dispatch!(self, c => c.mu(s, z)) + } + fn scaling_diag(&self, s: &[f64], z: &[f64], out: &mut [f64]) { + dispatch!(self, c => c.scaling_diag(s, z, out)) + } + fn comp_residual(&self, s: &[f64], z: &[f64], sigma_mu: f64, out: &mut [f64]) { + dispatch!(self, c => c.comp_residual(s, z, sigma_mu, out)) + } + fn comp_residual_corrector( + &self, + s: &[f64], + z: &[f64], + ds_aff: &[f64], + dz_aff: &[f64], + sigma_mu: f64, + out: &mut [f64], + ) { + dispatch!(self, c => c.comp_residual_corrector(s, z, ds_aff, dz_aff, sigma_mu, out)) + } + fn recover_ds(&self, s: &[f64], z: &[f64], r_comp: &[f64], dz: &[f64], ds: &mut [f64]) { + dispatch!(self, c => c.recover_ds(s, z, r_comp, dz, ds)) + } + fn max_step(&self, v: &[f64], dv: &[f64], tau: f64) -> f64 { + dispatch!(self, c => c.max_step(v, dv, tau)) + } + fn kkt_block(&self, s: &[f64], z: &[f64]) -> ConeBlock { + dispatch!(self, c => c.kkt_block(s, z)) + } + fn rhs_comp_term(&self, s: &[f64], z: &[f64], r_comp: &[f64], out: &mut [f64]) { + dispatch!(self, c => c.rhs_comp_term(s, z, r_comp, out)) + } + fn recenter_warm(&self, s: &mut [f64], z: &mut [f64], floor: f64) { + dispatch!(self, c => c.recenter_warm(s, z, floor)) + } + fn in_dual_cone(&self, z: &[f64], tol: f64) -> bool { + dispatch!(self, c => c.in_dual_cone(z, tol)) + } +} + +/// A Cartesian product of cones, the cone of the IPM's stacked `(s, z)`. +#[derive(Debug, Clone)] +pub struct CompositeCone { + /// `(offset, cone)` for each block; offsets partition `0..dim`. + blocks: Vec<(usize, ConeKind)>, + dim: usize, + degree: usize, +} + +impl CompositeCone { + /// Build from an ordered list of cone blocks. Offsets are assigned by + /// stacking the blocks in the given order. + pub fn new(kinds: Vec) -> Self { + let mut blocks = Vec::with_capacity(kinds.len()); + let mut dim = 0; + let mut degree = 0; + for k in kinds { + degree += k.degree(); + let d = k.dim(); + blocks.push((dim, k)); + dim += d; + } + CompositeCone { + blocks, + dim, + degree, + } + } + + /// A single nonnegative-orthant block of dimension `n` — the cone of + /// LP/QP (and the Phase-1 default for any inequality block). + pub fn single_nonneg(n: usize) -> Self { + Self::new(vec![ConeKind::Nonneg(NonnegCone::new(n))]) + } + + /// Build from a declarative [`ConeSpec`] partition of the inequality + /// rows. An empty `specs` (or `m_ineq == 0`) yields an empty cone; the + /// common LP/QP case is a single `Nonneg` spec. + pub fn from_specs(specs: &[ConeSpec]) -> Self { + let kinds = specs + .iter() + .map(|s| match s { + ConeSpec::Nonneg(n) => ConeKind::Nonneg(NonnegCone::new(*n)), + ConeSpec::SecondOrder(m) => ConeKind::SecondOrder(SecondOrderCone::new(*m)), + ConeSpec::Psd(n) => ConeKind::Psd(PsdCone::new(*n)), + ConeSpec::Exponential | ConeSpec::Power(_) => unreachable!( + "non-symmetric cones (exponential/power) must route to \ + hsde_nonsym before CompositeCone assembly" + ), + }) + .collect(); + Self::new(kinds) + } + + /// The `(offset, cone)` blocks, in row order. Used by the KKT assembly + /// to place each block's scaling contribution (diagonal or dense). + pub fn blocks(&self) -> &[(usize, ConeKind)] { + &self.blocks + } +} + +impl Cone for CompositeCone { + fn degree(&self) -> usize { + self.degree + } + + fn identity(&self, out: &mut [f64]) { + for (off, k) in &self.blocks { + let d = k.dim(); + k.identity(&mut out[*off..off + d]); + } + } + + fn dim(&self) -> usize { + self.dim + } + + fn mu(&self, s: &[f64], z: &[f64]) -> f64 { + if self.degree == 0 { + return 0.0; + } + // μ = ⟨s,z⟩_total / degree_total. Each block's μ is its own + // ⟨s_b,z_b⟩ / degree_b, so block.mu · block.degree recovers the + // block dot without a separate inner-product method. + let mut dot = 0.0; + for (off, k) in &self.blocks { + let d = k.dim(); + dot += k.mu(&s[*off..off + d], &z[*off..off + d]) * k.degree() as f64; + } + dot / self.degree as f64 + } + + fn scaling_diag(&self, s: &[f64], z: &[f64], out: &mut [f64]) { + for (off, k) in &self.blocks { + let d = k.dim(); + k.scaling_diag( + &s[*off..off + d], + &z[*off..off + d], + &mut out[*off..off + d], + ); + } + } + + fn comp_residual(&self, s: &[f64], z: &[f64], sigma_mu: f64, out: &mut [f64]) { + for (off, k) in &self.blocks { + let d = k.dim(); + k.comp_residual( + &s[*off..off + d], + &z[*off..off + d], + sigma_mu, + &mut out[*off..off + d], + ); + } + } + + fn comp_residual_corrector( + &self, + s: &[f64], + z: &[f64], + ds_aff: &[f64], + dz_aff: &[f64], + sigma_mu: f64, + out: &mut [f64], + ) { + for (off, k) in &self.blocks { + let d = k.dim(); + k.comp_residual_corrector( + &s[*off..off + d], + &z[*off..off + d], + &ds_aff[*off..off + d], + &dz_aff[*off..off + d], + sigma_mu, + &mut out[*off..off + d], + ); + } + } + + fn recover_ds(&self, s: &[f64], z: &[f64], r_comp: &[f64], dz: &[f64], ds: &mut [f64]) { + for (off, k) in &self.blocks { + let d = k.dim(); + k.recover_ds( + &s[*off..off + d], + &z[*off..off + d], + &r_comp[*off..off + d], + &dz[*off..off + d], + &mut ds[*off..off + d], + ); + } + } + + fn max_step(&self, v: &[f64], dv: &[f64], tau: f64) -> f64 { + let mut alpha = 1.0_f64; + for (off, k) in &self.blocks { + let d = k.dim(); + alpha = alpha.min(k.max_step(&v[*off..off + d], &dv[*off..off + d], tau)); + } + alpha + } + + fn rhs_comp_term(&self, s: &[f64], z: &[f64], r_comp: &[f64], out: &mut [f64]) { + for (off, k) in &self.blocks { + let d = k.dim(); + k.rhs_comp_term( + &s[*off..off + d], + &z[*off..off + d], + &r_comp[*off..off + d], + &mut out[*off..off + d], + ); + } + } + + fn kkt_block(&self, _s: &[f64], _z: &[f64]) -> ConeBlock { + // A product cone has *multiple* blocks; the KKT assembly iterates + // `blocks()` and calls each block's `kkt_block` rather than asking + // the composite for a single one. + unimplemented!("use CompositeCone::blocks() for per-block kkt_block") + } + + fn recenter_warm(&self, s: &mut [f64], z: &mut [f64], floor: f64) { + for (off, k) in &self.blocks { + let d = k.dim(); + k.recenter_warm(&mut s[*off..off + d], &mut z[*off..off + d], floor); + } + } + + fn in_dual_cone(&self, z: &[f64], tol: f64) -> bool { + // The dual of a product cone is the product of the duals: every block + // must lie in its own dual cone. + self.blocks.iter().all(|(off, k)| { + let d = k.dim(); + k.in_dual_cone(&z[*off..off + d], tol) + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + /// A single-nonneg composite reproduces NonnegCone exactly. + #[test] + fn single_nonneg_matches_bare_orthant() { + let n = 4; + let comp = CompositeCone::single_nonneg(n); + let bare = NonnegCone::new(n); + let s = [1.0, 2.0, 0.5, 3.0]; + let z = [3.0, 1.0, 4.0, 0.5]; + + assert_eq!(comp.dim(), n); + assert_eq!(comp.degree(), n); + assert!((comp.mu(&s, &z) - bare.mu(&s, &z)).abs() < 1e-15); + + let (mut a, mut b) = ([0.0; 4], [0.0; 4]); + comp.scaling_diag(&s, &z, &mut a); + bare.scaling_diag(&s, &z, &mut b); + assert_eq!(a, b); + + comp.comp_residual(&s, &z, 0.7, &mut a); + bare.comp_residual(&s, &z, 0.7, &mut b); + assert_eq!(a, b); + + let dv = [-1.0, 0.5, -2.0, 1.0]; + assert!((comp.max_step(&s, &dv, 0.99) - bare.max_step(&s, &dv, 0.99)).abs() < 1e-15); + } + + /// Two stacked nonneg blocks behave like one orthant of the total size + /// (μ over the whole vector, min step over blocks). Guards the + /// block-dispatch arithmetic that SOC will rely on. + #[test] + fn two_blocks_compose_like_one_orthant() { + let comp = CompositeCone::new(vec![ + ConeKind::Nonneg(NonnegCone::new(2)), + ConeKind::Nonneg(NonnegCone::new(3)), + ]); + let whole = NonnegCone::new(5); + let s = [1.0, 2.0, 3.0, 0.5, 4.0]; + let z = [2.0, 1.0, 0.5, 4.0, 1.0]; + assert_eq!(comp.dim(), 5); + assert_eq!(comp.degree(), 5); + assert!((comp.mu(&s, &z) - whole.mu(&s, &z)).abs() < 1e-15); + + let dv = [-0.5, 1.0, -3.0, 0.2, -1.0]; + assert!((comp.max_step(&s, &dv, 0.95) - whole.max_step(&s, &dv, 0.95)).abs() < 1e-15); + + let (mut a, mut b) = ([0.0; 5], [0.0; 5]); + comp.recover_ds(&s, &z, &[0.1, 0.2, 0.3, 0.4, 0.5], &dv, &mut a); + whole.recover_ds(&s, &z, &[0.1, 0.2, 0.3, 0.4, 0.5], &dv, &mut b); + for i in 0..5 { + assert!((a[i] - b[i]).abs() < 1e-15); + } + } + + #[test] + fn empty_composite_is_inert() { + let comp = CompositeCone::single_nonneg(0); + assert_eq!(comp.dim(), 0); + assert_eq!(comp.degree(), 0); + assert_eq!(comp.mu(&[], &[]), 0.0); + assert_eq!(comp.max_step(&[], &[], 0.99), 1.0); + } +} diff --git a/crates/pounce-convex/src/cones/exp.rs b/crates/pounce-convex/src/cones/exp.rs new file mode 100644 index 00000000..e96352b3 --- /dev/null +++ b/crates/pounce-convex/src/cones/exp.rs @@ -0,0 +1,265 @@ +//! The exponential cone and its self-concordant barrier (Phase H5). +//! +//! The exponential cone is the first **non-symmetric** cone in +//! `pounce-convex` and the gateway to geometric programming, logistic +//! regression, entropy/`log-sum-exp`, and relative-entropy models — the +//! application surface that closes most of the gap with Clarabel. +//! +//! ## The cone +//! +//! In the Clarabel/MOSEK orientation, +//! ```text +//! K_exp = cl { (x, y, z) : y·exp(x/y) ≤ z, y > 0 } +//! = { (x,y,z) : y·log(z/y) ≥ x, y>0, z>0 } ∪ { (x,0,z) : x≤0, z≥0 }. +//! ``` +//! Its dual is +//! ```text +//! K_exp* = cl { (u, v, w) : −u·exp(v/u) ≤ e·w, u < 0 }. +//! ``` +//! +//! ## The barrier +//! +//! The standard degree-3 logarithmically-homogeneous self-concordant +//! barrier (Nesterov) is, with `ψ = y·log(z/y) − x`, +//! ```text +//! f(x, y, z) = −log(ψ) − log(y) − log(z), on ψ > 0, y > 0, z > 0. +//! ``` +//! This module provides `f`, `∇f`, `∇²f`, and cone-membership tests. It is +//! deliberately **standalone** (not yet a [`crate::cones::Cone`]): the +//! non-symmetric driver path that consumes these oracles is the next step. +//! The math here is validated both against finite differences and against +//! the exact log-homogeneity identities (`⟨∇f,p⟩ = −3`, `∇²f·p = −∇f`, +//! `f(tp) = f(p) − 3 log t`). + +use super::BarrierCone; + +/// The 3-dimensional exponential cone `K_exp` and its degree-3 barrier. +#[derive(Debug, Clone, Copy, Default, PartialEq)] +pub struct ExponentialCone; + +impl ExponentialCone { + pub fn new() -> Self { + ExponentialCone + } + + /// `ψ = y·log(z/y) − x`, the slack whose positivity (with `y, z > 0`) + /// defines the open cone. Returns `NaN` if `y` or `z` is non-positive. + #[inline] + fn psi(point: &[f64]) -> f64 { + let (x, y, z) = (point[0], point[1], point[2]); + y * (z / y).ln() - x + } +} + +impl BarrierCone for ExponentialCone { + fn barrier_degree(&self) -> f64 { + 3.0 + } + + fn barrier(&self, point: &[f64]) -> f64 { + let (_, y, z) = (point[0], point[1], point[2]); + if y <= 0.0 || z <= 0.0 { + return f64::INFINITY; + } + let psi = Self::psi(point); + if psi <= 0.0 { + return f64::INFINITY; + } + -psi.ln() - y.ln() - z.ln() + } + + fn barrier_grad(&self, point: &[f64], out: &mut [f64]) { + let (_, y, z) = (point[0], point[1], point[2]); + let psi = Self::psi(point); + let a = (z / y).ln() - 1.0; // ∂ψ/∂y + // g = −(1/ψ)∇ψ − (0, 1/y, 1/z), ∇ψ = (−1, a, y/z). + out[0] = 1.0 / psi; + out[1] = -a / psi - 1.0 / y; + out[2] = -(y / z) / psi - 1.0 / z; + } + + fn barrier_hess_lower(&self, point: &[f64], out: &mut [f64]) { + let (_, y, z) = (point[0], point[1], point[2]); + let psi = Self::psi(point); + let a = (z / y).ln() - 1.0; // ∂ψ/∂y + let q = y / z; // ∂ψ/∂z + let ip = 1.0 / psi; + let ip2 = ip * ip; + // H = (1/ψ²)∇ψ∇ψᵀ − (1/ψ)∇²ψ + diag(0, 1/y², 1/z²), + // ∇ψ = (−1, a, q), ∇²ψ = [[0,0,0],[0,−1/y,1/z],[0,1/z,−y/z²]]. + let h_xx = ip2; + let h_yx = -a * ip2; + let h_yy = a * a * ip2 + ip / y + 1.0 / (y * y); + let h_zx = -q * ip2; + let h_zy = a * q * ip2 - ip / z; + let h_zz = q * q * ip2 + ip * y / (z * z) + 1.0 / (z * z); + // Lower triangle row-major: (0,0);(1,0),(1,1);(2,0),(2,1),(2,2). + out[0] = h_xx; + out[1] = h_yx; + out[2] = h_yy; + out[3] = h_zx; + out[4] = h_zy; + out[5] = h_zz; + } + + fn in_primal_cone(&self, point: &[f64], tol: f64) -> bool { + let (_, y, z) = (point[0], point[1], point[2]); + y > tol && z > tol && Self::psi(point) > tol * (1.0 + y.abs()) + } + + fn in_dual_cone(&self, point: &[f64], tol: f64) -> bool { + // K_exp* = cl{ (u,v,w) : −u·exp(v/u) ≤ e·w, u<0 }. Strict interior: + // −u·e^{v/u} < e·w ⟺ v/u < 1 + log(w/−u) ⟺ (u<0, flip) the conjugate + // slack ψ* = v − u + u·log(−u/w) = v − u·(1 − log(−u/w)) > 0, with + // u<0, w>0. (Derivation: Dahl–Andersen 2021 §2 give the dual exp cone + // `e·z₁ ≥ −z₃ e^{z₂/z₃}`, mapped through pounce's coordinate order.) + let (u, v, w) = (point[0], point[1], point[2]); + if -u <= tol || w <= tol { + return false; + } + let psi_d = v - u * (1.0 - ((-u) / w).ln()); + psi_d > tol * (1.0 + u.abs()) + } + + fn interior_reference(&self, out: &mut [f64]) { + // The self-dual central point (the fixed point of x = −∇F(x), in + // pounce coordinate order), which lies in int K and int K*. + out[0] = -0.827838; + out[1] = 0.805102; + out[2] = 1.290928; + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn full_hess(point: &[f64]) -> [[f64; 3]; 3] { + let c = ExponentialCone; + let mut l = [0.0; 6]; + c.barrier_hess_lower(point, &mut l); + [[l[0], l[1], l[3]], [l[1], l[2], l[4]], [l[3], l[4], l[5]]] + } + + /// A handful of interior points (y, z > 0 and ψ > 0). + fn interior_points() -> Vec<[f64; 3]> { + vec![ + [0.0, 1.0, std::f64::consts::E], // ψ = 1 + [-1.0, 2.0, 3.0], + [0.5, 1.5, 4.0], + [-2.0, 0.7, 1.2], + ] + } + + #[test] + fn membership() { + let c = ExponentialCone; + assert!(c.in_primal_cone(&[0.0, 1.0, std::f64::consts::E], 1e-9)); + assert!(c.in_primal_cone(&[-1.0, 2.0, 3.0], 1e-9)); + // y ≤ 0 or z ≤ 0 → outside. + assert!(!c.in_primal_cone(&[0.0, -1.0, 2.0], 1e-9)); + assert!(!c.in_primal_cone(&[0.0, 1.0, -2.0], 1e-9)); + // ψ < 0: x too large. + assert!(!c.in_primal_cone(&[5.0, 1.0, std::f64::consts::E], 1e-9)); + // Dual interior: u<0, w>0, ψ* > 0. + assert!(c.in_dual_cone(&[-1.0, 1.0, 1.0], 1e-9)); + assert!(!c.in_dual_cone(&[1.0, 1.0, 1.0], 1e-9)); // u>0 + } + + #[test] + fn grad_matches_finite_difference() { + let c = ExponentialCone; + let h = 1e-6; + for p in interior_points() { + let mut g = [0.0; 3]; + c.barrier_grad(&p, &mut g); + for k in 0..3 { + let mut pp = p; + let mut pm = p; + pp[k] += h; + pm[k] -= h; + let fd = (c.barrier(&pp) - c.barrier(&pm)) / (2.0 * h); + assert!( + (g[k] - fd).abs() < 1e-5, + "grad[{k}] at {p:?}: analytic {} vs fd {}", + g[k], + fd + ); + } + } + } + + #[test] + fn hess_matches_finite_difference() { + let c = ExponentialCone; + let h = 1e-6; + for p in interior_points() { + let hess = full_hess(&p); + for j in 0..3 { + // FD of the gradient's j-th component. + let mut pp = p; + let mut pm = p; + pp[j] += h; + pm[j] -= h; + let mut gp = [0.0; 3]; + let mut gm = [0.0; 3]; + c.barrier_grad(&pp, &mut gp); + c.barrier_grad(&pm, &mut gm); + for i in 0..3 { + let fd = (gp[i] - gm[i]) / (2.0 * h); + assert!( + (hess[i][j] - fd).abs() < 1e-4, + "H[{i}][{j}] at {p:?}: analytic {} vs fd {}", + hess[i][j], + fd + ); + } + } + } + } + + /// Log-homogeneity of degree ν = 3: f(t·p) = f(p) − 3·log t. + #[test] + fn log_homogeneous_degree_three() { + let c = ExponentialCone; + for p in interior_points() { + for &t in &[0.5_f64, 2.0, 3.7] { + let tp = [t * p[0], t * p[1], t * p[2]]; + let lhs = c.barrier(&tp); + let rhs = c.barrier(&p) - 3.0 * t.ln(); + assert!((lhs - rhs).abs() < 1e-9, "f(tp)={lhs} vs {rhs}"); + } + } + } + + /// Euler identity for a degree-ν log-homogeneous barrier: ⟨∇f(p), p⟩ = −ν. + #[test] + fn euler_identity() { + let c = ExponentialCone; + for p in interior_points() { + let mut g = [0.0; 3]; + c.barrier_grad(&p, &mut g); + let dot = g[0] * p[0] + g[1] * p[1] + g[2] * p[2]; + assert!((dot + 3.0).abs() < 1e-9, " = {dot}, expected −3"); + } + } + + /// Hessian/gradient identity for log-homogeneous barriers: ∇²f(p)·p = −∇f(p). + #[test] + fn hessian_times_point_is_neg_grad() { + let c = ExponentialCone; + for p in interior_points() { + let mut g = [0.0; 3]; + c.barrier_grad(&p, &mut g); + let hess = full_hess(&p); + for i in 0..3 { + let hp = hess[i][0] * p[0] + hess[i][1] * p[1] + hess[i][2] * p[2]; + assert!( + (hp + g[i]).abs() < 1e-9, + "(Hp)[{i}] = {hp} vs −g = {}", + -g[i] + ); + } + } + } +} diff --git a/crates/pounce-convex/src/cones/mod.rs b/crates/pounce-convex/src/cones/mod.rs new file mode 100644 index 00000000..8300eedc --- /dev/null +++ b/crates/pounce-convex/src/cones/mod.rs @@ -0,0 +1,190 @@ +//! Cone abstraction for the convex IPM. +//! +//! Phase 2 of the LP/QP plan builds the interior-point iteration over a +//! `Cone` abstraction with only the nonnegative orthant implemented, so +//! that Phases 4–6 (SOCP / exponential / power / PSD) are cone +//! *extensions* rather than a rewrite (see `dev-notes/lp-qp-routing.md`). +//! +//! A cone owns everything the IPM needs that is cone-specific: +//! - the central-path measure `μ = ⟨s, z⟩ / degree`, +//! - the scaling block that enters the KKT system, +//! - the complementarity residual `s ∘ z - σμ e`, +//! - the fraction-to-boundary step length keeping `(s, z)` in the cone. +//! +//! The IPM driver (`crate::ipm`) is otherwise cone-agnostic. For the +//! nonnegative orthant (LP/QP) the "∘" product is elementwise and the +//! scaling block is the diagonal `s ⊘ z`; richer cones override these +//! with their Nesterov–Todd scaling. + +pub mod chordal; +pub mod composite; +pub mod exp; +pub mod nonneg; +pub mod nonsym; +pub mod power; +pub mod psd; +pub mod soc; + +pub use composite::{CompositeCone, ConeKind, ConeSpec}; +pub use exp::ExponentialCone; +pub use nonneg::NonnegCone; +pub use nonsym::NonsymScaling; +pub use power::PowerCone; +pub use psd::PsdCone; +pub use soc::SecondOrderCone; + +/// Barrier oracles for a convex cone — the interface a **non-symmetric** +/// cone (exponential, power) exposes to the homogeneous self-dual embedding +/// driver ([`crate::hsde`]). +/// +/// Symmetric cones (orthant, second-order, PSD) are self-scaled and the IPM +/// drives them with a single Nesterov–Todd scaling point (`W²`, via +/// [`Cone::kkt_block`]). Non-symmetric cones have **no** such point; the +/// path-following method instead uses the logarithmically-homogeneous +/// self-concordant barrier `f` directly (Nesterov–Todd 1997; Skajaa–Ye +/// 2015): the central path is `z = −μ ∇f(s)`, and the Hessian `∇²f` plays +/// the role `W²` plays for symmetric cones. +/// +/// A valid degree-`ν` log-homogeneous barrier satisfies, for all `t > 0` +/// and interior `p`: +/// - `f(t·p) = f(p) − ν·log t`, +/// - `⟨∇f(p), p⟩ = −ν`, +/// - `∇²f(p)·p = −∇f(p)`. +/// +/// These identities are exact and are used as validation invariants +/// (see the `exp` cone tests) in addition to finite-difference checks. +pub trait BarrierCone { + /// Barrier parameter `ν` (the exponential cone's is 3). + fn barrier_degree(&self) -> f64; + + /// The barrier value `f(p)`. `NAN`/`+∞` outside the (open) cone. + fn barrier(&self, point: &[f64]) -> f64; + + /// Gradient `∇f(p)` (writes `dim` values). + fn barrier_grad(&self, point: &[f64], out: &mut [f64]); + + /// Hessian `∇²f(p)`, lower triangle row-major + /// (`[ (0,0); (1,0),(1,1); … ]`, `dim·(dim+1)/2` values). + fn barrier_hess_lower(&self, point: &[f64], out: &mut [f64]); + + /// Whether `point` is in the strict interior of the primal cone, to a + /// relative tolerance `tol`. + fn in_primal_cone(&self, point: &[f64], tol: f64) -> bool; + + /// Whether `point` is in the strict interior of the dual cone. + fn in_dual_cone(&self, point: &[f64], tol: f64) -> bool; + + /// A fixed strictly-interior reference point that lies in **both** the + /// primal cone `K` and the dual cone `K*` (writes `dim` values). It is + /// used (a) as the Newton start for the conjugate-gradient shadow iterate + /// and (b) as the self-dual starting iterate `s = z = e` for the + /// non-symmetric HSDE driver — both of which need a point interior to `K` + /// and `K*`. + fn interior_reference(&self, out: &mut [f64]); +} + +/// The `(z, z)` scaling block a cone contributes to the symmetric KKT +/// system. The driver places `-(block) - reg·I` at the cone's diagonal / +/// dense positions. The nonnegative orthant is [`ConeBlock::Diagonal`] +/// (`sᵢ/zᵢ`); the second-order cone is [`ConeBlock::DenseLower`] (its +/// Nesterov–Todd Hessian `W²`, dense within the cone). +#[derive(Debug, Clone, PartialEq)] +pub enum ConeBlock { + /// One value per row — the `(z, z)` diagonal (orthant: `sᵢ/zᵢ`). + Diagonal(Vec), + /// Dense symmetric `dim × dim` block, lower triangle row-major + /// (`[ (0,0); (1,0),(1,1); (2,0),(2,1),(2,2); … ]`). + DenseLower { dim: usize, lower: Vec }, + /// A `diag(d) + u uᵀ` block — the second-order cone's Nesterov–Todd + /// Hessian in **diagonal-plus-rank-1** form (`d = η²·diag(−1,1,…,1)`, + /// `u = √2 η w̄`). The KKT assembly represents the rank-1 update with a + /// single auxiliary variable per cone (the ECOS/Clarabel "sparse SOC" + /// trick), keeping the factorization sparse for large cones instead of + /// an `O(m²)` dense block. + DiagPlusRank1 { diag: Vec, u: Vec }, +} + +/// A symmetric cone over which the IPM maintains a primal slack `s` and +/// dual `z`. Phase 2 ships only [`NonnegCone`]; the trait exists so the +/// driver code does not bake in the orthant. +pub trait Cone { + /// Barrier degree (the orthant's is its dimension). Used to form the + /// central-path parameter `μ = ⟨s, z⟩ / degree`. + fn degree(&self) -> usize; + + /// The cone's identity element `e` (the well-centered interior point + /// used to cold-start `s` and `z`). Orthant: all ones; second-order + /// cone: `(1, 0, …, 0)`. Writes `dim` values. + fn identity(&self, out: &mut [f64]); + + /// Dimension of the slack/dual vectors this cone owns. + fn dim(&self) -> usize; + + /// Duality measure `⟨s, z⟩ / degree`. + fn mu(&self, s: &[f64], z: &[f64]) -> f64; + + /// Diagonal of the cone's scaling block as it enters the (z, z) + /// position of the symmetric KKT system. For the nonnegative orthant + /// this is `s ⊘ z`; the IPM places `-scaling` on that diagonal. + fn scaling_diag(&self, s: &[f64], z: &[f64], out: &mut [f64]); + + /// Complementarity residual `r = s ∘ z - σμ e`. With `sigma_mu = 0` + /// this is the affine (predictor) target; with `σμ > 0` it is the + /// centered path-following target. + fn comp_residual(&self, s: &[f64], z: &[f64], sigma_mu: f64, out: &mut [f64]); + + /// Mehrotra corrector complementarity residual + /// `r = s ∘ z + ds_aff ∘ dz_aff - σμ e`, where `ds_aff`/`dz_aff` are + /// the affine-predictor steps. The `ds_aff ∘ dz_aff` second-order + /// term is what gives Mehrotra its faster convergence; it is + /// cone-specific (elementwise for the orthant), so it lives behind + /// this trait rather than in the driver. + fn comp_residual_corrector( + &self, + s: &[f64], + z: &[f64], + ds_aff: &[f64], + dz_aff: &[f64], + sigma_mu: f64, + out: &mut [f64], + ); + + /// Recover the slack step `ds` from the dual step `dz` and the + /// complementarity residual, given the current `(s, z)`: + /// `ds = -(r_comp ⊘ z) - (s ⊘ z) ∘ dz`. + fn recover_ds(&self, s: &[f64], z: &[f64], r_comp: &[f64], dz: &[f64], ds: &mut [f64]); + + /// The cone's `(z, z)` scaling block for the symmetric KKT system (see + /// [`ConeBlock`]). For the orthant this is the diagonal `sᵢ/zᵢ`; richer + /// cones return their dense Nesterov–Todd Hessian. The driver assembles + /// `-(block) - reg·I`. + fn kkt_block(&self, s: &[f64], z: &[f64]) -> ConeBlock; + + /// The cone's contribution to the reduced KKT right-hand side at the + /// `(z)` rows: the term added to `-r_g`. For the orthant this is + /// `r_comp ⊘ z`; richer cones apply their scaling. Writes `dim` values. + fn rhs_comp_term(&self, s: &[f64], z: &[f64], r_comp: &[f64], out: &mut [f64]); + + /// Project a warm `(s, z)` into the strict interior of this cone (in + /// place) and rebalance, lifting it off the boundary by at least + /// `floor`. For the orthant: shift each component positive, then a + /// Mehrotra centering step. For the second-order cone: lift the + /// "distance to boundary" `λ_min = s₀ − ‖s₁‖` to `≥ floor`. Used by the + /// warm-start path (see [`crate::QpWarmStart`]). + fn recenter_warm(&self, s: &mut [f64], z: &mut [f64], floor: f64); + + /// Largest `α ∈ (0, 1]` such that `v + α dv` stays inside the cone, + /// scaled by the fraction-to-boundary parameter `tau`. For the + /// orthant: `min over dv_i<0 of -tau * v_i / dv_i`, capped at 1. + fn max_step(&self, v: &[f64], dv: &[f64], tau: f64) -> f64; + + /// Membership test for the cone's **dual** cone, to absolute tolerance + /// `tol`: `true` iff `z` lies in (or within `tol` of) the dual cone. Used + /// to validate a Farkas/recession direction before certifying primal + /// infeasibility — a certificate is only honest if its dual multipliers + /// actually lie in the dual cone. The cones shipped here (nonnegative + /// orthant, second-order, PSD) are self-dual, so this tests `z` against + /// the cone itself: orthant `zᵢ ≥ −tol`; SOC `z₀ ≥ ‖z₁‖ − tol`; PSD + /// `λ_min(smat z) ≥ −tol`. + fn in_dual_cone(&self, z: &[f64], tol: f64) -> bool; +} diff --git a/crates/pounce-convex/src/cones/nonneg.rs b/crates/pounce-convex/src/cones/nonneg.rs new file mode 100644 index 00000000..7655c89b --- /dev/null +++ b/crates/pounce-convex/src/cones/nonneg.rs @@ -0,0 +1,153 @@ +//! Nonnegative-orthant cone — the cone of LP and convex QP. +//! +//! All operations are elementwise. This is the only cone implemented in +//! Phase 2; richer cones (SOC, PSD, exp, pow) plug in behind the same +//! [`Cone`](super::Cone) trait in later phases. + +use super::{Cone, ConeBlock}; + +/// The nonnegative orthant `{ x : x_i ≥ 0 }` of a given dimension. +#[derive(Debug, Clone, Copy)] +pub struct NonnegCone { + n: usize, +} + +impl NonnegCone { + pub fn new(n: usize) -> Self { + NonnegCone { n } + } +} + +impl Cone for NonnegCone { + fn degree(&self) -> usize { + self.n + } + + fn identity(&self, out: &mut [f64]) { + out.iter_mut().for_each(|v| *v = 1.0); + } + + fn dim(&self) -> usize { + self.n + } + + fn mu(&self, s: &[f64], z: &[f64]) -> f64 { + if self.n == 0 { + return 0.0; + } + let dot: f64 = s.iter().zip(z).map(|(a, b)| a * b).sum(); + dot / self.n as f64 + } + + fn scaling_diag(&self, s: &[f64], z: &[f64], out: &mut [f64]) { + for i in 0..self.n { + out[i] = s[i] / z[i]; + } + } + + fn comp_residual(&self, s: &[f64], z: &[f64], sigma_mu: f64, out: &mut [f64]) { + for i in 0..self.n { + out[i] = s[i] * z[i] - sigma_mu; + } + } + + fn comp_residual_corrector( + &self, + s: &[f64], + z: &[f64], + ds_aff: &[f64], + dz_aff: &[f64], + sigma_mu: f64, + out: &mut [f64], + ) { + for i in 0..self.n { + out[i] = s[i] * z[i] + ds_aff[i] * dz_aff[i] - sigma_mu; + } + } + + fn recover_ds(&self, s: &[f64], z: &[f64], r_comp: &[f64], dz: &[f64], ds: &mut [f64]) { + for i in 0..self.n { + ds[i] = -(r_comp[i] / z[i]) - (s[i] / z[i]) * dz[i]; + } + } + + fn max_step(&self, v: &[f64], dv: &[f64], tau: f64) -> f64 { + let mut alpha = 1.0_f64; + for i in 0..self.n { + if dv[i] < 0.0 { + let a = -tau * v[i] / dv[i]; + if a < alpha { + alpha = a; + } + } + } + alpha + } + + fn in_dual_cone(&self, z: &[f64], tol: f64) -> bool { + // Self-dual: zᵢ ≥ −tol componentwise. + z[..self.n].iter().all(|&zi| zi >= -tol) + } + + fn kkt_block(&self, s: &[f64], z: &[f64]) -> ConeBlock { + ConeBlock::Diagonal((0..self.n).map(|i| s[i] / z[i]).collect()) + } + + fn rhs_comp_term(&self, _s: &[f64], z: &[f64], r_comp: &[f64], out: &mut [f64]) { + for i in 0..self.n { + out[i] = r_comp[i] / z[i]; + } + } + + fn recenter_warm(&self, s: &mut [f64], z: &mut [f64], floor: f64) { + let n = self.n; + // Positivity shift: lift s and z off the boundary by ≥ floor. + let s_min = s.iter().cloned().fold(f64::INFINITY, f64::min); + let z_min = z.iter().cloned().fold(f64::INFINITY, f64::min); + let ds = (-1.5 * s_min).max(floor); + let dz = (-1.5 * z_min).max(floor); + for i in 0..n { + s[i] += ds; + z[i] += dz; + } + // Mehrotra centering shift to balance s and z. + let sz: f64 = s.iter().zip(z.iter()).map(|(a, b)| a * b).sum(); + let sum_s: f64 = s.iter().sum(); + let sum_z: f64 = z.iter().sum(); + let ds2 = 0.5 * sz / sum_z; + let dz2 = 0.5 * sz / sum_s; + for i in 0..n { + s[i] += ds2; + z[i] += dz2; + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn mu_is_average_complementarity() { + let c = NonnegCone::new(2); + // ⟨s,z⟩ = 1*3 + 2*4 = 11, degree 2 → 5.5 + assert!((c.mu(&[1.0, 2.0], &[3.0, 4.0]) - 5.5).abs() < 1e-12); + } + + #[test] + fn max_step_caps_at_one_when_all_increasing() { + let c = NonnegCone::new(2); + assert!((c.max_step(&[1.0, 1.0], &[1.0, 0.5], 0.99) - 1.0).abs() < 1e-12); + } + + #[test] + fn max_step_limited_by_most_negative_ratio() { + let c = NonnegCone::new(1); + // v=2, dv=-1, tau=1 → α = -(2)/(-1) = 2, but capped... here it is + // the boundary at 2 so not capped below 1? -2*? recompute: + // a = -tau*v/dv = -1*2/(-1) = 2 → α stays min(1,2)=... 2>1 so 1. + assert!((c.max_step(&[2.0], &[-1.0], 1.0) - 1.0).abs() < 1e-12); + // v=1, dv=-2, tau=1 → a = -1*1/(-2)=0.5 → α=0.5 + assert!((c.max_step(&[1.0], &[-2.0], 1.0) - 0.5).abs() < 1e-12); + } +} diff --git a/crates/pounce-convex/src/cones/nonsym.rs b/crates/pounce-convex/src/cones/nonsym.rs new file mode 100644 index 00000000..004d27b4 --- /dev/null +++ b/crates/pounce-convex/src/cones/nonsym.rs @@ -0,0 +1,471 @@ +//! Generic 3-D non-symmetric-cone machinery, shared by the exponential and +//! power cones (and any future 3-D [`BarrierCone`]). +//! +//! A non-symmetric cone has no Nesterov–Todd scaling point; the path-following +//! driver instead needs, per iterate, three cone-agnostic ingredients built +//! only from the barrier oracles: +//! +//! - the **conjugate-barrier gradient** `x̃ = −F'_*(z)` (the shadow primal +//! iterate), computed by a damped Newton solve; +//! - the **dual-aware primal–dual scaling** `M = WᵀW` (the Tunçel scaling +//! specialized to 3-D, computed by a BFGS update — Dahl & Andersen 2021), +//! whose defining secants are the `W`-free identities `M·s = z`, `M·x̃ = s̃`; +//! - the **third-order term** `F'''(s)[u, v]` for the nonsymmetric corrector. +//! +//! All three are implemented here once, generic over the cone, so the exp and +//! power cones supply only their barrier oracles (`barrier`, `∇F`, `∇²F`, +//! membership, and an `interior_reference`). + +use super::BarrierCone; + +// --- small fixed-size 3-vector / 3×3 helpers ------------------------------ + +#[inline] +fn dot3(a: &[f64; 3], b: &[f64; 3]) -> f64 { + a[0] * b[0] + a[1] * b[1] + a[2] * b[2] +} + +#[inline] +fn cross3(a: &[f64; 3], b: &[f64; 3]) -> [f64; 3] { + [ + a[1] * b[2] - a[2] * b[1], + a[2] * b[0] - a[0] * b[2], + a[0] * b[1] - a[1] * b[0], + ] +} + +/// Symmetric `H` (lower triangle `[h00;h10,h11;h20,h21,h22]`) times a vector. +#[inline] +fn sym_matvec(h: &[f64; 6], v: &[f64; 3]) -> [f64; 3] { + [ + h[0] * v[0] + h[1] * v[1] + h[3] * v[2], + h[1] * v[0] + h[2] * v[1] + h[4] * v[2], + h[3] * v[0] + h[4] * v[1] + h[5] * v[2], + ] +} + +/// Solve the SPD 3×3 system `H x = b`, `H` given by its lower triangle +/// row-major `[h00; h10,h11; h20,h21,h22]`, via Cholesky. `None` if `H` is not +/// numerically positive definite. +pub(crate) fn chol_solve3(h: &[f64; 6], b: &[f64; 3]) -> Option<[f64; 3]> { + let l00 = h[0]; + if l00 <= 0.0 { + return None; + } + let l00 = l00.sqrt(); + let l10 = h[1] / l00; + let l11 = h[2] - l10 * l10; + if l11 <= 0.0 { + return None; + } + let l11 = l11.sqrt(); + let l20 = h[3] / l00; + let l21 = (h[4] - l20 * l10) / l11; + let l22 = h[5] - l20 * l20 - l21 * l21; + if l22 <= 0.0 { + return None; + } + let l22 = l22.sqrt(); + let y0 = b[0] / l00; + let y1 = (b[1] - l10 * y0) / l11; + let y2 = (b[2] - l20 * y0 - l21 * y1) / l22; + let x2 = y2 / l22; + let x1 = (y1 - l21 * x2) / l11; + let x0 = (y0 - l10 * x1 - l20 * x2) / l00; + Some([x0, x1, x2]) +} + +/// The dual-aware **primal–dual scaling** for a 3-D non-symmetric cone — the +/// Tunçel scaling specialized to 3-D and computed by a BFGS update, exactly as +/// in MOSEK's exp-cone solver (Dahl & Andersen 2021, §5–6). Built from *both* +/// the primal slack `s ∈ K` and the dual `z ∈ K*` (via the shadow iterates), +/// unlike the primal-only Hessian which stalls. +/// +/// The driver needs only `M = WᵀW`: Dahl–Andersen's reduced system places `M` +/// in the `(z,z)` cone block, and every RHS term reduces to `M` applied to a +/// vector. The defining double-secant equations (DA eq. 8/29), pre-multiplied +/// by `Wᵀ`, become the exact, `W`-free identities `M·s = z` and `M·x̃ = s̃`. +/// +/// pounce convention (`s` primal, `z` dual); the map to Dahl–Andersen's +/// `(x, s)` is `x = s`, `s_DA = z`, so `x̃ = −F'_*(z)` and `s̃ = −∇F(s)`. +#[derive(Debug, Clone)] +pub struct NonsymScaling { + /// `M = WᵀW`, lower triangle row-major `[m00;m10,m11;m20,m21,m22]` — the + /// dense `(z,z)` cone block. Satisfies `M·s = z`, `M·x̃ = s̃`. + pub wtw_lower: [f64; 6], + /// Shadow primal iterate `x̃ = −F'_*(z)` (∈ int K). + pub x_tilde: [f64; 3], + /// Shadow dual iterate `s̃ = −∇F(s)` (∈ int K*). + pub s_tilde: [f64; 3], + /// Duality measure `μ = ⟨s,z⟩/ν`. + pub mu: f64, + /// Shadow duality measure `μ̃ = ⟨x̃,s̃⟩/ν` (`μ·μ̃ ≥ 1`, `=1` only on path). + pub mu_tilde: f64, +} + +impl NonsymScaling { + /// Apply `M = WᵀW` to a 3-vector. + #[inline] + pub fn apply(&self, v: &[f64; 3]) -> [f64; 3] { + sym_matvec(&self.wtw_lower, v) + } + + /// `M⁻¹` as a full symmetric 3×3 — the dense `(z,z)` KKT block is `−M⁻¹`, + /// and the cone elimination/recovery applies `M⁻¹`. `None` if `M` is not + /// numerically SPD (should not happen for a valid scaling). + pub fn minv(&self) -> Option<[[f64; 3]; 3]> { + let c0 = chol_solve3(&self.wtw_lower, &[1.0, 0.0, 0.0])?; + let c1 = chol_solve3(&self.wtw_lower, &[0.0, 1.0, 0.0])?; + let c2 = chol_solve3(&self.wtw_lower, &[0.0, 0.0, 1.0])?; + Some([ + [c0[0], c1[0], c2[0]], + [c0[1], c1[1], c2[1]], + [c0[2], c1[2], c2[2]], + ]) + } +} + +/// The shadow primal iterate `x̃ = −F'_*(d)` for a dual-cone point +/// `d ∈ int K*`: the unique `p ∈ int K` solving `∇F(p) = −d`. The conjugate +/// barrier `F_*` has no closed form for these cones, so `x̃` is computed +/// numerically — it minimizes the strictly convex `G(p) = F(p) + ⟨d, p⟩` over +/// `int K`, solved by **damped Newton** with an Armijo line search guarded by +/// barrier-finiteness (an exact interiority test). Returns `false` if +/// `d ∉ int K*` (no solution) or the iteration fails. +pub(crate) fn conjugate_grad(cone: &C, d: &[f64], out: &mut [f64]) -> bool { + // Scaled interior start: along a ray p = t·p̂ the barrier problem has + // optimum t* = ν/⟨d,p̂⟩ = 3/⟨d,p̂⟩ (from log-homogeneity), which lands the + // start at the right scale; Newton then corrects the direction. + let mut phat = [0.0_f64; 3]; + cone.interior_reference(&mut phat); + let dp = d[0] * phat[0] + d[1] * phat[1] + d[2] * phat[2]; + // NaN-safe: `!(dp > 0.0)` rejects dp <= 0 *and* a NaN dp. + #[allow(clippy::neg_cmp_op_on_partial_ord)] + if !(dp > 0.0) { + return false; // d ∉ int K* (⟨d,p̂⟩ ≤ 0): no conjugate point. + } + let t = 3.0 / dp; + let mut p = [t * phat[0], t * phat[1], t * phat[2]]; + + let gval = |p: &[f64; 3]| cone.barrier(p) + d[0] * p[0] + d[1] * p[1] + d[2] * p[2]; + let mut gp = gval(&p); + if !gp.is_finite() { + return false; + } + + let mut g = [0.0_f64; 3]; + let mut l = [0.0_f64; 6]; + for _ in 0..200 { + cone.barrier_grad(&p, &mut g); + let r = [g[0] + d[0], g[1] + d[1], g[2] + d[2]]; // ∇G(p) = ∇F(p)+d + cone.barrier_hess_lower(&p, &mut l); + let delta = match chol_solve3(&l, &[-r[0], -r[1], -r[2]]) { + Some(v) => v, + None => return false, + }; + // Newton decrement λ² = rᵀ H⁻¹ r = −rᵀδ. + let lam2 = -(r[0] * delta[0] + r[1] * delta[1] + r[2] * delta[2]); + if lam2 <= 1e-24 { + break; // ∇F(p) ≈ −d. + } + let mut step = 1.0_f64; + loop { + let pc = [ + p[0] + step * delta[0], + p[1] + step * delta[1], + p[2] + step * delta[2], + ]; + let gc = gval(&pc); + if gc.is_finite() && gc <= gp - 0.25 * step * lam2 { + p = pc; + gp = gc; + break; + } + step *= 0.5; + if step < 1e-15 { + return false; // line search collapsed + } + } + } + out[0] = p[0]; + out[1] = p[1]; + out[2] = p[2]; + true +} + +/// Build the dual-aware scaling [`NonsymScaling`] at `(s, z)`. `None` if the +/// iterate is on (or numerically at) the central path — where the scaling +/// degenerates (`YᵀS` singular, `⟨δ_x,δ_s⟩ → 0`) — or if the shadow-iterate +/// solve fails. The driver falls back to the primal Hessian `μ∇²F(s)` then. +pub(crate) fn scaling(cone: &C, s: &[f64], z: &[f64]) -> Option { + let nu = 3.0; + let s3 = [s[0], s[1], s[2]]; + let z3 = [z[0], z[1], z[2]]; + let sz = dot3(&s3, &z3); + if sz <= 0.0 { + return None; + } + let mu = sz / nu; + + // Shadow iterates: x̃ = −F'_*(z) (conjugate-grad solve), s̃ = −∇F(s). + let mut xt = [0.0; 3]; + if !conjugate_grad(cone, &z3, &mut xt) { + return None; + } + let mut g = [0.0; 3]; + cone.barrier_grad(&s3, &mut g); + let st = [-g[0], -g[1], -g[2]]; + let mu_tilde = dot3(&xt, &st) / nu; + + // ⟨δ_x,δ_s⟩ = ⟨s−μx̃, z−μs̃⟩ → 0 on the central path (degenerate). + let dlt_p = [s3[0] - mu * xt[0], s3[1] - mu * xt[1], s3[2] - mu * xt[2]]; + let dlt_d = [z3[0] - mu * st[0], z3[1] - mu * st[1], z3[2] - mu * st[2]]; + if dot3(&dlt_p, &dlt_d) <= 1e-13 * sz { + return None; + } + + // M = Y(YᵀS)⁻¹Yᵀ + t·z_cp z_cpᵀ (DA §5), S = [s, x̃], Y = [z, s̃], + // z_cp ⊥ {s, x̃} the unit cross product. YᵀS is symmetric by the Euler + // identities ⟨z,x̃⟩ = ⟨s̃,s⟩ = ν. + let a00 = dot3(&z3, &s3); + let a01 = dot3(&z3, &xt); + let a10 = dot3(&st, &s3); + let a11 = dot3(&st, &xt); + let det = a00 * a11 - a01 * a10; + if det.abs() <= 1e-14 { + return None; + } + let (b00, b01, b10, b11) = (a11 / det, -a01 / det, -a10 / det, a00 / det); + + let zc = cross3(&s3, &xt); + let zc_norm = dot3(&zc, &zc).sqrt(); + if zc_norm <= 1e-14 { + return None; + } + let z_cp = [zc[0] / zc_norm, zc[1] / zc_norm, zc[2] / zc_norm]; + + // BFGS scalar t (DA 32): t = μ·‖ H − s̃s̃ᵀ/ν + // − (H x̃ − μ̃ s̃)(H x̃ − μ̃ s̃)ᵀ / (⟨x̃, H x̃⟩ − ν μ̃²) ‖_F . + let mut hl = [0.0; 6]; + cone.barrier_hess_lower(&s3, &mut hl); + let hxt = sym_matvec(&hl, &xt); + let xt_h_xt = dot3(&xt, &hxt); + let denom_t = xt_h_xt - nu * mu_tilde * mu_tilde; + if denom_t.abs() <= 1e-14 { + return None; + } + let qv = [ + hxt[0] - mu_tilde * st[0], + hxt[1] - mu_tilde * st[1], + hxt[2] - mu_tilde * st[2], + ]; + let h_full = [ + [hl[0], hl[1], hl[3]], + [hl[1], hl[2], hl[4]], + [hl[3], hl[4], hl[5]], + ]; + let mut fro2 = 0.0; + for i in 0..3 { + for j in 0..3 { + let m_ij = h_full[i][j] - st[i] * st[j] / nu - qv[i] * qv[j] / denom_t; + fro2 += m_ij * m_ij; + } + } + let t = mu * fro2.sqrt(); + // NaN-safe: `!(t > 0.0)` rejects t <= 0 *and* a NaN t (which `t <= 0.0` + // would let through). Bail out rather than build a degenerate factor. + #[allow(clippy::neg_cmp_op_on_partial_ord)] + if !(t > 0.0) { + return None; + } + + // M = Y B Yᵀ + t z_cp z_cpᵀ (columns of Y are y0=z, y1=s̃). + let y0 = z3; + let y1 = st; + let mut m_full = [[0.0_f64; 3]; 3]; + for i in 0..3 { + for j in 0..3 { + m_full[i][j] = b00 * y0[i] * y0[j] + + b01 * y0[i] * y1[j] + + b10 * y1[i] * y0[j] + + b11 * y1[i] * y1[j] + + t * z_cp[i] * z_cp[j]; + } + } + let wtw = [ + m_full[0][0], + m_full[1][0], + m_full[1][1], + m_full[2][0], + m_full[2][1], + m_full[2][2], + ]; + + Some(NonsymScaling { + wtw_lower: wtw, + x_tilde: xt, + s_tilde: st, + mu, + mu_tilde, + }) +} + +/// The third-order directional term `F'''(s)[u, v]` (a 3-vector) — the +/// ingredient of Dahl–Andersen's nonsymmetric Mehrotra-like corrector +/// (DA eq. 16): `η = −½ F'''(s)[Δxᵃ, (∇²F(s))⁻¹ Δsᵃ]`. Computed as the +/// directional derivative of the Hessian, `F'''(s)[u, v] = d/dt +/// (∇²F(s + t·u)·v)|₀`, by central finite differences of the analytic Hessian +/// (the barrier is smooth). The step `h` is scaled `∝ 1/‖u‖` so the third +/// derivative stays accurate even for a tiny affine step (the endgame). `None` +/// if either perturbed point leaves the cone (then the driver drops the +/// corrector for that block — still a valid centered step). +pub(crate) fn third_dir_apply( + cone: &C, + s: &[f64], + u: &[f64], + v: &[f64], +) -> Option<[f64; 3]> { + let s_scale = 1.0 + s[0].abs().max(s[1].abs()).max(s[2].abs()); + let u_norm = u[0].abs().max(u[1].abs()).max(u[2].abs()); + if u_norm <= 1e-300 { + return Some([0.0; 3]); // F'''(s)[0, v] = 0 + } + let h = 1e-6 * s_scale / u_norm; + let sp = [s[0] + h * u[0], s[1] + h * u[1], s[2] + h * u[2]]; + let sm = [s[0] - h * u[0], s[1] - h * u[1], s[2] - h * u[2]]; + if !cone.in_primal_cone(&sp, 1e-12) || !cone.in_primal_cone(&sm, 1e-12) { + return None; + } + let v3 = [v[0], v[1], v[2]]; + let mut lp = [0.0; 6]; + let mut lm = [0.0; 6]; + cone.barrier_hess_lower(&sp, &mut lp); + cone.barrier_hess_lower(&sm, &mut lm); + let hpv = sym_matvec(&lp, &v3); + let hmv = sym_matvec(&lm, &v3); + let inv = 1.0 / (2.0 * h); + Some([ + (hpv[0] - hmv[0]) * inv, + (hpv[1] - hmv[1]) * inv, + (hpv[2] - hmv[2]) * inv, + ]) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::cones::{ExponentialCone, PowerCone}; + + /// Validate the generic machinery on one cone: the conjugate-gradient + /// round-trip, the scaling's defining secants `M·s = z`, `M·x̃ = s̃` (with + /// `M` SPD), and the third-derivative homogeneity identity + /// `F'''(s)[s, v] = −2∇²F·v`. + fn check_machinery(cone: &C, pts: &[[f64; 3]]) { + // --- conjugate-gradient round-trip: d = −∇F(p) ⇒ recover p. --- + for &p in pts { + let mut g = [0.0; 3]; + cone.barrier_grad(&p, &mut g); + let d = [-g[0], -g[1], -g[2]]; + assert!(cone.in_dual_cone(&d, 1e-12), "−∇F(p) must be dual-interior"); + let mut xt = [0.0; 3]; + assert!( + conjugate_grad(cone, &d, &mut xt), + "conjugate_grad failed at {p:?}" + ); + for k in 0..3 { + assert!( + (xt[k] - p[k]).abs() < 1e-8, + "round-trip[{k}] {} vs {}", + xt[k], + p[k] + ); + } + } + + // --- scaling secants on off-path pairs (s, z = −∇F(s2)), s2 ≁ s. --- + for i in 0..pts.len() { + for j in 0..pts.len() { + if i == j { + continue; + } + let s = pts[i]; + let mut g = [0.0; 3]; + cone.barrier_grad(&pts[j], &mut g); + let z = [-g[0], -g[1], -g[2]]; + let sc = match scaling(cone, &s, &z) { + Some(sc) => sc, + None => continue, // (rare) numerically on-path: skip + }; + let ms = sc.apply(&s); + for k in 0..3 { + assert!( + (ms[k] - z[k]).abs() < 1e-7, + "secant M·s=z [{k}]: {} vs {}", + ms[k], + z[k] + ); + } + let mxt = sc.apply(&sc.x_tilde); + for k in 0..3 { + assert!( + (mxt[k] - sc.s_tilde[k]).abs() < 1e-7, + "secant M·x̃=s̃ [{k}]: {} vs {}", + mxt[k], + sc.s_tilde[k] + ); + } + assert!( + chol_solve3(&sc.wtw_lower, &[1.0, 0.0, 0.0]).is_some(), + "M not SPD: {:?}", + sc.wtw_lower + ); + } + } + + // --- third-derivative homogeneity: F'''(s)[s, v] = −2∇²F·v. --- + let vs = [[1.0, 0.0, 0.0], [0.3, -0.7, 1.1], [-2.0, 0.5, 0.4]]; + for &p in pts { + let mut hl = [0.0; 6]; + cone.barrier_hess_lower(&p, &mut hl); + for v in vs { + let hv = sym_matvec(&hl, &v); + let t3 = third_dir_apply(cone, &p, &p, &v).expect("interior"); + for k in 0..3 { + assert!( + (t3[k] + 2.0 * hv[k]).abs() < 1e-6, + "F'''[s,v][{k}] {} vs −2Hv {}", + t3[k], + -2.0 * hv[k] + ); + } + } + } + } + + #[test] + fn machinery_on_exponential_cone() { + use std::f64::consts::E; + check_machinery( + &ExponentialCone, + &[ + [0.0, 1.0, E], + [-1.0, 2.0, 3.0], + [0.5, 1.5, 4.0], + [-2.0, 0.7, 1.2], + ], + ); + } + + #[test] + fn machinery_on_power_cone() { + let pts = [ + [0.0, 1.0, 1.0], + [0.3, 2.0, 1.5], + [-0.5, 1.2, 3.0], + [0.1, 0.7, 0.9], + ]; + for alpha in [0.5, 0.3, 0.7] { + check_machinery(&PowerCone::new(alpha), &pts); + } + } +} diff --git a/crates/pounce-convex/src/cones/power.rs b/crates/pounce-convex/src/cones/power.rs new file mode 100644 index 00000000..a5ea2dc1 --- /dev/null +++ b/crates/pounce-convex/src/cones/power.rs @@ -0,0 +1,314 @@ +//! The 3-dimensional power cone and its self-concordant barrier (Phase H6). +//! +//! The power cone is the second **non-symmetric** cone in `pounce-convex`, +//! after the exponential cone. It generalizes the (rotated) second-order cone +//! and is the building block for `p`-norm constraints (`‖x‖_p ≤ t`), general +//! geometric-programming monomials, and more. +//! +//! ## The cone +//! +//! For a fixed parameter `α ∈ (0, 1)`, +//! ```text +//! K_α = { (x, y, z) ∈ ℝ × ℝ₊² : |x| ≤ y^α · z^(1−α) }. +//! ``` +//! `α = 1/2` is the rotated quadratic cone; for other `α` it is non-symmetric. +//! Its dual is +//! ```text +//! K_α* = { (u, v, w) ∈ ℝ × ℝ₊² : |u| ≤ (v/α)^α · (w/(1−α))^(1−α) }. +//! ``` +//! +//! ## The barrier +//! +//! The degree-3 logarithmically-homogeneous self-concordant barrier +//! (Chares 2009; Skajaa–Ye 2015), with `ψ = y^{2α} z^{2−2α} − x²`: +//! ```text +//! F(x, y, z) = −log(ψ) − (1−α)·log y − α·log z, on ψ > 0, y > 0, z > 0. +//! ``` +//! It satisfies the exact log-homogeneity identities (`⟨∇F,p⟩ = −3`, +//! `∇²F·p = −∇F`, `F(tp) = F(p) − 3 log t`) used as validation invariants +//! alongside finite differences. + +use super::BarrierCone; + +/// The 3-dimensional power cone `K_α` and its degree-3 barrier. +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct PowerCone { + /// The exponent `α ∈ (0, 1)` (`y^α z^{1−α}`). + pub alpha: f64, +} + +impl PowerCone { + /// Build a power cone with exponent `alpha ∈ (0, 1)`. + pub fn new(alpha: f64) -> Self { + assert!( + alpha > 0.0 && alpha < 1.0, + "power-cone exponent must be in (0, 1), got {alpha}" + ); + PowerCone { alpha } + } + + /// `a = y^{2α} z^{2−2α}` — the homogeneous-degree-2 term whose excess over + /// `x²` defines the cone. + #[inline] + fn a_term(&self, y: f64, z: f64) -> f64 { + y.powf(2.0 * self.alpha) * z.powf(2.0 - 2.0 * self.alpha) + } + + /// `ψ = y^{2α} z^{2−2α} − x²`, the slack whose positivity (with `y, z > 0`) + /// defines the open cone. + #[inline] + fn psi(&self, p: &[f64]) -> f64 { + self.a_term(p[1], p[2]) - p[0] * p[0] + } +} + +impl BarrierCone for PowerCone { + fn barrier_degree(&self) -> f64 { + 3.0 + } + + fn barrier(&self, point: &[f64]) -> f64 { + let (_, y, z) = (point[0], point[1], point[2]); + if y <= 0.0 || z <= 0.0 { + return f64::INFINITY; + } + let psi = self.psi(point); + if psi <= 0.0 { + return f64::INFINITY; + } + -psi.ln() - (1.0 - self.alpha) * y.ln() - self.alpha * z.ln() + } + + fn barrier_grad(&self, point: &[f64], out: &mut [f64]) { + let (al, om) = (self.alpha, 1.0 - self.alpha); + let (x, y, z) = (point[0], point[1], point[2]); + let a = self.a_term(y, z); + let psi = a - x * x; + // ∇ψ = (−2x, 2α·a/y, (2−2α)·a/z); ∇F = −∇ψ/ψ − (0, (1−α)/y, α/z). + out[0] = 2.0 * x / psi; + out[1] = -(2.0 * al * a / y) / psi - om / y; + out[2] = -(2.0 * om * a / z) / psi - al / z; + } + + fn barrier_hess_lower(&self, point: &[f64], out: &mut [f64]) { + let (al, om) = (self.alpha, 1.0 - self.alpha); + let (x, y, z) = (point[0], point[1], point[2]); + let a = self.a_term(y, z); + let psi = a - x * x; + let ip = 1.0 / psi; + let ip2 = ip * ip; + // ∇ψ components. + let p1 = -2.0 * x; + let p2 = 2.0 * al * a / y; + let p3 = 2.0 * om * a / z; + // ∇²ψ components. + let q11 = -2.0; + let q22 = 2.0 * al * (2.0 * al - 1.0) * a / (y * y); + let q23 = 4.0 * al * om * a / (y * z); + let q33 = 2.0 * om * (1.0 - 2.0 * al) * a / (z * z); + // H = (1/ψ²)∇ψ∇ψᵀ − (1/ψ)∇²ψ + diag(0, (1−α)/y², α/z²). + // (∇²ψ has zero (1,·) and (2,·) cross terms with x.) + let h_xx = p1 * p1 * ip2 - q11 * ip; + let h_yx = p2 * p1 * ip2; + let h_yy = p2 * p2 * ip2 - q22 * ip + om / (y * y); + let h_zx = p3 * p1 * ip2; + let h_zy = p3 * p2 * ip2 - q23 * ip; + let h_zz = p3 * p3 * ip2 - q33 * ip + al / (z * z); + // Lower triangle row-major: (0,0);(1,0),(1,1);(2,0),(2,1),(2,2). + out[0] = h_xx; + out[1] = h_yx; + out[2] = h_yy; + out[3] = h_zx; + out[4] = h_zy; + out[5] = h_zz; + } + + fn in_primal_cone(&self, point: &[f64], tol: f64) -> bool { + let (_, y, z) = (point[0], point[1], point[2]); + y > tol && z > tol && self.psi(point) > tol * (1.0 + y.abs() + z.abs()) + } + + fn in_dual_cone(&self, point: &[f64], tol: f64) -> bool { + // K_α* = { (u,v,w) : |u| ≤ (v/α)^α (w/(1−α))^(1−α), v,w > 0 }. + let (al, om) = (self.alpha, 1.0 - self.alpha); + let (u, v, w) = (point[0], point[1], point[2]); + if v <= tol || w <= tol { + return false; + } + let bound = (v / al).powf(al) * (w / om).powf(om); + bound - u.abs() > tol * (1.0 + u.abs()) + } + + fn interior_reference(&self, out: &mut [f64]) { + // (0, 1, 1) lies in int K_α (|0| < 1) and in int K_α* (for all + // α ∈ (0,1) the dual bound `(1/α)^α (1/(1−α))^(1−α) > 0`), so it is a + // valid self-dual start for any α. + out[0] = 0.0; + out[1] = 1.0; + out[2] = 1.0; + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn cones() -> Vec { + vec![ + PowerCone::new(0.5), + PowerCone::new(0.3), + PowerCone::new(0.75), + ] + } + + fn full_hess(c: &PowerCone, point: &[f64]) -> [[f64; 3]; 3] { + let mut l = [0.0; 6]; + c.barrier_hess_lower(point, &mut l); + [[l[0], l[1], l[3]], [l[1], l[2], l[4]], [l[3], l[4], l[5]]] + } + + /// Interior points (y, z > 0 and ψ > 0) for each cone. + fn interior_points() -> Vec<[f64; 3]> { + vec![ + [0.0, 1.0, 1.0], + [0.3, 2.0, 1.5], + [-0.5, 1.2, 3.0], + [0.1, 0.7, 0.9], + ] + } + + #[test] + fn membership() { + for c in cones() { + // (0,1,1) is interior: |0| < 1. + assert!(c.in_primal_cone(&[0.0, 1.0, 1.0], 1e-9)); + // On/over the boundary: |x| = y^α z^(1-α). + let b = 1.0_f64.powf(c.alpha) * 1.0_f64.powf(1.0 - c.alpha); + assert!(!c.in_primal_cone(&[b + 0.1, 1.0, 1.0], 1e-9)); + // y or z ≤ 0 → outside. + assert!(!c.in_primal_cone(&[0.0, -1.0, 1.0], 1e-9)); + assert!(!c.in_primal_cone(&[0.0, 1.0, -1.0], 1e-9)); + } + } + + #[test] + fn dual_membership_via_conjugate_gradient() { + // For interior `p`, `−∇F(p)` must lie in the dual cone `K_α*`. + for c in cones() { + for p in interior_points() { + let mut g = [0.0; 3]; + c.barrier_grad(&p, &mut g); + let d = [-g[0], -g[1], -g[2]]; + assert!( + c.in_dual_cone(&d, 1e-9), + "−∇F(p) must be dual-interior: α={} p={p:?} d={d:?}", + c.alpha + ); + } + } + } + + #[test] + fn grad_matches_finite_difference() { + let h = 1e-6; + for c in cones() { + for p in interior_points() { + let mut g = [0.0; 3]; + c.barrier_grad(&p, &mut g); + for k in 0..3 { + let mut pp = p; + let mut pm = p; + pp[k] += h; + pm[k] -= h; + let fd = (c.barrier(&pp) - c.barrier(&pm)) / (2.0 * h); + assert!( + (g[k] - fd).abs() < 1e-5, + "grad[{k}] α={} at {p:?}: analytic {} vs fd {}", + c.alpha, + g[k], + fd + ); + } + } + } + } + + #[test] + fn hess_matches_finite_difference() { + let h = 1e-6; + for c in cones() { + for p in interior_points() { + let hess = full_hess(&c, &p); + for j in 0..3 { + let mut pp = p; + let mut pm = p; + pp[j] += h; + pm[j] -= h; + let mut gp = [0.0; 3]; + let mut gm = [0.0; 3]; + c.barrier_grad(&pp, &mut gp); + c.barrier_grad(&pm, &mut gm); + for i in 0..3 { + let fd = (gp[i] - gm[i]) / (2.0 * h); + assert!( + (hess[i][j] - fd).abs() < 1e-4, + "H[{i}][{j}] α={} at {p:?}: analytic {} vs fd {}", + c.alpha, + hess[i][j], + fd + ); + } + } + } + } + } + + /// Log-homogeneity of degree ν = 3: F(t·p) = F(p) − 3·log t. + #[test] + fn log_homogeneous_degree_three() { + for c in cones() { + for p in interior_points() { + for &t in &[0.5_f64, 2.0, 3.7] { + let tp = [t * p[0], t * p[1], t * p[2]]; + let lhs = c.barrier(&tp); + let rhs = c.barrier(&p) - 3.0 * t.ln(); + assert!((lhs - rhs).abs() < 1e-9, "F(tp)={lhs} vs {rhs}"); + } + } + } + } + + /// Euler identity for a degree-ν log-homogeneous barrier: ⟨∇F(p), p⟩ = −ν. + #[test] + fn euler_identity() { + for c in cones() { + for p in interior_points() { + let mut g = [0.0; 3]; + c.barrier_grad(&p, &mut g); + let dot = g[0] * p[0] + g[1] * p[1] + g[2] * p[2]; + assert!((dot + 3.0).abs() < 1e-9, " = {dot}, expected −3"); + } + } + } + + /// Hessian/gradient identity for log-homogeneous barriers: ∇²F(p)·p = −∇F(p). + #[test] + fn hessian_times_point_is_neg_grad() { + for c in cones() { + for p in interior_points() { + let mut g = [0.0; 3]; + c.barrier_grad(&p, &mut g); + let hess = full_hess(&c, &p); + for i in 0..3 { + let hp = hess[i][0] * p[0] + hess[i][1] * p[1] + hess[i][2] * p[2]; + assert!( + (hp + g[i]).abs() < 1e-9, + "(Hp)[{i}] = {hp} vs −g = {} (α={})", + -g[i], + c.alpha + ); + } + } + } + } +} diff --git a/crates/pounce-convex/src/cones/psd.rs b/crates/pounce-convex/src/cones/psd.rs new file mode 100644 index 00000000..c272a5ab --- /dev/null +++ b/crates/pounce-convex/src/cones/psd.rs @@ -0,0 +1,727 @@ +//! Positive-semidefinite (PSD) cone primitives — Phase H7 foundation. +//! +//! The PSD cone `Sⁿ₊ = { X = Xᵀ ∈ ℝⁿˣⁿ : X ⪰ 0 }` is a **self-scaled** +//! (symmetric) cone, like the orthant and the second-order cone, so it +//! carries a Nesterov–Todd scaling. This module supplies the building +//! blocks the conic IPM needs, all in the symmetric-vectorization (`svec`) +//! coordinates the solver's slack/dual vectors live in: +//! +//! - [`svec`] / [`smat`] — the isometry between a symmetric `n×n` matrix and +//! `ℝᵐ`, `m = n(n+1)/2`, with off-diagonals scaled by `√2` so that +//! `⟨X, Y⟩_F = svec(X)·svec(Y)`. +//! - The log-det barrier `F(X) = −log det X`, its gradient `−X⁻¹`, and the +//! Hessian action `D ↦ X⁻¹ D X⁻¹`. +//! - Membership / fraction-to-boundary via the eigenvalues of `X`. +//! - The **Nesterov–Todd scaling** `W` (symmetric PD, `W Z W = S`), the +//! matrix the dense `(z,z)` KKT block `W ⊗ₛ W` is built from (driver +//! integration is Phase H7's next step). +//! +//! Eigendecompositions reuse [`pounce_linalg::symmetric_eigen`] (the +//! cyclic-Jacobi solver shared with the NLP sensitivity path). + +use super::{Cone, ConeBlock}; +use pounce_linalg::symmetric_eigen; + +/// The PSD cone over symmetric `n×n` matrices. Its slack/dual vectors have +/// dimension `n(n+1)/2` in [`svec`] coordinates. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct PsdCone { + pub n: usize, +} + +impl PsdCone { + pub fn new(n: usize) -> Self { + PsdCone { n } + } + + /// Length of the `svec` vectors this cone owns, `n(n+1)/2`. + pub fn dim(&self) -> usize { + self.n * (self.n + 1) / 2 + } + + /// Barrier degree `ν` of `−log det` over `Sⁿ₊` — equal to `n`. + pub fn degree(&self) -> usize { + self.n + } +} + +/// `svec` ordering: lower triangle, column by column — `(0,0),(1,0),…, +/// (n−1,0),(1,1),(2,1),…`. Off-diagonals carry a `√2` so the map is an +/// isometry (`‖X‖_F = ‖svec(X)‖₂`). `mat` is row-major `n×n` (symmetric). +pub fn svec(mat: &[f64], n: usize, out: &mut [f64]) { + let r2 = std::f64::consts::SQRT_2; + let mut k = 0; + for j in 0..n { + for i in j..n { + out[k] = if i == j { + mat[i * n + i] + } else { + r2 * mat[i * n + j] + }; + k += 1; + } + } +} + +/// The `svec` index of the lower-triangle entry `(i, j)` (`i ≥ j`) for an +/// `n×n` matrix, matching [`svec`]'s column-by-column lower-triangle order. +pub fn svec_index(n: usize, i: usize, j: usize) -> usize { + debug_assert!(i >= j && i < n); + j * n - j * (j.wrapping_sub(1)) / 2 + (i - j) +} + +/// Inverse of [`svec`]: rebuild the symmetric `n×n` matrix (row-major) from +/// its `svec`, dividing off-diagonals by `√2`. +pub fn smat(v: &[f64], n: usize, out: &mut [f64]) { + let inv_r2 = std::f64::consts::FRAC_1_SQRT_2; + let mut k = 0; + for j in 0..n { + for i in j..n { + let val = if i == j { v[k] } else { inv_r2 * v[k] }; + out[i * n + j] = val; + out[j * n + i] = val; + k += 1; + } + } +} + +// ---- small dense symmetric-matrix helpers (row-major, modest n) ---- + +/// `c = a · b` for row-major `n×n` matrices. +fn matmul(a: &[f64], b: &[f64], n: usize, c: &mut [f64]) { + for i in 0..n { + for k in 0..n { + let mut acc = 0.0; + for j in 0..n { + acc += a[i * n + j] * b[j * n + k]; + } + c[i * n + k] = acc; + } + } +} + +/// Symmetric matrix function `f(A) = Q diag(f(λ)) Qᵀ` for a symmetric `A` +/// (row-major). Returns `None` if the eigensolver fails to converge. +fn sym_apply(a: &[f64], n: usize, f: impl Fn(f64) -> f64) -> Option> { + let mut vals = vec![0.0; n]; + let mut vecs = vec![0.0; n * n]; + if !symmetric_eigen(a, n, &mut vals, &mut vecs) { + return None; + } + // vecs is column-major: eigenvector j has component i at vecs[j*n + i]. + let mut out = vec![0.0; n * n]; + for i in 0..n { + for k in 0..n { + let mut acc = 0.0; + for j in 0..n { + acc += f(vals[j]) * vecs[j * n + i] * vecs[j * n + k]; + } + out[i * n + k] = acc; + } + } + Some(out) +} + +impl PsdCone { + /// The cone identity `e = svec(Iₙ)` — the well-centered cold-start point. + pub fn identity(&self, out: &mut [f64]) { + let n = self.n; + let mut k = 0; + for j in 0..n { + for i in j..n { + out[k] = if i == j { 1.0 } else { 0.0 }; + k += 1; + } + } + } + + /// Smallest eigenvalue of `smat(point)` — `> 0` iff strictly interior. + pub fn min_eig(&self, point: &[f64]) -> f64 { + let n = self.n; + let mut m = vec![0.0; n * n]; + smat(point, n, &mut m); + let mut vals = vec![0.0; n]; + let mut vecs = vec![0.0; n * n]; + if !symmetric_eigen(&m, n, &mut vals, &mut vecs) { + return f64::NEG_INFINITY; + } + vals[0] // ascending + } + + /// Whether `smat(point) ⪰ tol·I`. + pub fn in_cone(&self, point: &[f64], tol: f64) -> bool { + self.min_eig(point) > tol + } + + /// The log-det barrier `F = −log det smat(point)` (`+∞` outside the cone). + pub fn barrier(&self, point: &[f64]) -> f64 { + let n = self.n; + let mut m = vec![0.0; n * n]; + smat(point, n, &mut m); + let mut vals = vec![0.0; n]; + let mut vecs = vec![0.0; n * n]; + if !symmetric_eigen(&m, n, &mut vals, &mut vecs) { + return f64::INFINITY; + } + let mut acc = 0.0; + for &l in &vals { + if l <= 0.0 { + return f64::INFINITY; + } + acc += l.ln(); + } + -acc + } + + /// Gradient of the barrier, `∇F = −svec(X⁻¹)` (`X = smat(point)`). + // The eig of a correctly-sized symmetric matrix at a strictly-interior + // (PD) point always converges, so `sym_apply` cannot return `None` here. + #[allow(clippy::expect_used)] + pub fn barrier_grad(&self, point: &[f64], out: &mut [f64]) { + let n = self.n; + let mut m = vec![0.0; n * n]; + smat(point, n, &mut m); + let inv = sym_apply(&m, n, |l| 1.0 / l).expect("barrier_grad: eig failed"); + // out = −svec(X⁻¹). + svec(&inv, n, out); + for v in out.iter_mut() { + *v = -*v; + } + } + + /// Hessian action `H[d] = svec(X⁻¹ · smat(d) · X⁻¹)` — the operator + /// `∇²F(point)` applied to a direction `d` (both in `svec` coordinates). + // See `barrier_grad`: the interior-point eig always converges. + #[allow(clippy::expect_used)] + pub fn barrier_hess_apply(&self, point: &[f64], dir: &[f64], out: &mut [f64]) { + let n = self.n; + let mut x = vec![0.0; n * n]; + smat(point, n, &mut x); + let xinv = sym_apply(&x, n, |l| 1.0 / l).expect("hess: eig failed"); + let mut d = vec![0.0; n * n]; + smat(dir, n, &mut d); + let mut tmp = vec![0.0; n * n]; + let mut res = vec![0.0; n * n]; + matmul(&xinv, &d, n, &mut tmp); // X⁻¹ D + matmul(&tmp, &xinv, n, &mut res); // X⁻¹ D X⁻¹ + svec(&res, n, out); + } + + /// Largest `α ∈ (0, tau]` with `smat(v) + α·smat(dv) ⪰ 0`, scaled by the + /// fraction-to-boundary parameter `tau`. Computes the most-negative + /// eigenvalue of `L⁻¹ smat(dv) L⁻ᵀ` where `smat(v) = L Lᵀ` (here via the + /// symmetric form `V^{-1/2} smat(dv) V^{-1/2}`, `V = smat(v) ≻ 0`). + pub fn max_step(&self, v: &[f64], dv: &[f64], tau: f64) -> f64 { + let n = self.n; + let mut vmat = vec![0.0; n * n]; + smat(v, n, &mut vmat); + let vinv_half = match sym_apply(&vmat, n, |l| 1.0 / l.max(1e-300).sqrt()) { + Some(m) => m, + None => return tau, // can't scale; let the caller's safeguard handle it + }; + let mut dmat = vec![0.0; n * n]; + smat(dv, n, &mut dmat); + // M = V^{-1/2} dV V^{-1/2} (symmetric). + let mut tmp = vec![0.0; n * n]; + let mut mmat = vec![0.0; n * n]; + matmul(&vinv_half, &dmat, n, &mut tmp); + matmul(&tmp, &vinv_half, n, &mut mmat); + let mut vals = vec![0.0; n]; + let mut vecs = vec![0.0; n * n]; + if !symmetric_eigen(&mmat, n, &mut vals, &mut vecs) { + return tau; + } + let min_eig = vals[0]; // ascending + if min_eig >= 0.0 { + 1.0 // direction keeps PSD for all α ⇒ full step + } else { + (tau * (-1.0 / min_eig)).min(1.0) + } + } + + /// The Nesterov–Todd scaling matrix `W` (symmetric PD) for the + /// primal/dual interior pair `(s, z)` (both `svec` of PD matrices): + /// `W = S^{1/2} (S^{1/2} Z S^{1/2})^{-1/2} S^{1/2}`, which satisfies the + /// defining identity `W Z W = S`. Returned as a row-major `n×n` matrix. + /// The dense `(z,z)` KKT scaling block is the symmetric Kronecker + /// product `W ⊗ₛ W` built from this (Phase H7 driver integration). + pub fn nt_scaling(&self, s: &[f64], z: &[f64]) -> Option> { + let n = self.n; + let mut smat_s = vec![0.0; n * n]; + let mut smat_z = vec![0.0; n * n]; + smat(s, n, &mut smat_s); + smat(z, n, &mut smat_z); + let s_half = sym_apply(&smat_s, n, |l| l.max(0.0).sqrt())?; + // M = S^{1/2} Z S^{1/2}. + let mut tmp = vec![0.0; n * n]; + let mut m = vec![0.0; n * n]; + matmul(&s_half, &smat_z, n, &mut tmp); + matmul(&tmp, &s_half, n, &mut m); + let m_inv_half = sym_apply(&m, n, |l| 1.0 / l.max(1e-300).sqrt())?; + // W = S^{1/2} M^{-1/2} S^{1/2}. + let mut tmp2 = vec![0.0; n * n]; + let mut w = vec![0.0; n * n]; + matmul(&s_half, &m_inv_half, n, &mut tmp2); + matmul(&tmp2, &s_half, n, &mut w); + Some(w) + } +} + +impl PsdCone { + /// Jordan product `S ∘ Z = (SZ + ZS)/2`, in `svec` coordinates. + fn jordan(&self, s: &[f64], z: &[f64], out: &mut [f64]) { + let n = self.n; + let (mut sm, mut zm) = (vec![0.0; n * n], vec![0.0; n * n]); + smat(s, n, &mut sm); + smat(z, n, &mut zm); + let (mut sz, mut zs) = (vec![0.0; n * n], vec![0.0; n * n]); + matmul(&sm, &zm, n, &mut sz); + matmul(&zm, &sm, n, &mut zs); + let mut j = vec![0.0; n * n]; + for i in 0..n * n { + j[i] = 0.5 * (sz[i] + zs[i]); + } + svec(&j, n, out); + } + + /// Apply the NT scaling operator `W ⊗ₛ W` to a direction `d`: + /// `out = svec(W · smat(d) · W)` (`w` is the row-major `n×n` scaling). + fn apply_scaling(&self, w: &[f64], d: &[f64], out: &mut [f64]) { + let n = self.n; + let mut dm = vec![0.0; n * n]; + smat(d, n, &mut dm); + let (mut tmp, mut res) = (vec![0.0; n * n], vec![0.0; n * n]); + matmul(w, &dm, n, &mut tmp); + matmul(&tmp, w, n, &mut res); + svec(&res, n, out); + } + + /// Solve the Jordan system `z ∘ D = R` — i.e. the Lyapunov equation + /// `Z D + D Z = 2·smat(r)` — for symmetric `D`, returning `svec(D)`. + /// This is `Arw(z)⁻¹ r` for the PSD cone. Via `Z = QΛQᵀ`: + /// `D = Q [ (Qᵀ(2R)Q)_{ij} / (λᵢ+λⱼ) ] Qᵀ`. + #[allow(clippy::expect_used)] + fn lyapunov_solve(&self, z: &[f64], r: &[f64], out: &mut [f64]) { + let n = self.n; + let mut zm = vec![0.0; n * n]; + smat(z, n, &mut zm); + let mut vals = vec![0.0; n]; + let mut q = vec![0.0; n * n]; // column-major eigenvectors + assert!( + symmetric_eigen(&zm, n, &mut vals, &mut q), + "lyapunov: eig failed" + ); + let mut rm = vec![0.0; n * n]; + smat(r, n, &mut rm); + // R̃ = Qᵀ R Q. q column j: q[j*n + i] = Q[i][j]. + let mut rtilde = vec![0.0; n * n]; + for a in 0..n { + for b in 0..n { + let mut acc = 0.0; + for i in 0..n { + for j in 0..n { + acc += q[a * n + i] * rm[i * n + j] * q[b * n + j]; + } + } + rtilde[a * n + b] = acc; + } + } + // D̃_{ab} = 2 R̃_{ab} / (λ_a + λ_b). + let mut dtilde = vec![0.0; n * n]; + for a in 0..n { + for b in 0..n { + dtilde[a * n + b] = 2.0 * rtilde[a * n + b] / (vals[a] + vals[b]); + } + } + // D = Q D̃ Qᵀ. + let mut dm = vec![0.0; n * n]; + for i in 0..n { + for k in 0..n { + let mut acc = 0.0; + for a in 0..n { + for b in 0..n { + acc += q[a * n + i] * dtilde[a * n + b] * q[b * n + k]; + } + } + dm[i * n + k] = acc; + } + } + svec(&dm, n, out); + } +} + +impl Cone for PsdCone { + fn degree(&self) -> usize { + self.n + } + + fn identity(&self, out: &mut [f64]) { + PsdCone::identity(self, out); + } + + fn dim(&self) -> usize { + PsdCone::dim(self) + } + + fn mu(&self, s: &[f64], z: &[f64]) -> f64 { + // ⟨s, z⟩ = svec(S)·svec(Z) = tr(SZ); μ = ⟨s,z⟩ / degree. + let dot: f64 = s.iter().zip(z).map(|(a, b)| a * b).sum(); + dot / self.n as f64 + } + + fn in_dual_cone(&self, z: &[f64], tol: f64) -> bool { + // Self-dual: z ∈ K iff λ_min(smat z) ≥ −tol. + self.min_eig(z) >= -tol + } + + fn scaling_diag(&self, _s: &[f64], _z: &[f64], _out: &mut [f64]) { + unimplemented!("PSD uses kkt_block (dense), not scaling_diag") + } + + fn comp_residual(&self, s: &[f64], z: &[f64], sigma_mu: f64, out: &mut [f64]) { + // s ∘ z − σμ·svec(I). + self.jordan(s, z, out); + let mut e = vec![0.0; self.dim()]; + PsdCone::identity(self, &mut e); + for k in 0..self.dim() { + out[k] -= sigma_mu * e[k]; + } + } + + fn comp_residual_corrector( + &self, + s: &[f64], + z: &[f64], + ds_aff: &[f64], + dz_aff: &[f64], + sigma_mu: f64, + out: &mut [f64], + ) { + // s∘z + ds_aff∘dz_aff − σμ·svec(I). + self.jordan(s, z, out); + let mut second = vec![0.0; self.dim()]; + self.jordan(ds_aff, dz_aff, &mut second); + let mut e = vec![0.0; self.dim()]; + PsdCone::identity(self, &mut e); + for k in 0..self.dim() { + out[k] += second[k] - sigma_mu * e[k]; + } + } + + // The NT scaling always succeeds at strictly-interior (PD) iterates. + #[allow(clippy::expect_used)] + fn recover_ds(&self, s: &[f64], z: &[f64], r_comp: &[f64], dz: &[f64], ds: &mut [f64]) { + // ds = −Arw(z)⁻¹ r_comp − (W⊗ₛW) dz, consistent with `kkt_block` + // (the scaling operator) and `rhs_comp_term` (the Lyapunov solve). + let m = self.dim(); + let mut inv = vec![0.0; m]; + self.lyapunov_solve(z, r_comp, &mut inv); + let w = self.nt_scaling(s, z).expect("recover_ds: NT scaling"); + let mut hdz = vec![0.0; m]; + self.apply_scaling(&w, dz, &mut hdz); + for k in 0..m { + ds[k] = -inv[k] - hdz[k]; + } + } + + #[allow(clippy::expect_used)] + fn kkt_block(&self, s: &[f64], z: &[f64]) -> ConeBlock { + // The (z,z) block is the symmetric Kronecker H = W ⊗ₛ W, an m×m SPD + // matrix with H·svec(z) = svec(WZW) = svec(s). Form it column by + // column and return its lower triangle (row-major). + let m = self.dim(); + let w = self.nt_scaling(s, z).expect("kkt_block: NT scaling"); + let mut cols = vec![0.0; m * m]; // cols[b*m + a] = M[a][b] + let mut e = vec![0.0; m]; + let mut col = vec![0.0; m]; + for b in 0..m { + e.iter_mut().for_each(|v| *v = 0.0); + e[b] = 1.0; + self.apply_scaling(&w, &e, &mut col); + for a in 0..m { + cols[b * m + a] = col[a]; + } + } + // Lower triangle, row-major: (0,0); (1,0),(1,1); … + let mut lower = Vec::with_capacity(m * (m + 1) / 2); + for a in 0..m { + for b in 0..=a { + lower.push(cols[b * m + a]); + } + } + ConeBlock::DenseLower { dim: m, lower } + } + + fn rhs_comp_term(&self, _s: &[f64], z: &[f64], r_comp: &[f64], out: &mut [f64]) { + // Arw(z)⁻¹ r_comp — the Lyapunov solve Z D + D Z = 2·smat(r_comp). + self.lyapunov_solve(z, r_comp, out); + } + + fn recenter_warm(&self, s: &mut [f64], z: &mut [f64], floor: f64) { + // Like the SOC: a converged PSD point sits on the boundary (a zero + // eigenvalue), where the NT scaling is singular. Re-center each block + // to a well-conditioned multiple of the identity c·I (so S∘Z = c²I), + // preserving magnitude; the warm benefit comes from the primal x. + let n = self.n; + let center = |u: &mut [f64]| { + let mag = u + .iter() + .fold(0.0_f64, |m, &v| m.max(v.abs())) + .max(floor) + .max(1.0); + let mut e = vec![0.0; u.len()]; + PsdCone { n }.identity(&mut e); + for k in 0..u.len() { + u[k] = mag * e[k]; + } + }; + center(s); + center(z); + } + + fn max_step(&self, v: &[f64], dv: &[f64], tau: f64) -> f64 { + PsdCone::max_step(self, v, dv, tau) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn matmul_v(a: &[f64], b: &[f64], n: usize) -> Vec { + let mut c = vec![0.0; n * n]; + matmul(a, b, n, &mut c); + c + } + + #[test] + fn svec_smat_roundtrip_and_isometry() { + let n = 3; + // A symmetric matrix (row-major). + let x = vec![ + 2.0, 0.5, -1.0, // + 0.5, 3.0, 0.25, // + -1.0, 0.25, 1.5, + ]; + let m = n * (n + 1) / 2; + let mut v = vec![0.0; m]; + svec(&x, n, &mut v); + let mut back = vec![0.0; n * n]; + smat(&v, n, &mut back); + for i in 0..n * n { + assert!((x[i] - back[i]).abs() < 1e-12, "roundtrip at {i}"); + } + // Isometry: ⟨X,X⟩_F = ‖svec‖². + let fro: f64 = x.iter().map(|a| a * a).sum(); + let sv: f64 = v.iter().map(|a| a * a).sum(); + assert!((fro - sv).abs() < 1e-12, "isometry {fro} vs {sv}"); + } + + #[test] + fn inner_product_preserved() { + let n = 2; + let x = vec![1.0, 2.0, 2.0, 3.0]; + let y = vec![0.5, -1.0, -1.0, 4.0]; + let fro: f64 = (0..n * n).map(|i| x[i] * y[i]).sum(); + let m = n * (n + 1) / 2; + let (mut xv, mut yv) = (vec![0.0; m], vec![0.0; m]); + svec(&x, n, &mut xv); + svec(&y, n, &mut yv); + let dot: f64 = (0..m).map(|i| xv[i] * yv[i]).sum(); + assert!((fro - dot).abs() < 1e-12, "{fro} vs {dot}"); + } + + #[test] + fn identity_is_in_cone_and_barrier_zero() { + let c = PsdCone::new(3); + let mut e = vec![0.0; c.dim()]; + c.identity(&mut e); + assert!(c.in_cone(&e, 1e-9)); + assert!((c.barrier(&e) - 0.0).abs() < 1e-12); // −log det I = 0 + assert!((c.min_eig(&e) - 1.0).abs() < 1e-9); + } + + #[test] + fn barrier_grad_matches_finite_difference() { + let c = PsdCone::new(2); + // X = [[2, 0.3],[0.3, 1.5]] ≻ 0. + let point = { + let x = vec![2.0, 0.3, 0.3, 1.5]; + let mut v = vec![0.0; c.dim()]; + svec(&x, 2, &mut v); + v + }; + let mut g = vec![0.0; c.dim()]; + c.barrier_grad(&point, &mut g); + let h = 1e-6; + for k in 0..c.dim() { + let mut pp = point.clone(); + let mut pm = point.clone(); + pp[k] += h; + pm[k] -= h; + let fd = (c.barrier(&pp) - c.barrier(&pm)) / (2.0 * h); + assert!((g[k] - fd).abs() < 1e-5, "grad[{k}] {} vs fd {fd}", g[k]); + } + } + + #[test] + fn nt_scaling_satisfies_w_z_w_equals_s() { + let c = PsdCone::new(3); + // Two distinct PD matrices in svec coords. + let to_v = |x: &[f64]| { + let mut v = vec![0.0; c.dim()]; + svec(x, 3, &mut v); + v + }; + let smat_s = vec![ + 4.0, 1.0, 0.0, // + 1.0, 3.0, 0.5, // + 0.0, 0.5, 2.0, + ]; + let smat_z = vec![ + 2.0, -0.3, 0.2, // + -0.3, 1.0, 0.1, // + 0.2, 0.1, 1.5, + ]; + let s = to_v(&smat_s); + let z = to_v(&smat_z); + let w = c.nt_scaling(&s, &z).expect("nt scaling"); + // Check W Z W = S. + let wz = matmul_v(&w, &smat_z, 3); + let wzw = matmul_v(&wz, &w, 3); + for i in 0..9 { + assert!( + (wzw[i] - smat_s[i]).abs() < 1e-8, + "W Z W ≠ S at {i}: {} vs {}", + wzw[i], + smat_s[i] + ); + } + // W is symmetric. + for i in 0..3 { + for j in 0..3 { + assert!((w[i * 3 + j] - w[j * 3 + i]).abs() < 1e-10); + } + } + } + + #[test] + fn max_step_lands_on_the_boundary() { + let c = PsdCone::new(2); + // v = I; dv = −I ⇒ I − α I ⪰ 0 needs α ≤ 1; with τ=1, step = 1. + let mut v = vec![0.0; c.dim()]; + c.identity(&mut v); + let mut dv = vec![0.0; c.dim()]; + c.identity(&mut dv); + for x in dv.iter_mut() { + *x = -*x; + } + let a = c.max_step(&v, &dv, 1.0); + assert!((a - 1.0).abs() < 1e-9, "step {a}"); + // At α just below 1 the point is still PD; with τ = 0.99, step ≈ 0.99. + let a2 = c.max_step(&v, &dv, 0.99); + assert!((a2 - 0.99).abs() < 1e-9, "step {a2}"); + } + + #[test] + fn max_step_full_when_direction_keeps_psd() { + let c = PsdCone::new(2); + let mut v = vec![0.0; c.dim()]; + c.identity(&mut v); + // dv = +I ⇒ stays PD for all α ⇒ capped at 1. + let mut dv = vec![0.0; c.dim()]; + c.identity(&mut dv); + assert!((c.max_step(&v, &dv, 0.99) - 1.0).abs() < 1e-9); + } + + fn to_v(c: &PsdCone, x: &[f64]) -> Vec { + let mut v = vec![0.0; c.dim()]; + svec(x, c.n, &mut v); + v + } + + fn dense_lower_to_full(block: &ConeBlock) -> (usize, Vec) { + match block { + ConeBlock::DenseLower { dim, lower } => { + let m = *dim; + let mut full = vec![0.0; m * m]; + let mut k = 0; + for a in 0..m { + for b in 0..=a { + full[a * m + b] = lower[k]; + full[b * m + a] = lower[k]; + k += 1; + } + } + (m, full) + } + _ => panic!("expected DenseLower"), + } + } + + /// The defining NT property of the `(z,z)` block: `H·svec(z) = svec(s)`. + #[test] + fn kkt_block_maps_z_to_s() { + use crate::cones::Cone; + let c = PsdCone::new(3); + let s = to_v(&c, &[4.0, 1.0, 0.0, 1.0, 3.0, 0.5, 0.0, 0.5, 2.0]); + let z = to_v(&c, &[2.0, -0.3, 0.2, -0.3, 1.0, 0.1, 0.2, 0.1, 1.5]); + let (m, h) = dense_lower_to_full(&c.kkt_block(&s, &z)); + for a in 0..m { + let acc: f64 = (0..m).map(|b| h[a * m + b] * z[b]).sum(); + assert!((acc - s[a]).abs() < 1e-7, "row {a}: {acc} vs {}", s[a]); + } + } + + /// `rhs_comp_term` = `Arw(z)⁻¹ r`, so `z ∘ (Arw(z)⁻¹ r) = r`. + #[test] + fn lyapunov_inverts_jordan() { + use crate::cones::Cone; + let c = PsdCone::new(3); + let z = to_v(&c, &[2.0, -0.3, 0.2, -0.3, 1.0, 0.1, 0.2, 0.1, 1.5]); + let r = to_v(&c, &[0.5, 0.1, -0.2, 0.1, 0.3, 0.05, -0.2, 0.05, 0.4]); + let mut d = vec![0.0; c.dim()]; + c.rhs_comp_term(&z, &z, &r, &mut d); + let mut zd = vec![0.0; c.dim()]; + c.jordan(&z, &d, &mut zd); + for k in 0..c.dim() { + assert!((zd[k] - r[k]).abs() < 1e-9, "{k}: {} vs {}", zd[k], r[k]); + } + } + + /// At `s = z = e`, `s∘z = I` and the centered residual is `(1−σμ)·e`. + #[test] + fn comp_residual_at_identity() { + use crate::cones::Cone; + let c = PsdCone::new(2); + let mut e = vec![0.0; c.dim()]; + c.identity(&mut e); + let mut out = vec![0.0; c.dim()]; + Cone::comp_residual(&c, &e, &e, 0.3, &mut out); + for k in 0..c.dim() { + assert!((out[k] - 0.7 * e[k]).abs() < 1e-12, "{k}"); + } + } + + /// `recover_ds` is consistent with the assembled block and rhs term: + /// it must reproduce `−Arw(z)⁻¹ r − H·dz`. + #[test] + fn recover_ds_matches_block_and_rhs() { + use crate::cones::Cone; + let c = PsdCone::new(3); + let s = to_v(&c, &[4.0, 1.0, 0.0, 1.0, 3.0, 0.5, 0.0, 0.5, 2.0]); + let z = to_v(&c, &[2.0, -0.3, 0.2, -0.3, 1.0, 0.1, 0.2, 0.1, 1.5]); + let r = to_v(&c, &[0.5, 0.1, -0.2, 0.1, 0.3, 0.05, -0.2, 0.05, 0.4]); + let dz = to_v(&c, &[0.2, 0.0, 0.1, 0.0, -0.1, 0.05, 0.1, 0.05, 0.3]); + let mut ds = vec![0.0; c.dim()]; + c.recover_ds(&s, &z, &r, &dz, &mut ds); + // Reference: −rhs_comp_term − H·dz. + let mut rhs = vec![0.0; c.dim()]; + c.rhs_comp_term(&s, &z, &r, &mut rhs); + let (m, h) = dense_lower_to_full(&c.kkt_block(&s, &z)); + for a in 0..m { + let hdz: f64 = (0..m).map(|b| h[a * m + b] * dz[b]).sum(); + assert!((ds[a] - (-rhs[a] - hdz)).abs() < 1e-9, "row {a}"); + } + } +} diff --git a/crates/pounce-convex/src/cones/soc.rs b/crates/pounce-convex/src/cones/soc.rs new file mode 100644 index 00000000..e8203293 --- /dev/null +++ b/crates/pounce-convex/src/cones/soc.rs @@ -0,0 +1,443 @@ +//! Second-order (Lorentz) cone `K = { (t, x) : t ≥ ‖x‖₂ }` for the convex +//! IPM. +//! +//! Phase 2 of the SOCP extension (see `dev-notes/socp-extension.md`). This +//! module ships the parts whose correctness is unambiguous and +//! independently testable: +//! +//! - the Jordan-algebra geometry (`∘`, identity `e`, the `det` quadratic), +//! - the central-path measure `μ = ⟨s, z⟩ / 2` (rank 2, regardless of +//! dimension), +//! - the fraction-to-boundary `max_step` (the cone-boundary root), and +//! - the **Nesterov–Todd scaling Hessian** `W² = η²(2 w̄ w̄ᵀ − J)` that +//! enters the KKT `(z, z)` block, with its defining identities +//! (`W² s = z`, symmetric PD, `W² = I` at `s = z`) verified in tests. +//! +//! The *reduced-system* methods (`recover_ds`, `rhs_comp_term`, the +//! corrector) carry the NT scaling/sign conventions whose end-to-end +//! correctness must be validated against a reference solver; they are +//! deferred to Phase 2b and `unimplemented!` here so they cannot be used +//! before that validation. The driver builds an orthant-only cone until +//! then, so SOC is a tested building block, not yet a solvable cone. + +use super::{Cone, ConeBlock}; + +/// The second-order cone of a given dimension `m` (`m ≥ 1`): +/// `{ u ∈ ℝᵐ : u₀ ≥ ‖u_{1..}‖₂ }`. +#[derive(Debug, Clone, Copy)] +pub struct SecondOrderCone { + m: usize, +} + +impl SecondOrderCone { + pub fn new(m: usize) -> Self { + assert!(m >= 1, "second-order cone needs dimension ≥ 1"); + SecondOrderCone { m } + } + + /// `det(u) = u₀² − ‖u_{1..}‖²` — the cone's quadratic form (`uᵀJu`, + /// `J = diag(1,−1,…,−1)`). Positive in the interior. + pub fn det(u: &[f64]) -> f64 { + let tail: f64 = u[1..].iter().map(|v| v * v).sum(); + u[0] * u[0] - tail + } + + /// Jordan product `s ∘ z = (sᵀz, s₀ z_{1..} + z₀ s_{1..})`. + pub fn jordan(s: &[f64], z: &[f64], out: &mut [f64]) { + let dot: f64 = s.iter().zip(z).map(|(a, b)| a * b).sum(); + out[0] = dot; + for k in 1..s.len() { + out[k] = s[0] * z[k] + z[0] * s[k]; + } + } + + /// The Nesterov–Todd scaling: returns `(η, w̄)` with `w̄` the scaling + /// point (`det(w̄) = 1`, `w̄₀ > 0`) and `η² = √det(s)/√det(z)`. The + /// scaling Hessian is then `W² = η²(2 w̄ w̄ᵀ − J)`. + fn nt_scaling(s: &[f64], z: &[f64]) -> (f64, Vec) { + let m = s.len(); + let s_det = Self::det(s).max(0.0).sqrt(); // √det(s) + let z_det = Self::det(z).max(0.0).sqrt(); + // Normalize to the cone's unit-determinant sphere. + let s_bar: Vec = s.iter().map(|v| v / s_det).collect(); + let z_bar: Vec = z.iter().map(|v| v / z_det).collect(); + let sz: f64 = s_bar.iter().zip(&z_bar).map(|(a, b)| a * b).sum(); + let gamma = ((1.0 + sz) / 2.0).sqrt(); + // w̄ = (s̄ + J z̄) / (2γ), J z̄ = (z̄₀, −z̄_{1..}). + let mut w_bar = vec![0.0; m]; + w_bar[0] = (s_bar[0] + z_bar[0]) / (2.0 * gamma); + for k in 1..m { + w_bar[k] = (s_bar[k] - z_bar[k]) / (2.0 * gamma); + } + let eta = (s_det / z_det).sqrt(); + (eta, w_bar) + } + + /// Apply the scaling block `W² = η²(2 w̄ w̄ᵀ − J)` to a vector — the + /// matrix-free form of the dense block returned by [`Self::kkt_block`], + /// used in `recover_ds` so the recovered slack step is *exactly* + /// consistent with the assembled KKT block. + fn apply_w2(eta: f64, w_bar: &[f64], dz: &[f64], out: &mut [f64]) { + let eta2 = eta * eta; + let wd: f64 = w_bar.iter().zip(dz).map(|(w, d)| w * d).sum(); + out[0] = eta2 * (2.0 * w_bar[0] * wd - dz[0]); // (J dz)₀ = dz₀ + for k in 1..w_bar.len() { + out[k] = eta2 * (2.0 * w_bar[k] * wd + dz[k]); // (J dz)_k = −dz_k + } + } + + /// Apply `Arw(z)⁻¹` to `b` (solve the arrow system `Arw(z) x = b`), + /// where `Arw(z) = [[z₀, z₁ᵀ], [z₁, z₀ I]]`. This is the cone's + /// "division by z"; for a 1-D cone it is `b / z`. + fn arw_inv(z: &[f64], b: &[f64], out: &mut [f64]) { + let m = z.len(); + let z1_b1: f64 = z[1..].iter().zip(&b[1..]).map(|(p, q)| p * q).sum(); + let det = Self::det(z); + let x0 = (z[0] * b[0] - z1_b1) / det; + out[0] = x0; + for k in 1..m { + out[k] = (b[k] - x0 * z[k]) / z[0]; + } + } +} + +impl Cone for SecondOrderCone { + fn degree(&self) -> usize { + 2 // rank of the second-order cone, independent of dimension + } + + fn identity(&self, out: &mut [f64]) { + out.iter_mut().for_each(|v| *v = 0.0); + out[0] = 1.0; // e = (1, 0, …, 0) + } + + fn dim(&self) -> usize { + self.m + } + + fn mu(&self, s: &[f64], z: &[f64]) -> f64 { + let dot: f64 = s.iter().zip(z).map(|(a, b)| a * b).sum(); + dot / 2.0 + } + + fn kkt_block(&self, s: &[f64], z: &[f64]) -> ConeBlock { + // Diagonal-plus-rank-1 form of W² = η²(2 w̄w̄ᵀ − J) + // = diag(η²·(−J)) + (√2 η w̄)(√2 η w̄)ᵀ, + // so the KKT assembly can keep it sparse via one auxiliary variable. + let (eta, w_bar) = Self::nt_scaling(s, z); + let eta2 = eta * eta; + let mut diag = vec![eta2; self.m]; + diag[0] = -eta2; // −J = diag(−1, 1, …, 1) ⇒ η²·(−J)₀ = −η² + let scale = (2.0_f64).sqrt() * eta; + let u: Vec = w_bar.iter().map(|w| scale * w).collect(); + ConeBlock::DiagPlusRank1 { diag, u } + } + + fn comp_residual(&self, s: &[f64], z: &[f64], sigma_mu: f64, out: &mut [f64]) { + // s ∘ z − σμ e. + Self::jordan(s, z, out); + out[0] -= sigma_mu; + } + + fn max_step(&self, v: &[f64], dv: &[f64], tau: f64) -> f64 { + // Largest α with v + α dv in int(K): det(v+αdv) ≥ 0 and first + // coordinate ≥ 0. det is the quadratic a α² + b α + c with + // a = det(dv), c = det(v) > 0, b = 2 (v J dv). + let a = Self::det(dv); + let c = Self::det(v); + let tail: f64 = v[1..].iter().zip(&dv[1..]).map(|(p, q)| p * q).sum(); + let b = 2.0 * (v[0] * dv[0] - tail); + + let mut alpha = f64::INFINITY; + // Determinant boundary (smallest positive root of a α² + b α + c). + let disc = b * b - 4.0 * a * c; + if a.abs() <= 1e-300 { + if b < 0.0 { + alpha = alpha.min(-c / b); + } + } else if disc >= 0.0 { + let sq = disc.sqrt(); + for r in [(-b - sq) / (2.0 * a), (-b + sq) / (2.0 * a)] { + if r > 0.0 { + alpha = alpha.min(r); + } + } + } + // First-coordinate boundary v₀ + α dv₀ ≥ 0. + if dv[0] < 0.0 { + alpha = alpha.min(-v[0] / dv[0]); + } + if !alpha.is_finite() { + return 1.0; // no binding boundary in the step direction + } + (tau * alpha).min(1.0) + } + + fn in_dual_cone(&self, z: &[f64], tol: f64) -> bool { + // Self-dual: z ∈ K iff z₀ ≥ ‖z₁..‖ − tol. + let tail: f64 = z[1..self.m].iter().map(|v| v * v).sum::().sqrt(); + z[0] >= tail - tol + } + + fn scaling_diag(&self, _s: &[f64], _z: &[f64], _out: &mut [f64]) { + // SOC's (z,z) block is dense — the driver consumes `kkt_block`, not + // the orthant's diagonal-only `scaling_diag`. + unimplemented!("SOC uses kkt_block, not scaling_diag") + } + + fn comp_residual_corrector( + &self, + s: &[f64], + z: &[f64], + ds_aff: &[f64], + dz_aff: &[f64], + sigma_mu: f64, + out: &mut [f64], + ) { + // s∘z + ds_aff∘dz_aff − σμ e (Mehrotra second-order term, Jordan). + let mut second = vec![0.0; self.m]; + Self::jordan(s, z, out); + Self::jordan(ds_aff, dz_aff, &mut second); + for k in 0..self.m { + out[k] += second[k]; + } + out[0] -= sigma_mu; + } + + fn rhs_comp_term(&self, _s: &[f64], z: &[f64], r_comp: &[f64], out: &mut [f64]) { + // Reduced-KKT (z)-row term: Arw(z)⁻¹ r_comp. Coincides with the NT + // term −W⁻¹ r̂ via the identity W⁻¹λ⁻¹ = z⁻¹; reduces to r_comp/z in + // 1-D. + Self::arw_inv(z, r_comp, out); + } + + fn recenter_warm(&self, s: &mut [f64], z: &mut [f64], floor: f64) { + // A *converged* conic warm point sits on the cone boundary + // (λ_min = u₀ − ‖u₁‖ ≈ 0), where the NT scaling is singular + // (det → 0). Unlike the orthant, the IPM cannot dwell near that + // boundary without the factorization blowing up, so seeding the SOC + // duals there is unstable. We therefore **re-center** each block to + // a well-conditioned axis point `c·e` (so `s∘z = c²e`, perfectly + // centered): the warm benefit for SOC comes from the primal `x` + // (which seeds `s = h − Gx` and the residuals), while the cone duals + // restart centered. Magnitude is preserved so the scale is sensible. + let center = |u: &mut [f64]| { + let mag = u + .iter() + .fold(0.0_f64, |m, &v| m.max(v.abs())) + .max(floor) + .max(1.0); + u.iter_mut().for_each(|v| *v = 0.0); + u[0] = mag; + }; + center(s); + center(z); + } + + fn recover_ds(&self, s: &[f64], z: &[f64], r_comp: &[f64], dz: &[f64], ds: &mut [f64]) { + // ds = −Arw(z)⁻¹ r_comp − W⁻² dz, exactly consistent with the + // assembled block (`apply_w2` ≡ `kkt_block` as an operator) and the + // rhs term above. Reduces to −r_comp/z − (s/z) dz in 1-D. + let (eta, w_bar) = Self::nt_scaling(s, z); + let mut rhs = vec![0.0; self.m]; + Self::arw_inv(z, r_comp, &mut rhs); + let mut w2dz = vec![0.0; self.m]; + Self::apply_w2(eta, &w_bar, dz, &mut w2dz); + for k in 0..self.m { + ds[k] = -rhs[k] - w2dz[k]; + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn in_interior(u: &[f64]) -> bool { + u[0] > 0.0 && SecondOrderCone::det(u) > 0.0 + } + + /// Reconstruct the dense symmetric `W² = diag(d) + u uᵀ` from the + /// cone's diagonal-plus-rank-1 block. + fn dense(block: &ConeBlock, m: usize) -> Vec> { + let (diag, u) = match block { + ConeBlock::DiagPlusRank1 { diag, u } => { + assert_eq!(diag.len(), m); + (diag, u) + } + _ => panic!("expected diag-plus-rank-1 block"), + }; + let mut w = vec![vec![0.0; m]; m]; + for i in 0..m { + for j in 0..m { + w[i][j] = u[i] * u[j] + if i == j { diag[i] } else { 0.0 }; + } + } + w + } + + fn matvec(w: &[Vec], x: &[f64]) -> Vec { + w.iter() + .map(|row| row.iter().zip(x).map(|(a, b)| a * b).sum()) + .collect() + } + + #[test] + fn mu_is_half_inner_product() { + let c = SecondOrderCone::new(3); + // rank 2 ⇒ μ = ⟨s,z⟩ / 2. + let s = [2.0, 0.5, 0.5]; + let z = [3.0, -1.0, 0.0]; + let dot = 2.0 * 3.0 + 0.5 * -1.0 + 0.5 * 0.0; + assert!((c.mu(&s, &z) - dot / 2.0).abs() < 1e-12); + } + + #[test] + fn nt_hessian_maps_z_to_s() { + // The (z,z) scaling block maps z → s, matching the orthant's + // diag(s/z) (which satisfies diag(s/z)·z = s). For the SOC this is + // W² = η² Q_{w̄}, with W² symmetric PD. (Equivalently the NT + // identity z = W² s holds with the inverse scaling; we test the + // form the KKT block actually uses.) + let c = SecondOrderCone::new(3); + let s = [2.0, 0.5, -0.5]; // det = 4 - 0.5 = 3.5 > 0 + let z = [3.0, 1.0, 0.5]; // det = 9 - 1.25 > 0 + assert!(in_interior(&s) && in_interior(&z)); + let w2 = dense(&c.kkt_block(&s, &z), 3); + let wz = matvec(&w2, &z); + for k in 0..3 { + assert!((wz[k] - s[k]).abs() < 1e-9, "W²z[{k}]={} s={}", wz[k], s[k]); + } + // Symmetry. + for i in 0..3 { + for j in 0..3 { + assert!((w2[i][j] - w2[j][i]).abs() < 1e-12); + } + } + // Positive definiteness via positive determinant + positive (0,0) + // leading minor chain on this 3×3 (cheap check: xᵀW²x > 0 on a few + // probes including the cone axis). + for x in [[1.0, 0.0, 0.0], [0.3, 0.7, -0.2], [-0.5, 0.1, 0.9]] { + let q: f64 = x.iter().zip(matvec(&w2, &x)).map(|(a, b)| a * b).sum(); + assert!(q > 0.0, "W² not PD on probe {x:?}: {q}"); + } + } + + #[test] + fn nt_hessian_is_identity_at_s_equals_z() { + let c = SecondOrderCone::new(4); + let s = [3.0, 1.0, -0.5, 0.5]; + let w2 = dense(&c.kkt_block(&s, &s), 4); + for i in 0..4 { + for j in 0..4 { + let want = if i == j { 1.0 } else { 0.0 }; + assert!((w2[i][j] - want).abs() < 1e-9, "W²[{i}][{j}]={}", w2[i][j]); + } + } + } + + #[test] + fn comp_residual_is_jordan_minus_sigma_mu_e() { + let c = SecondOrderCone::new(3); + let s = [2.0, 0.5, -0.5]; + let z = [3.0, 1.0, 0.5]; + let mut out = [0.0; 3]; + c.comp_residual(&s, &z, 0.7, &mut out); + let dot = 2.0 * 3.0 + 0.5 * 1.0 + -0.5 * 0.5; + assert!((out[0] - (dot - 0.7)).abs() < 1e-12); + assert!((out[1] - (s[0] * z[1] + z[0] * s[1])).abs() < 1e-12); + assert!((out[2] - (s[0] * z[2] + z[0] * s[2])).abs() < 1e-12); + } + + #[test] + fn max_step_lands_on_the_cone_boundary() { + let c = SecondOrderCone::new(3); + let v = [2.0, 0.0, 0.0]; // interior, det = 4 + let dv = [-1.0, 1.0, 0.0]; // heads toward / out of the cone + // Step to boundary (tau = 1): det(v+αdv) = 0. + let alpha = c.max_step(&v, &dv, 1.0); + let p: Vec = (0..3).map(|k| v[k] + alpha * dv[k]).collect(); + // Either on the determinant boundary or the step was capped at 1. + assert!(alpha <= 1.0 + 1e-12); + if alpha < 1.0 - 1e-9 { + assert!( + SecondOrderCone::det(&p).abs() < 1e-7, + "det={}", + SecondOrderCone::det(&p) + ); + } + } + + #[test] + fn max_step_caps_at_one_when_staying_interior() { + let c = SecondOrderCone::new(3); + let v = [5.0, 0.0, 0.0]; + let dv = [1.0, 0.1, -0.1]; // det(dv)=1-0.02>0, b>0 ⇒ stays interior + assert!((c.max_step(&v, &dv, 0.99) - 1.0).abs() < 1e-12); + } + + /// `arw_inv` is a genuine inverse: Arw(z)·arw_inv(z,b) = b. This is the + /// operator the reduced-system rhs / `recover_ds` rely on. + #[test] + fn arw_inv_inverts_the_arrow_operator() { + let z = [3.0, 1.0, -0.5]; // interior + let b = [0.7, -0.2, 0.4]; + let mut x = [0.0; 3]; + SecondOrderCone::arw_inv(&z, &b, &mut x); + // Arw(z) x = (z·x, z₀ x₁ + x₀ z₁). + let zx: f64 = z.iter().zip(&x).map(|(a, c)| a * c).sum(); + assert!((zx - b[0]).abs() < 1e-12); + for k in 1..3 { + assert!((z[0] * x[k] + x[0] * z[k] - b[k]).abs() < 1e-12); + } + } + + /// `apply_w2` (matrix-free) equals the dense `kkt_block` matrix times + /// the vector — so `recover_ds`'s `W⁻²dz` is *exactly* the assembled + /// KKT block, the consistency the reduced system depends on. + #[test] + fn apply_w2_matches_dense_kkt_block() { + let c = SecondOrderCone::new(4); + let s = [2.0, 0.5, -0.5, 0.3]; + let z = [3.0, 1.0, 0.5, -0.2]; + let w2 = dense(&c.kkt_block(&s, &z), 4); + let dz = [0.3, -0.7, 0.2, 0.9]; + let want = matvec(&w2, &dz); + let (eta, w_bar) = SecondOrderCone::nt_scaling(&s, &z); + let mut got = [0.0; 4]; + SecondOrderCone::apply_w2(eta, &w_bar, &dz, &mut got); + for k in 0..4 { + assert!( + (got[k] - want[k]).abs() < 1e-12, + "k={k}: {} vs {}", + got[k], + want[k] + ); + } + } + + /// Reduced-system triple reduces to the orthant in 1-D: for `m = 1`, + /// the block is `s/z`, the rhs term is `r/z`, and `recover_ds` is + /// `−r/z − (s/z)dz`. + #[test] + fn one_dimensional_cone_matches_orthant() { + let c = SecondOrderCone::new(1); + let s = [2.0]; + let z = [5.0]; + match c.kkt_block(&s, &z) { + ConeBlock::DiagPlusRank1 { diag, u } => { + // 1-D: W²[0] = diag + u² = −η² + 2η² = η² = s/z. + assert!((diag[0] + u[0] * u[0] - s[0] / z[0]).abs() < 1e-12); + } + _ => panic!(), + } + let r = [0.6]; + let mut term = [0.0]; + c.rhs_comp_term(&s, &z, &r, &mut term); + assert!((term[0] - r[0] / z[0]).abs() < 1e-12); + let dz = [0.4]; + let mut ds = [0.0]; + c.recover_ds(&s, &z, &r, &dz, &mut ds); + assert!((ds[0] - (-r[0] / z[0] - (s[0] / z[0]) * dz[0])).abs() < 1e-12); + } +} diff --git a/crates/pounce-convex/src/debug.rs b/crates/pounce-convex/src/debug.rs new file mode 100644 index 00000000..2bb6f3e1 --- /dev/null +++ b/crates/pounce-convex/src/debug.rs @@ -0,0 +1,281 @@ +//! Debugger glue for the convex interior-point method. +//! +//! [`ConvexDebugState`] adapts one iteration of the convex IPM / +//! HSDE loops to the shared [`DebugState`] surface, so the CLI's +//! `SolverDebugger` (a [`DebugHook`]) can step, inspect, **mutate**, and +//! break on a convex LP / QP / conic solve as it does on the NLP path. +//! +//! Block names follow the QP standard form: `x` (variables), `s` (cone +//! slacks), `y` (equality multipliers), `z` (inequality / cone +//! multipliers); their search-direction counterparts are addressed by the +//! same names. The HSDE drivers additionally expose the homogenizing +//! scalars `tau` / `kappa` as 1-element blocks. +//! +//! The state borrows the live iterate **mutably**, so `set ` edits +//! it in place and `snapshot`/`restore` (the `goto` rewind) round-trip it. +//! `set mu` is rejected: the convex μ is *derived* from `⟨s, z⟩`, not a +//! free knob — edit `s`/`z` instead. There is no backtracking line search +//! or restoration phase, so [`ls_count`](DebugState::ls_count) reports +//! "n/a". + +use pounce_common::debug::{Checkpoint, DebugAction, DebugHook, DebugState, IterSnapshot}; +use pounce_common::types::Number; +use std::any::Any; + +/// A captured convex/HSDE iterate for `goto`/rewind. Stores the primal-dual +/// blocks plus the homogenizing scalars (HSDE) so a restore is exact. +pub(crate) struct ConvexSnapshot { + iter: i32, + mu: f64, + x: Vec, + s: Vec, + y: Vec, + z: Vec, + tau: Option, + kappa: Option, +} + +impl IterSnapshot for ConvexSnapshot { + fn iter(&self) -> i32 { + self.iter + } + fn mu(&self) -> Number { + self.mu + } + fn block(&self, name: &str) -> Option> { + match name { + "x" => Some(self.x.clone()), + "s" => Some(self.s.clone()), + "y" => Some(self.y.clone()), + "z" => Some(self.z.clone()), + "tau" => self.tau.map(|t| vec![t]), + "kappa" => self.kappa.map(|k| vec![k]), + _ => None, + } + } + fn as_any(&self) -> &dyn Any { + self + } +} + +/// A live, mutable view of one convex-IPM / HSDE iteration for the debugger. +/// +/// Holds mutable borrows of the live iterate (`x`/`s`/`y`/`z`, and for the +/// HSDE drivers the scalars `τ`/`κ`) plus read-only borrows of the current +/// search direction (`dx`/…). Cheap to build and dropped before the loop +/// touches the iterate again. +pub(crate) struct ConvexDebugState<'a> { + pub cp: Checkpoint, + pub iter: i32, + pub mu: f64, + /// Max-norm primal infeasibility (max over equality / cone residuals). + pub pinf: f64, + /// Max-norm dual (stationarity) infeasibility. + pub dinf: f64, + /// `max(pinf, dinf, mu)` — the scalar convergence test. + pub res: f64, + pub obj: f64, + pub alpha: (f64, f64), + pub x: &'a mut [f64], + pub s: &'a mut [f64], + pub y: &'a mut [f64], + pub z: &'a mut [f64], + pub dx: &'a [f64], + pub dy: &'a [f64], + pub dz: &'a [f64], + pub ds: &'a [f64], + /// HSDE homogenizing variable τ (the iterate is the homogeneous + /// `(x, s, y, z, τ, κ)`; the recovered solution is `x/τ`). `None` for + /// the direct (non-homogeneous) driver. + pub tau: Option<&'a mut f64>, + /// HSDE homogenizing variable κ. `None` for the direct driver. + pub kappa: Option<&'a mut f64>, + pub status: Option<&'a str>, +} + +impl ConvexDebugState<'_> { + /// Write `vals` into a named iterate block in place (length-checked). + fn write_block(&mut self, name: &str, vals: &[Number]) -> Result<(), String> { + let slot: &mut [f64] = match name { + "x" => self.x, + "s" => self.s, + "y" => self.y, + "z" => self.z, + "tau" => { + return set_scalar(self.tau.as_deref_mut(), "tau", vals); + } + "kappa" => { + return set_scalar(self.kappa.as_deref_mut(), "kappa", vals); + } + _ => return Err(format!("unknown block `{name}`")), + }; + if vals.len() != slot.len() { + return Err(format!( + "block `{name}` has dimension {}, got {} value(s)", + slot.len(), + vals.len() + )); + } + slot.copy_from_slice(vals); + Ok(()) + } +} + +/// Set a single-element scalar "block" (`tau`/`kappa`) if it exists. +fn set_scalar(slot: Option<&mut f64>, name: &str, vals: &[Number]) -> Result<(), String> { + let Some(slot) = slot else { + return Err(format!("this solver has no `{name}`")); + }; + match vals { + [v] => { + *slot = *v; + Ok(()) + } + _ => Err(format!( + "`{name}` is a scalar; expected 1 value, got {}", + vals.len() + )), + } +} + +impl DebugState for ConvexDebugState<'_> { + fn checkpoint(&self) -> Checkpoint { + self.cp + } + fn iter(&self) -> i32 { + self.iter + } + fn mu(&self) -> Number { + self.mu + } + fn objective(&self) -> Number { + self.obj + } + fn inf_pr(&self) -> Number { + self.pinf + } + fn inf_du(&self) -> Number { + self.dinf + } + fn complementarity(&self) -> Number { + // For a symmetric cone μ = ⟨s, z⟩ / degree is exactly the average + // complementarity, so it doubles as the central-path gauge. + self.mu + } + fn alpha(&self) -> (Number, Number) { + self.alpha + } + fn block_dims(&self) -> Vec<(&'static str, usize)> { + let mut v = vec![ + ("x", self.x.len()), + ("s", self.s.len()), + ("y", self.y.len()), + ("z", self.z.len()), + ]; + // The homogenizing scalars are addressable as 1-element blocks on + // the HSDE driver (`print tau` / `print kappa`). + if self.tau.is_some() { + v.push(("tau", 1)); + } + if self.kappa.is_some() { + v.push(("kappa", 1)); + } + v + } + fn block(&self, name: &str) -> Option> { + match name { + "x" => Some(self.x.to_vec()), + "s" => Some(self.s.to_vec()), + "y" => Some(self.y.to_vec()), + "z" => Some(self.z.to_vec()), + "tau" => self.tau.as_deref().copied().map(|t| vec![t]), + "kappa" => self.kappa.as_deref().copied().map(|k| vec![k]), + _ => None, + } + } + fn delta_block(&self, name: &str) -> Option> { + match name { + "x" => Some(self.dx.to_vec()), + "s" => Some(self.ds.to_vec()), + "y" => Some(self.dy.to_vec()), + "z" => Some(self.dz.to_vec()), + _ => None, + } + } + fn status(&self) -> Option<&str> { + self.status + } + /// The convex IPM's scalar convergence error `max(pinf, dinf, μ)`, so + /// `break if err<…` works the same as on the NLP path. + fn nlp_error(&self) -> Number { + self.res + } + + // ---- mutation ------------------------------------------------------- + + /// Rejected: the convex/HSDE μ is derived from `⟨s, z⟩` (and `τκ`), not + /// a free parameter — editing it would be silently overwritten next + /// iteration. Edit `s`/`z` to move μ. + fn set_mu(&mut self, _mu: Number) -> Result<(), String> { + Err("convex μ is derived from ⟨s,z⟩; edit the `s`/`z` blocks instead".into()) + } + + fn set_block(&mut self, name: &str, vals: &[Number]) -> Result<(), String> { + self.write_block(name, vals) + } + + // ---- snapshot / rewind --------------------------------------------- + + fn snapshot(&self) -> Option> { + Some(Box::new(ConvexSnapshot { + iter: self.iter, + mu: self.mu, + x: self.x.to_vec(), + s: self.s.to_vec(), + y: self.y.to_vec(), + z: self.z.to_vec(), + tau: self.tau.as_deref().copied(), + kappa: self.kappa.as_deref().copied(), + })) + } + + fn restore(&mut self, snap: &dyn IterSnapshot) -> bool { + let Some(s) = snap.as_any().downcast_ref::() else { + return false; + }; + // Dimensions must match the live iterate (a snapshot from a + // different problem/driver is refused rather than truncated). + if s.x.len() != self.x.len() + || s.s.len() != self.s.len() + || s.y.len() != self.y.len() + || s.z.len() != self.z.len() + || s.tau.is_some() != self.tau.is_some() + { + return false; + } + self.x.copy_from_slice(&s.x); + self.s.copy_from_slice(&s.s); + self.y.copy_from_slice(&s.y); + self.z.copy_from_slice(&s.z); + if let (Some(dst), Some(v)) = (self.tau.as_deref_mut(), s.tau) { + *dst = v; + } + if let (Some(dst), Some(v)) = (self.kappa.as_deref_mut(), s.kappa) { + *dst = v; + } + true + } +} + +/// Fire a checkpoint at `state` if a hook is attached. A no-op (and +/// always [`DebugAction::Resume`]) when `hook` is `None`, so the +/// hook-free solve path pays nothing. +pub(crate) fn fire( + hook: &mut Option<&mut dyn DebugHook>, + state: &mut dyn DebugState, +) -> DebugAction { + match hook.as_mut() { + Some(h) => h.at_checkpoint(state), + None => DebugAction::Resume, + } +} diff --git a/crates/pounce-convex/src/equilibrate.rs b/crates/pounce-convex/src/equilibrate.rs new file mode 100644 index 00000000..814d5673 --- /dev/null +++ b/crates/pounce-convex/src/equilibrate.rs @@ -0,0 +1,325 @@ +//! Ruiz equilibration for the convex LP/QP interior-point method. +//! +//! The direct primal–dual IPM ([`crate::ipm::solve_qp_ipm`]) factorizes the +//! KKT system of the **raw** problem data. On a badly-scaled LP/QP — large +//! dynamic range across the rows of `A`/`G`, the columns (variables), or the +//! objective — that system is ill-conditioned, the Newton steps are wild, the +//! iterates blow up, and the cone-scaling block `S⁻¹Z` eventually drives the +//! KKT matrix singular, surfacing as a `NumericalFailure`. (The NLP solver and +//! Ipopt/MA57 avoid this because they equilibrate the problem first.) +//! +//! This module supplies the missing piece for the orthant (LP/QP) path: a few +//! sweeps of **Ruiz scaling** on the symmetric augmented matrix +//! +//! ```text +//! K = | P Aᵀ Gᵀ | +//! | A 0 0 | +//! | G 0 0 | +//! ``` +//! +//! followed by a scalar **cost scaling** σ that brings the objective gradient +//! to O(1). Each Ruiz sweep rescales every row/column of `K` by the inverse +//! square root of its current ∞-norm; because `K` is symmetric the row and +//! column scalings coincide, yielding one scale vector split into a per-column +//! (variable) scaling `Dc`, per-equality-row `R_A`, and per-inequality-row +//! `R_G`. +//! +//! Equilibration is a *change of variables*, so the recovered optimum is the +//! same KKT point — only the conditioning of the iteration changes. The +//! substitution is `x = Dc x̂`, giving the scaled data +//! +//! ```text +//! P̂ = σ·Dc P Dc, ĉ = σ·Dc c, +//! Â = R_A A Dc, b̂ = R_A b, +//! Ĝ = R_G G Dc, ĥ = R_G h, +//! lb̂ = Dc⁻¹ lb, ûb = Dc⁻¹ ub, +//! ``` +//! +//! and the dual unscaling (derived in [`Scaling::unscale_solution`]) +//! +//! ```text +//! x = Dc x̂, y = R_A ŷ / σ, z = R_G ẑ / σ, +//! z_lb = ẑ_lb /(σ·Dc), z_ub = ẑ_ub /(σ·Dc). +//! ``` +//! +//! **Scope.** This is valid only for the **nonnegative orthant** (the LP/QP +//! inequalities and the expanded variable bounds): per-row scaling of `G` +//! preserves `z ≥ 0`. It must NOT be applied to second-order / exponential / +//! power cones, whose rows must scale uniformly to preserve the cone — hence +//! it is wired only into [`crate::ipm::solve_qp_ipm`] and skipped under the +//! HSDE/conic drivers. + +use crate::qp::{QpProblem, QpSolution, Triplet, BOUND_INF, NEG_INF, POS_INF}; +use crate::QpWarmStart; + +/// Number of Ruiz sweeps. Ruiz converges geometrically; a handful of passes +/// brings the row/column ∞-norms to within a few percent of 1, which is all +/// the conditioning improvement the IPM needs. More passes cost +/// `O(nnz)` each for negligible further gain. +const RUIZ_SWEEPS: usize = 10; + +/// Clamp on the scalar cost-scaling factor σ, so a degenerate objective +/// (tiny or huge gradient) cannot itself create an extreme scaling. +const SIGMA_LO: f64 = 1e-8; +const SIGMA_HI: f64 = 1e8; + +/// The diagonal scaling recovered by [`equilibrate`], retained so a scaled +/// solution can be mapped back to the original problem's variables and duals. +pub(crate) struct Scaling { + /// Per-variable (column) scaling `Dc`; `x = Dc x̂`. + dcol: Vec, + /// Per-equality-row scaling `R_A`. + drow_a: Vec, + /// Per-inequality-row scaling `R_G`. + drow_g: Vec, + /// Scalar objective (cost) scaling σ > 0. + sigma: f64, +} + +/// Ruiz-equilibrate `prob`, returning the scaled problem and the [`Scaling`] +/// needed to undo it. The scaled problem has the same dimensions, sparsity +/// pattern, and bound structure as the original; only the numeric data is +/// rescaled. A solution of the scaled problem maps back via +/// [`Scaling::unscale_solution`]. +pub(crate) fn equilibrate(prob: &QpProblem) -> (QpProblem, Scaling) { + let n = prob.n; + let me = prob.m_eq(); + let mi = prob.m_ineq(); + let dim = n + me + mi; + + // Cumulative symmetric scaling for each row/column of the augmented K. + // Index layout: [0, n) variables, [n, n+me) equality rows, + // [n+me, n+me+mi) inequality rows. + let mut s = vec![1.0f64; dim]; + let mut rownorm = vec![0.0f64; dim]; + + for _ in 0..RUIZ_SWEEPS { + rownorm.iter_mut().for_each(|v| *v = 0.0); + // P (lower triangle): symmetric var–var entries. + for t in &prob.p_lower { + let v = (s[t.row] * t.val * s[t.col]).abs(); + if v > rownorm[t.row] { + rownorm[t.row] = v; + } + if t.row != t.col && v > rownorm[t.col] { + rownorm[t.col] = v; + } + } + // A entry (r, c) sits at K(n+r, c) and its transpose K(c, n+r). + for t in &prob.a { + let (ri, ci) = (n + t.row, t.col); + let v = (s[ri] * t.val * s[ci]).abs(); + if v > rownorm[ri] { + rownorm[ri] = v; + } + if v > rownorm[ci] { + rownorm[ci] = v; + } + } + // G entry (r, c) sits at K(n+me+r, c) and its transpose. + for t in &prob.g { + let (ri, ci) = (n + me + t.row, t.col); + let v = (s[ri] * t.val * s[ci]).abs(); + if v > rownorm[ri] { + rownorm[ri] = v; + } + if v > rownorm[ci] { + rownorm[ci] = v; + } + } + // Ruiz update: s_i ← s_i / sqrt(‖row_i‖∞). An all-zero row (e.g. an + // empty column) is left unscaled. + for i in 0..dim { + if rownorm[i] > 0.0 { + s[i] /= rownorm[i].sqrt(); + } + } + } + + let dcol = s[..n].to_vec(); + let drow_a = s[n..n + me].to_vec(); + let drow_g = s[n + me..].to_vec(); + + // Apply the column/row scalings to the data: P̂₀ = Dc P Dc, ĉ₀ = Dc c, + // Â = R_A A Dc, b̂ = R_A b, Ĝ = R_G G Dc, ĥ = R_G h. + let mut p_lower: Vec = prob + .p_lower + .iter() + .map(|t| Triplet::new(t.row, t.col, t.val * dcol[t.row] * dcol[t.col])) + .collect(); + let mut c: Vec = prob + .c + .iter() + .enumerate() + .map(|(i, &ci)| ci * dcol[i]) + .collect(); + let a: Vec = prob + .a + .iter() + .map(|t| Triplet::new(t.row, t.col, t.val * drow_a[t.row] * dcol[t.col])) + .collect(); + let b: Vec = prob + .b + .iter() + .enumerate() + .map(|(r, &br)| br * drow_a[r]) + .collect(); + let g: Vec = prob + .g + .iter() + .map(|t| Triplet::new(t.row, t.col, t.val * drow_g[t.row] * dcol[t.col])) + .collect(); + let h: Vec = prob + .h + .iter() + .enumerate() + .map(|(r, &hr)| hr * drow_g[r]) + .collect(); + let lb = scale_bounds(&prob.lb, &dcol, NEG_INF); + let ub = scale_bounds(&prob.ub, &dcol, POS_INF); + + // Cost scaling σ, applied to the objective **only for a pure LP** + // (empty/zero `P`). Rationale: the Ruiz pass above already normalizes the + // `P` block of the augmented matrix to O(1), so for a QP the objective is + // *already* commensurate with the constraint blocks — and because σ must + // scale `P` and `c` together to preserve the minimizer, applying σ < 1 to a + // QP would shrink the Hessian below the constraint scale, degrading the + // scaled problem's strong convexity, diverging the dual iterates, and + // tripping the direct path's Farkas detector with a false `PrimalInfeasible`. + // + // An LP has no `P` block for Ruiz to anchor the objective scale against, so + // a large linear term `c` (e.g. NETLIB `nl`, ‖c‖ ~ 1e6) survives + // equilibration, drives huge Newton steps, and pushes the cone-scaling block + // until the KKT factorization goes singular. Here σ = 1/max|ĉ| is both + // necessary and harmless (no Hessian to unbalance). + let is_lp = p_lower.iter().all(|t| t.val == 0.0); + let cmax = c.iter().fold(0.0f64, |m, &v| m.max(v.abs())); + let sigma = if is_lp && cmax > 0.0 { + (1.0 / cmax).clamp(SIGMA_LO, SIGMA_HI) + } else { + 1.0 + }; + if sigma != 1.0 { + // (`p_lower` is empty here, but scale it for completeness/robustness.) + p_lower.iter_mut().for_each(|t| t.val *= sigma); + c.iter_mut().for_each(|v| *v *= sigma); + } + + let scaled = QpProblem { + n, + p_lower, + c, + a, + b, + g, + h, + lb, + ub, + }; + ( + scaled, + Scaling { + dcol, + drow_a, + drow_g, + sigma, + }, + ) +} + +/// Scale a bound vector by `1/dcol` (since `x̂ = Dc⁻¹ x`), preserving the +/// ±∞ sentinels and the "no bounds" empty-vector convention. `dcol > 0`, so +/// the sign and finiteness of each bound are preserved. +fn scale_bounds(bnd: &[f64], dcol: &[f64], inf: f64) -> Vec { + if bnd.is_empty() { + return Vec::new(); + } + bnd.iter() + .enumerate() + .map(|(i, &v)| { + if v.abs() >= BOUND_INF { + inf + } else { + v / dcol[i] + } + }) + .collect() +} + +impl Scaling { + /// Map a solution of the scaled problem back to the original problem's + /// variables and duals, in place. `orig` is the unscaled problem, used to + /// recompute the objective `½xᵀPx + cᵀx` directly at the recovered `x` + /// (cheaper and more robust than dividing the scaled objective by σ). + pub(crate) fn unscale_solution(&self, orig: &QpProblem, sol: &mut QpSolution) { + for (xi, &d) in sol.x.iter_mut().zip(&self.dcol) { + *xi *= d; + } + for (yi, &d) in sol.y.iter_mut().zip(&self.drow_a) { + *yi *= d / self.sigma; + } + for (zi, &d) in sol.z.iter_mut().zip(&self.drow_g) { + *zi *= d / self.sigma; + } + for (zi, &d) in sol.z_lb.iter_mut().zip(&self.dcol) { + *zi /= self.sigma * d; + } + for (zi, &d) in sol.z_ub.iter_mut().zip(&self.dcol) { + *zi /= self.sigma * d; + } + // Recompute the objective at the unscaled primal point. + let mut px = vec![0.0; orig.n]; + orig.p_mul(&sol.x, &mut px); + let mut obj = 0.0; + for ((&xi, &pxi), &ci) in sol.x.iter().zip(&px).zip(&orig.c) { + obj += 0.5 * xi * pxi + ci * xi; + } + sol.obj = obj; + } + + /// Map a warm-start point given in the **original** problem's coordinates + /// into the scaled problem's coordinates — the exact inverse of + /// [`Scaling::unscale_solution`]'s primal/dual maps: + /// + /// ```text + /// x̂ = Dc⁻¹ x, ŷ = σ y / R_A, ẑ = σ z / R_G, + /// ẑ_lb = σ·Dc·z_lb, ẑ_ub = σ·Dc·z_ub. + /// ``` + /// + /// Used so the equilibrated warm path seeds the scaled solve with a point + /// equivalent to the caller's warm start, preserving the warm-start benefit. + pub(crate) fn scale_warm_start(&self, warm: &QpWarmStart) -> QpWarmStart { + QpWarmStart { + x: warm + .x + .iter() + .zip(&self.dcol) + .map(|(&xi, &d)| xi / d) + .collect(), + y: warm + .y + .iter() + .zip(&self.drow_a) + .map(|(&yi, &d)| yi * self.sigma / d) + .collect(), + z: warm + .z + .iter() + .zip(&self.drow_g) + .map(|(&zi, &d)| zi * self.sigma / d) + .collect(), + z_lb: warm + .z_lb + .iter() + .zip(&self.dcol) + .map(|(&zi, &d)| zi * self.sigma * d) + .collect(), + z_ub: warm + .z_ub + .iter() + .zip(&self.dcol) + .map(|(&zi, &d)| zi * self.sigma * d) + .collect(), + } + } +} diff --git a/crates/pounce-convex/src/hsde.rs b/crates/pounce-convex/src/hsde.rs new file mode 100644 index 00000000..69855df8 --- /dev/null +++ b/crates/pounce-convex/src/hsde.rs @@ -0,0 +1,995 @@ +//! Homogeneous self-dual embedding (HSDE) driver for the convex IPM. +//! +//! This is the foundation for Clarabel cone parity (see +//! `dev-notes/hsde.md` and `dev-notes/clarabel-parity.md`). It reformulates +//! the interior-point iteration as a *single self-dual system* in the +//! embedded variables `(x, y, z, s, τ, κ)`, so that +//! +//! - a self-starting iterate handles primal- and dual-infeasible problems +//! uniformly (no infeasible start), and +//! - infeasibility/unboundedness falls out of the embedding (`τ → 0`, +//! `κ > 0`) rather than from a bolt-on certificate watch. +//! +//! **The per-cone math and the KKT factorization are reused verbatim.** The +//! embedding borders the existing symmetric `(x, y, z)` block `M` +//! (assembled by [`crate::ipm::KktStructure`], with each cone's NT scaling +//! `W²` from [`Cone::kkt_block`]) by the scalar `τ`, and solves it with +//! **two** back-solves through the *same* factorization plus a scalar Schur +//! complement (the SCS/ECOS scheme): `M p = (−c, b, h)` (the constant +//! direction) and `M q = residual`, combined with `Δτ` from the τ/κ row. +//! +//! ## Scope (Phases H2–H3) +//! +//! This driver implements the embedding over a product of nonnegative-orthant +//! and second-order cones — it solves LPs, QPs, and SOCPs (the full current +//! problem class). The **quadratic objective** (`P ⪰ 0`) is handled via +//! Clarabel's QP embedding: the τ-row gains the `xᵀPx/τ` coupling, so its +//! gradient becomes `g̃ = (c + (2/τ)Px, b, h)` and its scalar Schur +//! complement a `−xᵀPx/τ²` term. Crucially, `P` already sits in `M`'s +//! `(x, x)` block and in the dual residual `ρ_x`, so the two M-solves, the +//! cone elimination, and the step are *identical* to the linear case — only +//! the τ-row scalar is new (and reduces to the linear case at `P = 0`). +//! +//! The switch-over to make HSDE the default (Phase H4) still follows; for +//! now `solve_qp_ipm`/`solve_socp_ipm` remain the production path and this +//! module is validated to reproduce their optima and certificates. + +use crate::cones::{CompositeCone, Cone}; +use crate::debug::{fire, ConvexDebugState}; +use crate::ipm::{ + build_factorization, build_rhs, detect_infeasibility_cone, dot, inf_norm, split_step, QpOptions, +}; +use crate::qp::{QpIterate, QpProblem, QpSolution, QpStatus}; +use pounce_common::debug::{Checkpoint, DebugAction, DebugHook}; +use pounce_linsol::SparseSymLinearSolverInterface; + +/// Fraction-to-boundary step for a positive scalar ray `v + α dv > 0`, +/// scaled by `tau` and capped at 1 (the scalar analogue of `Cone::max_step` +/// for the homogenizing variables `τ`, `κ`). +fn ray_step(v: f64, dv: f64, tau: f64) -> f64 { + if dv < 0.0 { + (tau * (-v / dv)).min(1.0) + } else { + 1.0 + } +} + +/// Solve `min ½xᵀPx + cᵀx s.t. Ax = b, Gx ⪯_K h` via the homogeneous +/// self-dual embedding, returning the un-homogenized solution. `P = 0` is an +/// LP/SOCP; `P ⪰ 0` a QP (the τ-row picks up the `xᵀPx/τ` coupling). +/// +/// `cone` is the product cone `K` over the `m_ineq` inequality rows (built +/// by the caller exactly as for [`crate::ipm::solve_socp_ipm`]). Variable +/// bounds must already be expanded into `cone` rows by the caller. +pub(crate) fn solve_conic_hsde( + prob: &QpProblem, + cone: &CompositeCone, + opts: &QpOptions, + mut make_backend: F, + mut hook: Option<&mut dyn DebugHook>, +) -> QpSolution +where + F: FnMut() -> Box, +{ + let n = prob.n; + let m_eq = prob.m_eq(); + let m_ineq = prob.m_ineq(); + let degree = cone.degree(); + + let (kkt, mut fact) = match build_factorization(prob, cone, opts, &mut make_backend) { + Ok(pair) => pair, + Err(()) => return failed(prob), + }; + + // Constant border data: −b, −h (so `build_rhs` yields the `(−c, b, h)` + // right-hand side of the constant direction `p`). + let neg_b: Vec = prob.b.iter().map(|v| -v).collect(); + let neg_h: Vec = prob.h.iter().map(|v| -v).collect(); + let zeros_m = vec![0.0; m_ineq]; + + // Self-dual start: x = y = 0, s = z = e (cone identity), τ = κ = 1. + let mut x = vec![0.0; n]; + let mut y = vec![0.0; m_eq]; + let mut e = vec![0.0; m_ineq]; + cone.identity(&mut e); + let mut s = e.clone(); + let mut z = e; + let mut tau = 1.0_f64; + let mut kappa = 1.0_f64; + + // Residual + work buffers. + let mut rho_x = vec![0.0; n]; + let mut rho_y = vec![0.0; m_eq]; + let mut rho_z = vec![0.0; m_ineq]; + let mut px_vec = vec![0.0; n]; // P x (quadratic-objective coupling) + let mut r_c = vec![0.0; m_ineq]; + let mut comp = vec![0.0; m_ineq]; + let mut kkt_vals = kkt.values.clone(); + let mut rhs = vec![0.0; kkt.dim]; + + // Direction buffers: p = constant direction, (dx,dy,dz) = the running + // step, with affine slack/dual kept for the Mehrotra corrector. + let mut p_x = vec![0.0; n]; + let mut p_y = vec![0.0; m_eq]; + let mut p_z = vec![0.0; m_ineq]; + let mut dx = vec![0.0; n]; + let mut dy = vec![0.0; m_eq]; + let mut dz = vec![0.0; m_ineq]; + let mut ds = vec![0.0; m_ineq]; + let mut dz_aff = vec![0.0; m_ineq]; + let mut ds_aff = vec![0.0; m_ineq]; + + let mut status = QpStatus::IterationLimit; + let mut iters = 0; + // Opt-in per-iteration convergence trace (mirrors the direct path's + // `collect_iterates`): one record per stepping iteration plus a terminal + // record at the converged iterate (α = 0). + let mut trace: Vec = Vec::new(); + + for it in 0..opts.max_iter { + iters = it; + + // --- quadratic-objective coupling: Px and xᵀPx (zero for an LP) --- + for v in px_vec.iter_mut() { + *v = 0.0; + } + prob.p_mul(&x, &mut px_vec); + let xpx = dot(&x, &px_vec); + + // --- homogeneous residuals --- + // ρ_x = P x + Aᵀy + Gᵀz + c·τ + for (r, (&ci, &pxi)) in rho_x.iter_mut().zip(prob.c.iter().zip(&px_vec)) { + *r = ci * tau + pxi; + } + prob.at_mul(&y, &mut rho_x); + prob.gt_mul(&z, &mut rho_x); + // ρ_y = A x − b·τ + for (r, &bi) in rho_y.iter_mut().zip(&prob.b) { + *r = -bi * tau; + } + prob.a_mul(&x, &mut rho_y); + // ρ_z = G x + s − h·τ + for i in 0..m_ineq { + rho_z[i] = s[i] - prob.h[i] * tau; + } + prob.g_mul(&x, &mut rho_z); + // ρ_τ = κ + cᵀx + bᵀy + hᵀz + xᵀPx/τ + let ctx = dot(&prob.c, &x); + let bty = dot(&prob.b, &y); + let htz = dot(&prob.h, &z); + let rho_tau = kappa + ctx + bty + htz + xpx / tau; + + let sz = dot(&s, &z); + let mu = (sz + tau * kappa) / (degree as f64 + 1.0); + + // --- convergence (un-homogenized residuals; divide out τ) --- + // Gap = x̂ᵀPx̂ + cᵀx̂ + bᵀŷ + hᵀẑ = (xᵀPx/τ + cᵀx + bᵀy + hᵀz)/τ. + let pres = inf_norm(&rho_y).max(inf_norm(&rho_z)) / tau; + let dres = inf_norm(&rho_x) / tau; + let gap = (xpx / tau + ctx + bty + htz).abs() / tau; + let res = pres.max(dres).max(gap); + // "Acceptable level": near the cone boundary the scaling/factorization + // can break down a hair short of `tol`. If the unregularized KKT + // residuals are already tiny (within `~1e3·tol`) when that happens, the + // current iterate *is* essentially optimal — accept it rather than + // reporting a spurious `NumericalFailure`. This mirrors the + // non-symmetric HSDE driver (`hsde_nonsym.rs`), which already does this; + // the two drivers were inconsistent (the symmetric one discarded usable + // SOC/orthant iterates that the non-symmetric one would have accepted). + let near_opt = res < 1e3 * opts.tol; + // Un-homogenized objective `½x̂ᵀPx̂ + cᵀx̂` (x̂ = x/τ) — what the + // trace and debugger report. + let obj_hat = 0.5 * xpx / (tau * tau) + ctx / tau; + + // Debugger checkpoint: top of iteration. Blocks expose the + // homogeneous iterate `(x, s, y, z, τ, κ)`; the objective is the + // un-homogenized `½x̂ᵀPx̂ + cᵀx̂` with `x̂ = x/τ` (what the user reads). + if hook.is_some() { + let mut st = ConvexDebugState { + cp: Checkpoint::IterStart, + iter: it as i32, + mu, + pinf: pres, + dinf: dres, + res, + obj: obj_hat, + alpha: (0.0, 0.0), + x: &mut x, + s: &mut s, + y: &mut y, + z: &mut z, + dx: &dx, + dy: &dy, + dz: &dz, + ds: &ds, + tau: Some(&mut tau), + kappa: Some(&mut kappa), + status: None, + }; + if fire(&mut hook, &mut st) == DebugAction::Stop { + break; + } + } + + if pres < opts.tol && dres < opts.tol && gap < opts.tol { + status = QpStatus::Optimal; + // Terminal record at the converged iterate (no step taken). + if opts.collect_iterates { + trace.push(QpIterate { + iter: it, + objective: obj_hat, + primal_infeasibility: pres, + dual_infeasibility: dres, + mu, + alpha_primal: 0.0, + alpha_dual: 0.0, + }); + } + break; + } + + // --- infeasibility (the embedding drives the iterate onto the + // Farkas/recession ray as τ → 0; the same verified relative checks + // as the direct driver apply to the homogeneous (x, y, z)). --- + if tau < 1e-2 * kappa.max(1.0) { + if let Some(st) = detect_infeasibility_cone(prob, &x, &y, &z, opts, cone) { + status = st; + break; + } + } + + // --- refactor M with the current cone scaling --- + kkt.update_blocks(cone, &s, &z, opts.reg, &mut kkt_vals); + if fact.refactor(&kkt_vals).is_err() { + status = if near_opt { + QpStatus::Optimal + } else { + QpStatus::NumericalFailure + }; + break; + } + + // --- constant direction p: M p = (−c, b, h) --- + build_rhs(&prob.c, &neg_b, &neg_h, &zeros_m, n, m_eq, m_ineq, &mut rhs); + if fact.solve_one(&mut rhs).is_err() { + status = if near_opt { + QpStatus::Optimal + } else { + QpStatus::NumericalFailure + }; + break; + } + split_step(&rhs, n, m_eq, m_ineq, &mut p_x, &mut p_y, &mut p_z); + // τ-row gradient g̃ = (c + (2/τ)Px, b, h) and the scalar Schur + // denominator g̃ᵀp − κ/τ − xᵀPx/τ² (the last two terms are the τ/κ + // ray and the quadratic coupling; both vanish for an LP). + let two_over_tau = 2.0 / tau; + let gtp = dot(&prob.c, &p_x) + + two_over_tau * dot(&px_vec, &p_x) + + dot(&prob.b, &p_y) + + dot(&prob.h, &p_z); + let denom = gtp - kappa / tau - xpx / (tau * tau); + + // === Predictor (affine, σ = 0) === + cone.comp_residual(&s, &z, 0.0, &mut r_c); + cone.rhs_comp_term(&s, &z, &r_c, &mut comp); + build_rhs(&rho_x, &rho_y, &rho_z, &comp, n, m_eq, m_ineq, &mut rhs); + if fact.solve_one(&mut rhs).is_err() { + status = if near_opt { + QpStatus::Optimal + } else { + QpStatus::NumericalFailure + }; + break; + } + split_step(&rhs, n, m_eq, m_ineq, &mut dx, &mut dy, &mut dz); + let gtq = dot(&prob.c, &dx) + + two_over_tau * dot(&px_vec, &dx) + + dot(&prob.b, &dy) + + dot(&prob.h, &dz); + // Δτ = [−ρ_τ − g̃ᵀq − (σμ − τκ)/τ] / denom; predictor σμ = 0, + // so −(0 − τκ)/τ = +κ. + let dtau_aff = (-rho_tau - gtq + kappa) / denom; + // Full affine directions dw = q + Δτ·p (only dz needed downstream). + for i in 0..m_ineq { + dz_aff[i] = dz[i] + dtau_aff * p_z[i]; + } + let dkappa_aff = (-tau * kappa - kappa * dtau_aff) / tau; + cone.recover_ds(&s, &z, &r_c, &dz_aff, &mut ds_aff); + + // Affine step length over the cone and the τ/κ rays. + let mut alpha_aff = + ray_step(tau, dtau_aff, opts.tau).min(ray_step(kappa, dkappa_aff, opts.tau)); + if m_ineq > 0 { + alpha_aff = alpha_aff + .min(cone.max_step(&s, &ds_aff, opts.tau)) + .min(cone.max_step(&z, &dz_aff, opts.tau)); + } + // μ_aff and Mehrotra centering σ = (μ_aff/μ)³. + let mut dot_aff = (tau + alpha_aff * dtau_aff) * (kappa + alpha_aff * dkappa_aff); + for i in 0..m_ineq { + dot_aff += (s[i] + alpha_aff * ds_aff[i]) * (z[i] + alpha_aff * dz_aff[i]); + } + let mu_aff = dot_aff / (degree as f64 + 1.0); + let sigma = if mu > 0.0 { (mu_aff / mu).powi(3) } else { 0.0 }; + let sigma_mu = sigma * mu; + + // === Corrector (centered target + second-order term) === + cone.comp_residual_corrector(&s, &z, &ds_aff, &dz_aff, sigma_mu, &mut r_c); + cone.rhs_comp_term(&s, &z, &r_c, &mut comp); + build_rhs(&rho_x, &rho_y, &rho_z, &comp, n, m_eq, m_ineq, &mut rhs); + if fact.solve_one(&mut rhs).is_err() { + status = if near_opt { + QpStatus::Optimal + } else { + QpStatus::NumericalFailure + }; + break; + } + split_step(&rhs, n, m_eq, m_ineq, &mut dx, &mut dy, &mut dz); + let gtq = dot(&prob.c, &dx) + + two_over_tau * dot(&px_vec, &dx) + + dot(&prob.b, &dy) + + dot(&prob.h, &dz); + // τκ corrector residual: τκ + Δτ_aff·Δκ_aff (target σμ). + let r_tk = tau * kappa + dtau_aff * dkappa_aff; + let dtau = (-rho_tau - gtq - (sigma_mu - r_tk) / tau) / denom; + // Combine: dw = q + Δτ·p. + for i in 0..n { + dx[i] += dtau * p_x[i]; + } + for i in 0..m_eq { + dy[i] += dtau * p_y[i]; + } + for i in 0..m_ineq { + dz[i] += dtau * p_z[i]; + } + let dkappa = (sigma_mu - r_tk - kappa * dtau) / tau; + cone.recover_ds(&s, &z, &r_c, &dz, &mut ds); + + // Single fraction-to-boundary step (HSDE is primal/dual-symmetric). + let mut alpha = ray_step(tau, dtau, opts.tau).min(ray_step(kappa, dkappa, opts.tau)); + if m_ineq > 0 { + alpha = alpha + .min(cone.max_step(&s, &ds, opts.tau)) + .min(cone.max_step(&z, &dz, opts.tau)); + } + + // Debugger checkpoint: the combined Newton direction and the single + // symmetric step length are known but not yet applied (α reported + // in both the primal and dual slots). + // Stepping record: the residuals/μ/objective at the start of this + // iteration, paired with the symmetric step length just computed. + if opts.collect_iterates { + trace.push(QpIterate { + iter: it, + objective: obj_hat, + primal_infeasibility: pres, + dual_infeasibility: dres, + mu, + alpha_primal: alpha, + alpha_dual: alpha, + }); + } + + if hook.is_some() { + let mut st = ConvexDebugState { + cp: Checkpoint::AfterSearchDirection, + iter: it as i32, + mu, + pinf: pres, + dinf: dres, + res, + obj: obj_hat, + alpha: (alpha, alpha), + x: &mut x, + s: &mut s, + y: &mut y, + z: &mut z, + dx: &dx, + dy: &dy, + dz: &dz, + ds: &ds, + tau: Some(&mut tau), + kappa: Some(&mut kappa), + status: None, + }; + if fire(&mut hook, &mut st) == DebugAction::Stop { + break; + } + } + + for i in 0..n { + x[i] += alpha * dx[i]; + } + for i in 0..m_eq { + y[i] += alpha * dy[i]; + } + for i in 0..m_ineq { + s[i] += alpha * ds[i]; + z[i] += alpha * dz[i]; + } + tau += alpha * dtau; + kappa += alpha * dkappa; + + // Debugger checkpoint: the new homogeneous iterate is in place. + if hook.is_some() { + // Recompute the objective at the *new* point (`x`, `τ` just moved). + let mut pxn = vec![0.0; n]; + prob.p_mul(&x, &mut pxn); + let obj_hat = 0.5 * dot(&x, &pxn) / (tau * tau) + dot(&prob.c, &x) / tau; + let mut st = ConvexDebugState { + cp: Checkpoint::AfterStep, + iter: it as i32, + mu, + pinf: pres, + dinf: dres, + res, + obj: obj_hat, + alpha: (alpha, alpha), + x: &mut x, + s: &mut s, + y: &mut y, + z: &mut z, + dx: &dx, + dy: &dy, + dz: &dz, + ds: &ds, + tau: Some(&mut tau), + kappa: Some(&mut kappa), + status: None, + }; + if fire(&mut hook, &mut st) == DebugAction::Stop { + break; + } + } + } + + // Un-homogenize: divide by τ to recover the original-space solution. + let inv = if tau.abs() > 0.0 { 1.0 / tau } else { 1.0 }; + let mut x: Vec = x.iter().map(|v| v * inv).collect(); + let mut y: Vec = y.iter().map(|v| v * inv).collect(); + let mut z: Vec = z.iter().map(|v| v * inv).collect(); + // Objective ½xᵀPx + cᵀx. + let mut px = vec![0.0; n]; + prob.p_mul(&x, &mut px); + let obj = 0.5 * dot(&x, &px) + dot(&prob.c, &x); + + // Debugger post-mortem at the recovered (un-homogenized) solution. `s` + // stays in its homogeneous scaling; `dx`/… are the last step. + if hook.is_some() { + let status_str = format!("{status:?}"); + let mut st = ConvexDebugState { + cp: Checkpoint::Terminated, + iter: iters as i32, + mu: 0.0, + pinf: 0.0, + dinf: 0.0, + res: 0.0, + obj, + alpha: (0.0, 0.0), + x: &mut x, + s: &mut s, + y: &mut y, + z: &mut z, + dx: &dx, + dy: &dy, + dz: &dz, + ds: &ds, + tau: Some(&mut tau), + kappa: Some(&mut kappa), + status: Some(&status_str), + }; + let _ = fire(&mut hook, &mut st); + } + + QpSolution { + status, + x, + y, + z, + z_lb: vec![0.0; n], + z_ub: vec![0.0; n], + obj, + iters, + iterates: trace, + } +} + +fn failed(prob: &QpProblem) -> QpSolution { + QpSolution { + status: QpStatus::NumericalFailure, + x: vec![0.0; prob.n], + y: vec![0.0; prob.m_eq()], + z: vec![1.0; prob.m_ineq()], + z_lb: vec![0.0; prob.n], + z_ub: vec![0.0; prob.n], + obj: 0.0, + iters: 0, + iterates: Vec::new(), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::cones::ConeSpec; + use crate::ipm::{solve_qp_ipm, solve_socp_ipm}; + use crate::qp::{QpProblem, Triplet}; + use pounce_feral::FeralSolverInterface; + use pounce_linsol::SparseSymLinearSolverInterface; + + fn backend() -> Box { + Box::new(FeralSolverInterface::new()) + } + + fn opts() -> QpOptions { + QpOptions { + max_iter: 200, + ..QpOptions::default() + } + } + + /// Solve the same (P=0) problem with the HSDE driver and the direct + /// driver; assert both converge and agree on the primal. + fn assert_agrees(prob: &QpProblem, specs: &[ConeSpec], tol: f64) -> QpSolution { + let cone = CompositeCone::from_specs(specs); + let hsde = solve_conic_hsde(prob, &cone, &opts(), backend, None); + let direct = solve_socp_ipm(prob, specs, &opts(), backend); + assert_eq!(hsde.status, QpStatus::Optimal, "HSDE not optimal"); + assert_eq!(direct.status, QpStatus::Optimal, "direct not optimal"); + assert_eq!(hsde.x.len(), direct.x.len()); + for i in 0..hsde.x.len() { + assert!( + (hsde.x[i] - direct.x[i]).abs() < tol, + "x[{i}] HSDE {} vs direct {}", + hsde.x[i], + direct.x[i] + ); + } + hsde + } + + /// LP with one inequality and a known vertex optimum. + /// min −x0 − x1 s.t. x0+x1 ≤ 1, x ≥ 0 → obj −1 on the face x0+x1=1. + #[test] + fn lp_inequality_matches_direct() { + // rows: x0+x1 ≤ 1 ; −x0 ≤ 0 ; −x1 ≤ 0 (all nonneg slacks) + let prob = QpProblem { + n: 2, + p_lower: vec![], + c: vec![-1.0, -1.0], + a: vec![], + b: vec![], + g: vec![ + Triplet::new(0, 0, 1.0), + Triplet::new(0, 1, 1.0), + Triplet::new(1, 0, -1.0), + Triplet::new(2, 1, -1.0), + ], + h: vec![1.0, 0.0, 0.0], + lb: vec![], + ub: vec![], + }; + let sol = assert_agrees(&prob, &[ConeSpec::Nonneg(3)], 1e-6); + assert!((sol.obj + 1.0).abs() < 1e-6, "obj {}", sol.obj); + assert!((sol.x[0] + sol.x[1] - 1.0).abs() < 1e-6); + } + + /// LP with an equality constraint: min cᵀx s.t. 1ᵀx = 1, x ≥ 0. + /// min x0 + 2x1 s.t. x0+x1=1, x≥0 → x=(1,0), obj 1. + #[test] + fn lp_equality_matches_direct() { + let prob = QpProblem { + n: 2, + p_lower: vec![], + c: vec![1.0, 2.0], + a: vec![Triplet::new(0, 0, 1.0), Triplet::new(0, 1, 1.0)], + b: vec![1.0], + g: vec![Triplet::new(0, 0, -1.0), Triplet::new(1, 1, -1.0)], + h: vec![0.0, 0.0], + lb: vec![], + ub: vec![], + }; + let sol = assert_agrees(&prob, &[ConeSpec::Nonneg(2)], 1e-6); + assert!((sol.obj - 1.0).abs() < 1e-5, "obj {}", sol.obj); + assert!(sol.x[0] > 0.99 && sol.x[1] < 1e-4, "x {:?}", sol.x); + } + + /// SOCP norm minimization: min t s.t. (t, x−a) ∈ SOC(3). + /// With G=−I, h=(0,−a0,−a1): optimum t=0, x=a. + #[test] + fn socp_norm_min_matches_direct() { + let a = [2.0_f64, -1.0]; + let prob = QpProblem { + n: 3, + p_lower: vec![], + c: vec![1.0, 0.0, 0.0], + a: vec![], + b: vec![], + g: vec![ + Triplet::new(0, 0, -1.0), + Triplet::new(1, 1, -1.0), + Triplet::new(2, 2, -1.0), + ], + h: vec![0.0, -a[0], -a[1]], + lb: vec![], + ub: vec![], + }; + let sol = assert_agrees(&prob, &[ConeSpec::SecondOrder(3)], 1e-5); + assert!(sol.x[0].abs() < 1e-5, "t {}", sol.x[0]); + assert!((sol.x[1] - a[0]).abs() < 1e-5 && (sol.x[2] - a[1]).abs() < 1e-5); + } + + /// Mixed cone: a nonneg row and a second-order block together. + /// min −x1 s.t. x1 ≤ 0.5 (nonneg), ‖x‖ ≤ 1 (soc (1,x0,x1)). + #[test] + fn socp_mixed_matches_direct() { + let prob = QpProblem { + n: 2, + p_lower: vec![], + c: vec![0.0, -1.0], + a: vec![], + b: vec![], + g: vec![ + Triplet::new(0, 1, 1.0), // nonneg: 0.5 − x1 ≥ 0 + Triplet::new(2, 0, -1.0), // soc s1 = x0 + Triplet::new(3, 1, -1.0), // soc s2 = x1 + ], + h: vec![0.5, 1.0, 0.0, 0.0], + lb: vec![], + ub: vec![], + }; + assert_agrees( + &prob, + &[ConeSpec::Nonneg(1), ConeSpec::SecondOrder(3)], + 1e-5, + ); + } + + /// Equality-constrained QP with a closed-form optimum: + /// min ½‖x‖² − pᵀx s.t. 1ᵀx = 1 → x = p + (1 − Σp)/n. + #[test] + fn qp_equality_closed_form() { + let p = [0.2_f64, 0.5, 0.1]; + let n = 3; + let prob = QpProblem { + n, + p_lower: vec![ + Triplet::new(0, 0, 1.0), + Triplet::new(1, 1, 1.0), + Triplet::new(2, 2, 1.0), + ], + c: vec![-p[0], -p[1], -p[2]], + a: vec![ + Triplet::new(0, 0, 1.0), + Triplet::new(0, 1, 1.0), + Triplet::new(0, 2, 1.0), + ], + b: vec![1.0], + g: vec![], + h: vec![], + lb: vec![], + ub: vec![], + }; + let sol = assert_agrees(&prob, &[], 1e-6); + let shift = (1.0 - p.iter().sum::()) / n as f64; + for i in 0..n { + assert!((sol.x[i] - (p[i] + shift)).abs() < 1e-6, "x {:?}", sol.x); + } + } + + /// Inequality QP with a known optimum: + /// min ‖x‖² − 3x0 − 4x1 s.t. x0+x1 ≤ 1, x ≥ 0 → x = (0.25, 0.75). + #[test] + fn qp_inequality_matches_direct() { + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![-3.0, -4.0], + a: vec![], + b: vec![], + g: vec![ + Triplet::new(0, 0, 1.0), + Triplet::new(0, 1, 1.0), + Triplet::new(1, 0, -1.0), + Triplet::new(2, 1, -1.0), + ], + h: vec![1.0, 0.0, 0.0], + lb: vec![], + ub: vec![], + }; + let sol = assert_agrees(&prob, &[ConeSpec::Nonneg(3)], 1e-6); + assert!((sol.x[0] - 0.25).abs() < 1e-5 && (sol.x[1] - 0.75).abs() < 1e-5); + assert!((sol.obj + 3.125).abs() < 1e-5, "obj {}", sol.obj); + } + + /// Quadratic objective *and* a second-order cone together (P in the + /// (x,x) block, SOC scaling in the (z,z) block): + /// min ‖x‖² − 3x0 − 4x1 s.t. ‖x‖ ≤ 1 (slack (1, x0, x1) ∈ SOC). + #[test] + fn qp_with_soc_matches_direct() { + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![-3.0, -4.0], + a: vec![], + b: vec![], + g: vec![Triplet::new(1, 0, -1.0), Triplet::new(2, 1, -1.0)], + h: vec![1.0, 0.0, 0.0], + lb: vec![], + ub: vec![], + }; + let sol = assert_agrees(&prob, &[ConeSpec::SecondOrder(3)], 1e-5); + // Constraint active: the optimum lies on the unit ball. + assert!( + (sol.x[0].hypot(sol.x[1]) - 1.0).abs() < 1e-5, + "x {:?}", + sol.x + ); + } + + /// Primal-infeasible LP: x ≥ 2 and x ≤ 1. + #[test] + fn detects_primal_infeasible() { + let prob = QpProblem { + n: 1, + p_lower: vec![], + c: vec![1.0], + a: vec![], + b: vec![], + g: vec![Triplet::new(0, 0, -1.0), Triplet::new(1, 0, 1.0)], + h: vec![-2.0, 1.0], // −x ≤ −2 (x≥2) ; x ≤ 1 + lb: vec![], + ub: vec![], + }; + let cone = CompositeCone::from_specs(&[ConeSpec::Nonneg(2)]); + let sol = solve_conic_hsde(&prob, &cone, &opts(), backend, None); + assert_eq!(sol.status, QpStatus::PrimalInfeasible); + } + + /// The `use_hsde` flag routes a bound-constrained QP through the + /// embedding via the *public* entry point (exercising bound expansion + /// into cone rows and the z_lb/z_ub split on the way back). The result + /// must match the default driver. + #[test] + fn flag_routes_through_public_entry_with_bounds() { + // min ‖x‖² − 3x0 − 4x1 s.t. x0+x1 ≤ 1, 0 ≤ x ≤ 1. + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![-3.0, -4.0], + a: vec![], + b: vec![], + g: vec![Triplet::new(0, 0, 1.0), Triplet::new(0, 1, 1.0)], + h: vec![1.0], + lb: vec![0.0, 0.0], + ub: vec![1.0, 1.0], + }; + let direct = solve_qp_ipm(&prob, &opts(), backend); + let hsde_opts = QpOptions { + use_hsde: true, + ..opts() + }; + let hsde = solve_qp_ipm(&prob, &hsde_opts, backend); + assert_eq!(direct.status, QpStatus::Optimal); + assert_eq!(hsde.status, QpStatus::Optimal); + for i in 0..2 { + assert!( + (direct.x[i] - hsde.x[i]).abs() < 1e-5, + "x[{i}] direct {} vs hsde {}", + direct.x[i], + hsde.x[i] + ); + // Bound multipliers must survive the round-trip split. + assert!((direct.z_lb[i] - hsde.z_lb[i]).abs() < 1e-5); + assert!((direct.z_ub[i] - hsde.z_ub[i]).abs() < 1e-5); + } + assert!((direct.x[0] - 0.25).abs() < 1e-5 && (direct.x[1] - 0.75).abs() < 1e-5); + } + + /// Dual-infeasible / unbounded LP: min −x s.t. x ≥ 0 (no upper bound). + #[test] + fn detects_dual_infeasible() { + let prob = QpProblem { + n: 1, + p_lower: vec![], + c: vec![-1.0], + a: vec![], + b: vec![], + g: vec![Triplet::new(0, 0, -1.0)], + h: vec![0.0], + lb: vec![], + ub: vec![], + }; + let cone = CompositeCone::from_specs(&[ConeSpec::Nonneg(1)]); + let sol = solve_conic_hsde(&prob, &cone, &opts(), backend, None); + assert_eq!(sol.status, QpStatus::DualInfeasible); + } + + /// SDP `max λ s.t. M − λI ⪰ 0` ⇒ `λ = λ_min(M)`. Diagonal `M = diag(2,5)` + /// (λ_min = 2): the PSD slack `s = svec(M − λI)` exercises the dense + /// `(z,z)` block on a diagonal matrix. Solved through the public conic + /// entry `solve_socp_ipm` with a `Psd(2)` cone. + #[test] + fn psd_min_eigenvalue_diagonal() { + // x = (λ); minimize −λ. G·x places λ on the diagonal svec entries + // (positions 0 and 2 for a 2×2), h = svec(M), s = svec(M − λI) ⪰ 0. + let prob = QpProblem { + n: 1, + p_lower: vec![], + c: vec![-1.0], + a: vec![], + b: vec![], + g: vec![Triplet::new(0, 0, 1.0), Triplet::new(2, 0, 1.0)], + h: vec![2.0, 0.0, 5.0], // svec(diag(2,5)) + lb: vec![], + ub: vec![], + }; + let sol = solve_socp_ipm(&prob, &[ConeSpec::Psd(2)], &opts(), backend); + assert_eq!(sol.status, QpStatus::Optimal, "{:?}", sol.status); + assert!((sol.x[0] - 2.0).abs() < 1e-5, "λ = {}", sol.x[0]); + assert!((sol.obj + 2.0).abs() < 1e-5, "obj = {}", sol.obj); + } + + /// Same SDP with a **non-diagonal** `M = [[2,1],[1,2]]` (λ_min = 1), so + /// the PSD slack has a nonzero off-diagonal — exercising the off-diagonal + /// entries of the dense `W ⊗ₛ W` scaling block. + #[test] + fn psd_min_eigenvalue_offdiagonal() { + let r2 = std::f64::consts::SQRT_2; + let prob = QpProblem { + n: 1, + p_lower: vec![], + c: vec![-1.0], + a: vec![], + b: vec![], + g: vec![Triplet::new(0, 0, 1.0), Triplet::new(2, 0, 1.0)], + h: vec![2.0, r2, 2.0], // svec([[2,1],[1,2]]) + lb: vec![], + ub: vec![], + }; + let sol = solve_socp_ipm(&prob, &[ConeSpec::Psd(2)], &opts(), backend); + assert_eq!(sol.status, QpStatus::Optimal, "{:?}", sol.status); + assert!((sol.x[0] - 1.0).abs() < 1e-5, "λ = {}", sol.x[0]); + assert!((sol.obj + 1.0).abs() < 1e-5, "obj = {}", sol.obj); + } + + /// A block-diagonal PSD cone (4×4 = two 2×2 blocks, no cross coupling) + /// decomposes into two `Psd(2)` cones, dropping the structurally-zero + /// cross rows. svec(4×4) indices: diag at k∈{0,4,7,9}; the within-block + /// off-diagonals (1,0)=k1 and (3,2)=k8 are present; the cross entries + /// k∈{2,3,5,6} are absent. + #[test] + fn psd_decompose_splits_block_diagonal() { + use crate::ipm::decompose_psd; + let prob = QpProblem { + n: 1, + p_lower: vec![], + c: vec![0.0], + a: vec![], + b: vec![], + g: vec![], + h: vec![1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0], + lb: vec![], + ub: vec![], + }; + let (_p2, cones2, row_map) = decompose_psd(&prob, &[ConeSpec::Psd(4)]); + assert_eq!(cones2, vec![ConeSpec::Psd(2), ConeSpec::Psd(2)]); + assert_eq!(row_map, vec![0, 1, 4, 7, 8, 9]); // cross rows 2,3,5,6 dropped + } + + /// A genuinely coupled PSD cone (a cross entry present) stays one block. + #[test] + fn psd_decompose_keeps_coupled() { + use crate::ipm::decompose_psd; + let prob = QpProblem { + n: 1, + p_lower: vec![], + c: vec![0.0], + a: vec![], + b: vec![], + // k=2 is the cross entry (2,0); making it present couples the blocks. + h: vec![1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0], + g: vec![], + lb: vec![], + ub: vec![], + }; + let (_p2, cones2, _) = decompose_psd(&prob, &[ConeSpec::Psd(4)]); + assert_eq!(cones2, vec![ConeSpec::Psd(4)]); + } + + /// End-to-end: a block-diagonal SDP declared as a single `Psd(4)` cone + /// solves correctly through the auto-decomposition. `max λ s.t. M−λI⪰0` + /// with `M = blkdiag([[2,1],[1,2]], [[4,1],[1,4]])` has + /// `λ_min(M) = min(1, 3) = 1`. The decomposed cross rows get dual 0. + #[test] + fn psd_block_diagonal_solves_end_to_end() { + let r2 = std::f64::consts::SQRT_2; + // G column = svec(I₄): diagonal entries k ∈ {0,4,7,9}. + let prob = QpProblem { + n: 1, + p_lower: vec![], + c: vec![-1.0], + a: vec![], + b: vec![], + g: vec![ + Triplet::new(0, 0, 1.0), + Triplet::new(4, 0, 1.0), + Triplet::new(7, 0, 1.0), + Triplet::new(9, 0, 1.0), + ], + // svec(M): (0,0)=2,(1,0)=√2,(1,1)=2 | (2,2)=4,(3,2)=√2,(3,3)=4. + h: vec![2.0, r2, 0.0, 0.0, 2.0, 0.0, 0.0, 4.0, r2, 4.0], + lb: vec![], + ub: vec![], + }; + let sol = solve_socp_ipm(&prob, &[ConeSpec::Psd(4)], &opts(), backend); + assert_eq!(sol.status, QpStatus::Optimal, "{:?}", sol.status); + assert!((sol.x[0] - 1.0).abs() < 1e-5, "λ = {}", sol.x[0]); + assert!((sol.obj + 1.0).abs() < 1e-5, "obj = {}", sol.obj); + // z is returned in the original 10-row layout (dropped rows = 0). + assert_eq!(sol.z.len(), 10); + for &k in &[2usize, 3, 5, 6] { + assert_eq!(sol.z[k], 0.0, "dropped cross row {k} should have dual 0"); + } + } + + /// Connected **sparse** PSD cone: chordal range-space decomposition. + /// `max λ s.t. M − λI ⪰ 0` with tridiagonal `M` (path 0–1–2, so the + /// (2,0) entry is structurally zero). The pattern is chordal with + /// overlapping cliques {0,1},{1,2}, so `solve_socp_ipm` rewrites it via + /// clique blocks + consistency equalities. The optimum (`λ = λ_min(M)`) + /// and objective must match a direct **dense** `Psd(3)` solve (the primal + /// is unique; the PSD dual is not, so only x/obj are compared). + #[test] + fn psd_chordal_matches_dense_on_path_sdp() { + let r2 = std::f64::consts::SQRT_2; + // svec(M), M tridiagonal diag 2, off 0.5: (2,0)=k2 is structurally 0. + let prob = QpProblem { + n: 1, + p_lower: vec![], + c: vec![-1.0], + a: vec![], + b: vec![], + g: vec![ + Triplet::new(0, 0, 1.0), + Triplet::new(3, 0, 1.0), + Triplet::new(5, 0, 1.0), + ], + h: vec![2.0, 0.5 * r2, 0.0, 2.0, 0.5 * r2, 2.0], + lb: vec![], + ub: vec![], + }; + // Dense reference: the HSDE driver on a single Psd(3) (no decomposition). + let dense = solve_conic_hsde( + &prob, + &CompositeCone::from_specs(&[ConeSpec::Psd(3)]), + &opts(), + backend, + None, + ); + // solve_socp_ipm auto-applies the chordal decomposition. + let decomp = solve_socp_ipm(&prob, &[ConeSpec::Psd(3)], &opts(), backend); + assert_eq!(dense.status, QpStatus::Optimal, "dense {:?}", dense.status); + assert_eq!( + decomp.status, + QpStatus::Optimal, + "decomp {:?}", + decomp.status + ); + assert!( + (dense.x[0] - decomp.x[0]).abs() < 1e-5, + "λ: dense {} vs decomp {}", + dense.x[0], + decomp.x[0] + ); + assert!( + (dense.obj - decomp.obj).abs() < 1e-5, + "obj: dense {} vs decomp {}", + dense.obj, + decomp.obj + ); + assert_eq!(decomp.z.len(), 6, "dual returned in original svec layout"); + } +} diff --git a/crates/pounce-convex/src/hsde_nonsym.rs b/crates/pounce-convex/src/hsde_nonsym.rs new file mode 100644 index 00000000..de2cd118 --- /dev/null +++ b/crates/pounce-convex/src/hsde_nonsym.rs @@ -0,0 +1,1586 @@ +//! Non-symmetric homogeneous self-dual embedding driver (Phases H5–H6). +//! +//! The non-symmetric counterpart of [`crate::hsde`]. It solves +//! `min cᵀx s.t. Ax = b, Gx + s = h, s ∈ K` where `K` is a product of +//! nonnegative-orthant, second-order, **exponential**, and **power** cones, +//! via the same homogeneous self-dual embedding and two-solve τ scheme. The +//! exp/power blocks use the **dual-aware primal–dual scaling** of Dahl & +//! Andersen (2021) (in place of a Nesterov–Todd point); the orthant and +//! second-order blocks are self-scaled and reuse their NT machinery, so all +//! four cone families coexist in one KKT. +//! +//! ## What differs from the symmetric driver +//! +//! The whole non-symmetric algorithm collapses onto the symmetric structure +//! once the right scaling `M = WᵀW` is in hand (see `dev-notes/hsde.md`): +//! +//! - the cone's `(z, z)` block is `−M⁻¹` (dense 3×3 for the exp cone), which +//! reduces to `−diag(s/z) = −W²` for the orthant and to the primal-Hessian +//! block `−(1/μ)∇²F⁻¹` on the central path; +//! - the complementarity right-hand side is `rc = −z + γμ·s̃ − η` with +//! `s̃ = −∇F(s)` the shadow dual (the corrector `η` is Phase-H5b; here 0), +//! `comp_term = −M⁻¹·rc`, and the slack recovery `Δs = −comp_term − M⁻¹·Δz`; +//! - for the orthant this is **identical** to the symmetric Mehrotra step, +//! which is the correctness anchor; +//! - the exp cone has no closed-form fraction-to-boundary, so the step length +//! is found by backtracking on cone membership. +//! +//! The barrier oracles, conjugate-gradient shadow iterate, and the scaling +//! itself live in [`crate::cones::exp`]; this module is the outer iteration. + +use crate::cones::{BarrierCone, Cone, ConeBlock, ExponentialCone, PowerCone, SecondOrderCone}; +use crate::debug::{fire, ConvexDebugState}; +use crate::ipm::{build_rhs, detect_infeasibility, dot, inf_norm, split_step, QpOptions}; +use crate::qp::{QpProblem, QpSolution, QpStatus}; +use pounce_common::debug::{Checkpoint, DebugAction, DebugHook}; +use pounce_common::types::{Index, Number}; +use pounce_linsol::{Factorization, SparseSymLinearSolverInterface}; +use std::collections::BTreeMap; + +/// A 3-dimensional non-symmetric cone the driver supports. It implements +/// [`BarrierCone`] by dispatching to the concrete cone, so the generic scaling +/// / conjugate-gradient / corrector machinery (in [`crate::cones::nonsym`]) +/// works over it unchanged. +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum NonsymCone { + /// The exponential cone. + Exp(ExponentialCone), + /// The power cone `K_α`. + Power(PowerCone), +} + +macro_rules! ns_dispatch { + ($self:ident, $c:ident => $body:expr) => { + match $self { + NonsymCone::Exp($c) => $body, + NonsymCone::Power($c) => $body, + } + }; +} + +impl BarrierCone for NonsymCone { + fn barrier_degree(&self) -> f64 { + ns_dispatch!(self, c => c.barrier_degree()) + } + fn barrier(&self, p: &[f64]) -> f64 { + ns_dispatch!(self, c => c.barrier(p)) + } + fn barrier_grad(&self, p: &[f64], out: &mut [f64]) { + ns_dispatch!(self, c => c.barrier_grad(p, out)) + } + fn barrier_hess_lower(&self, p: &[f64], out: &mut [f64]) { + ns_dispatch!(self, c => c.barrier_hess_lower(p, out)) + } + fn in_primal_cone(&self, p: &[f64], tol: f64) -> bool { + ns_dispatch!(self, c => c.in_primal_cone(p, tol)) + } + fn in_dual_cone(&self, p: &[f64], tol: f64) -> bool { + ns_dispatch!(self, c => c.in_dual_cone(p, tol)) + } + fn interior_reference(&self, out: &mut [f64]) { + ns_dispatch!(self, c => c.interior_reference(out)) + } +} + +/// One block of the cone product, by row offset. The non-symmetric driver +/// also accepts self-scaled **second-order** cones (handled via their NT +/// scaling), so an exp/power problem can carry SOC constraints too. +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum NsBlock { + /// Nonnegative orthant of the given number of rows. + Orthant(usize), + /// Second-order (Lorentz) cone of the given dimension. + SecondOrder(usize), + /// A 3-dimensional non-symmetric cone (exponential or power). + Nonsym(NonsymCone), +} + +impl NsBlock { + /// A 3-dimensional exponential-cone block. + pub fn exp() -> Self { + NsBlock::Nonsym(NonsymCone::Exp(ExponentialCone)) + } + /// A 3-dimensional power-cone block `K_α`. + pub fn power(alpha: f64) -> Self { + NsBlock::Nonsym(NonsymCone::Power(PowerCone::new(alpha))) + } + + fn dim(&self) -> usize { + match self { + NsBlock::Orthant(n) | NsBlock::SecondOrder(n) => *n, + NsBlock::Nonsym(_) => 3, + } + } + /// Barrier degree (orthant: its dimension; second-order cone: 2; + /// a 3-D non-symmetric cone: 3). + fn degree(&self) -> usize { + match self { + NsBlock::Orthant(n) => *n, + NsBlock::SecondOrder(_) => 2, + NsBlock::Nonsym(_) => 3, + } + } +} + +/// The cone product with each block's row offset precomputed. +pub(crate) struct NsCone { + blocks: Vec<(usize, NsBlock)>, + dim: usize, + degree: usize, +} + +impl NsCone { + pub(crate) fn new(specs: &[NsBlock]) -> Self { + let mut blocks = Vec::with_capacity(specs.len()); + let (mut dim, mut degree) = (0, 0); + for b in specs { + blocks.push((dim, *b)); + dim += b.dim(); + degree += b.degree(); + } + NsCone { + blocks, + dim, + degree, + } + } + + /// Self-dual starting iterate `e` (orthant: ones; non-symmetric cone: the + /// cone's `interior_reference`, which lies in both `K` and `K*`). The + /// corrector recenters from here, so an exact central point is not needed. + fn identity(&self, out: &mut [f64]) { + for (off, b) in &self.blocks { + match b { + NsBlock::Orthant(n) => { + for v in &mut out[*off..off + n] { + *v = 1.0; + } + } + NsBlock::SecondOrder(m) => { + // e = (1, 0, …, 0), the SOC identity / well-centered start. + for v in &mut out[*off..off + m] { + *v = 0.0; + } + out[*off] = 1.0; + } + NsBlock::Nonsym(cone) => { + cone.interior_reference(&mut out[*off..off + 3]); + } + } + } + } +} + +/// Fraction-to-boundary step for a positive scalar ray `v + α dv > 0`. +fn ray_step(v: f64, dv: f64, tau: f64) -> f64 { + if dv < 0.0 { + (tau * (-v / dv)).min(1.0) + } else { + 1.0 + } +} + +/// Per-block, per-iteration scaling data: `M⁻¹` (applied in the RHS and +/// recovery) and the shadow dual `s̃ = −∇F(s)`. +enum BlockScaling { + /// Orthant: `M⁻¹ = diag(s/z)`, `s̃ = 1/s`. + Orthant { + sz_ratio: Vec, + s_tilde: Vec, + }, + /// Second-order cone: its NT scaling `W² = diag(d) + u uᵀ`, kept in + /// diag-plus-rank-1 form so the recover step applies `W²·Δz` cheaply. + SecondOrder { diag: Vec, u: Vec }, + /// Non-symmetric cone (exp/power): dense `M⁻¹` (3×3) and the shadow dual. + Nonsym { + minv: [[f64; 3]; 3], + s_tilde: [f64; 3], + }, +} + +/// KKT value-array positions for one cone block. +enum ZPos { + /// Orthant: one diagonal value position per row. + Diag(Vec), + /// Second-order cone: the dense lower-triangle value positions, row-major + /// `[(0,0); (1,0),(1,1); …]` (length `m(m+1)/2`). + SecondOrder { dim: usize, pos: Vec }, + /// Exp/power: the three diagonal positions and the three strict-lower + /// positions `(1,0),(2,0),(2,1)`. + Dense { diag: [usize; 3], lower: [usize; 3] }, +} + +/// The constant KKT pattern (lower triangle, 1-based) plus the scaling-block +/// value positions, so each iteration only rewrites the cone block and +/// `refactor`s (reusing the symbolic factor). +struct NsKkt { + airn: Vec, + ajcn: Vec, + values: Vec, + dim: usize, + z_pos: Vec, +} + +impl NsKkt { + fn build(prob: &QpProblem, cone: &NsCone, reg: f64) -> Self { + let n = prob.n; + let m_eq = prob.m_eq(); + let m_ineq = prob.m_ineq(); + let mut entries: BTreeMap<(usize, usize), f64> = BTreeMap::new(); + let mut add = |r: usize, c: usize, v: f64| { + let (r, c) = if r >= c { (r, c) } else { (c, r) }; + *entries.entry((r, c)).or_insert(0.0) += v; + }; + // (x,x): P + reg·I. + for t in &prob.p_lower { + add(t.row, t.col, t.val); + } + for i in 0..n { + add(i, i, reg); + } + // (y,x): A; (y,y): −reg. + for t in &prob.a { + add(n + t.row, t.col, t.val); + } + for i in 0..m_eq { + add(n + i, n + i, -reg); + } + // (z,x): G. + for t in &prob.g { + add(n + m_eq + t.row, t.col, t.val); + } + // (z,z): per block, seeded with −reg on the diagonal. Exp blocks also + // reserve the strict-lower 3×3 off-diagonals (a genuine dense block). + for (off, b) in &cone.blocks { + let zb = n + m_eq + off; + match b { + NsBlock::Orthant(d) => { + for i in 0..*d { + add(zb + i, zb + i, -reg); + } + } + NsBlock::SecondOrder(m) => { + // Genuine dense m×m lower triangle for the NT scaling W². + for i in 0..*m { + for j in 0..=i { + add(zb + i, zb + j, if i == j { -reg } else { 0.0 }); + } + } + } + NsBlock::Nonsym(_) => { + for i in 0..3 { + add(zb + i, zb + i, -reg); + } + add(zb + 1, zb, 0.0); + add(zb + 2, zb, 0.0); + add(zb + 2, zb + 1, 0.0); + } + } + } + + let nnz = entries.len(); + let mut airn = Vec::with_capacity(nnz); + let mut ajcn = Vec::with_capacity(nnz); + let mut values = Vec::with_capacity(nnz); + let mut coord_to_pos: BTreeMap<(usize, usize), usize> = BTreeMap::new(); + for (pos, ((r, c), v)) in entries.into_iter().enumerate() { + airn.push((r + 1) as Index); + ajcn.push((c + 1) as Index); + values.push(v); + coord_to_pos.insert((r, c), pos); + } + + let mut z_pos = Vec::with_capacity(cone.blocks.len()); + for (off, b) in &cone.blocks { + let zb = n + m_eq + off; + match b { + NsBlock::Orthant(d) => { + z_pos.push(ZPos::Diag( + (0..*d).map(|i| coord_to_pos[&(zb + i, zb + i)]).collect(), + )); + } + NsBlock::SecondOrder(m) => { + let mut pos = Vec::with_capacity(m * (m + 1) / 2); + for i in 0..*m { + for j in 0..=i { + pos.push(coord_to_pos[&(zb + i, zb + j)]); + } + } + z_pos.push(ZPos::SecondOrder { dim: *m, pos }); + } + NsBlock::Nonsym(_) => { + let diag = [ + coord_to_pos[&(zb, zb)], + coord_to_pos[&(zb + 1, zb + 1)], + coord_to_pos[&(zb + 2, zb + 2)], + ]; + let lower = [ + coord_to_pos[&(zb + 1, zb)], + coord_to_pos[&(zb + 2, zb)], + coord_to_pos[&(zb + 2, zb + 1)], + ]; + z_pos.push(ZPos::Dense { diag, lower }); + } + } + } + let _ = m_ineq; + NsKkt { + airn, + ajcn, + values, + dim: n + m_eq + m_ineq, + z_pos, + } + } + + /// Write `−M⁻¹ − reg·I` into the cone block of `out` (a copy of + /// `self.values`), returning the per-block scaling for use in the RHS and + /// slack recovery. `None` if any exp scaling fails. + fn update_blocks( + &self, + cone: &NsCone, + s: &[f64], + z: &[f64], + reg: f64, + out: &mut [Number], + ) -> Option> { + let mut scalings = Vec::with_capacity(cone.blocks.len()); + for ((off, b), zp) in cone.blocks.iter().zip(&self.z_pos) { + match (b, zp) { + (NsBlock::Orthant(d), ZPos::Diag(pos)) => { + let mut sz_ratio = vec![0.0; *d]; + let mut s_tilde = vec![0.0; *d]; + for i in 0..*d { + let (si, zi) = (s[off + i], z[off + i]); + sz_ratio[i] = si / zi; // (M⁻¹)_ii + s_tilde[i] = 1.0 / si; // −∇F(s)_i + out[pos[i]] = -sz_ratio[i] - reg; + } + scalings.push(BlockScaling::Orthant { sz_ratio, s_tilde }); + } + (NsBlock::SecondOrder(m), ZPos::SecondOrder { dim, pos }) => { + debug_assert_eq!(m, dim); + let sb = &s[*off..off + m]; + let zb = &z[*off..off + m]; + // W² = diag(d) + u uᵀ from the SOC's NT scaling. + let (diag, u) = match SecondOrderCone::new(*m).kkt_block(sb, zb) { + ConeBlock::DiagPlusRank1 { diag, u } => (diag, u), + _ => unreachable!("SOC kkt_block is DiagPlusRank1"), + }; + // Write −W² − reg into the dense lower triangle. + let mut k = 0; + for i in 0..*m { + for j in 0..=i { + let mut w2 = u[i] * u[j]; + if i == j { + w2 += diag[i]; + } + out[pos[k]] = -w2 - if i == j { reg } else { 0.0 }; + k += 1; + } + } + scalings.push(BlockScaling::SecondOrder { diag, u }); + } + (NsBlock::Nonsym(nscone), ZPos::Dense { diag, lower }) => { + let sb = &s[*off..off + 3]; + let zb = &z[*off..off + 3]; + let (minv, s_tilde) = block_minv(nscone, sb, zb)?; + out[diag[0]] = -minv[0][0] - reg; + out[diag[1]] = -minv[1][1] - reg; + out[diag[2]] = -minv[2][2] - reg; + out[lower[0]] = -minv[1][0]; + out[lower[1]] = -minv[2][0]; + out[lower[2]] = -minv[2][1]; + scalings.push(BlockScaling::Nonsym { minv, s_tilde }); + } + _ => unreachable!("block/position shape mismatch"), + } + } + Some(scalings) + } +} + +/// `M⁻¹` and shadow dual for a non-symmetric cone block. Uses the dual-aware +/// scaling off the central path; falls back to the primal Hessian +/// `M = μ∇²F(s)` (so `M⁻¹ = (1/μ)∇²F⁻¹`) when the dual-aware scaling +/// degenerates (near-center). Generic over the cone (exp or power). +fn block_minv(cone: &C, s: &[f64], z: &[f64]) -> Option<([[f64; 3]; 3], [f64; 3])> { + use crate::cones::nonsym::{chol_solve3, scaling}; + if let Some(sc) = scaling(cone, s, z) { + if let Some(minv) = sc.minv() { + return Some((minv, sc.s_tilde)); + } + } + // Fallback: M = μ∇²F(s), μ = ⟨s,z⟩/3. + let mu = (s[0] * z[0] + s[1] * z[1] + s[2] * z[2]) / 3.0; + if mu <= 0.0 { + return None; + } + let mut hl = [0.0; 6]; + cone.barrier_hess_lower(s, &mut hl); + // M = μH ⇒ M⁻¹ = (1/μ)H⁻¹. + let scaled = [ + mu * hl[0], + mu * hl[1], + mu * hl[2], + mu * hl[3], + mu * hl[4], + mu * hl[5], + ]; + let c0 = chol_solve3(&scaled, &[1.0, 0.0, 0.0])?; + let c1 = chol_solve3(&scaled, &[0.0, 1.0, 0.0])?; + let c2 = chol_solve3(&scaled, &[0.0, 0.0, 1.0])?; + let minv = [ + [c0[0], c1[0], c2[0]], + [c0[1], c1[1], c2[1]], + [c0[2], c1[2], c2[2]], + ]; + let mut g = [0.0; 3]; + cone.barrier_grad(s, &mut g); + Some((minv, [-g[0], -g[1], -g[2]])) +} + +/// Apply a symmetric 3×3 to a 3-slice. +fn matvec3(m: &[[f64; 3]; 3], v: &[f64]) -> [f64; 3] { + [ + m[0][0] * v[0] + m[0][1] * v[1] + m[0][2] * v[2], + m[1][0] * v[0] + m[1][1] * v[1] + m[1][2] * v[2], + m[2][0] * v[0] + m[2][1] * v[1] + m[2][2] * v[2], + ] +} + +/// Predictor right-hand side. For orthant/non-symmetric blocks +/// `comp = −M⁻¹·rc`, `rc = −z + σμ·s̃`. For a second-order cone it is the +/// self-scaled term `Arw(z)⁻¹·(s∘z − σμe)` (the cone's `rhs_comp_term`). +fn comp_term( + cone: &NsCone, + scalings: &[BlockScaling], + s: &[f64], + z: &[f64], + sigma_mu: f64, + out: &mut [f64], +) { + for (&(off, b), sc) in cone.blocks.iter().zip(scalings) { + let d = b.dim(); + match sc { + BlockScaling::Orthant { sz_ratio, s_tilde } => { + for i in 0..d { + let rc = -z[off + i] + sigma_mu * s_tilde[i]; + out[off + i] = -sz_ratio[i] * rc; + } + } + BlockScaling::SecondOrder { .. } => { + let soc = SecondOrderCone::new(d); + let (sb, zb) = (&s[off..off + d], &z[off..off + d]); + let mut r_c = vec![0.0; d]; + soc.comp_residual(sb, zb, sigma_mu, &mut r_c); + soc.rhs_comp_term(sb, zb, &r_c, &mut out[off..off + d]); + } + BlockScaling::Nonsym { minv, s_tilde } => { + let rc = [ + -z[off] + sigma_mu * s_tilde[0], + -z[off + 1] + sigma_mu * s_tilde[1], + -z[off + 2] + sigma_mu * s_tilde[2], + ]; + let mc = matvec3(minv, &rc); + out[off] = -mc[0]; + out[off + 1] = -mc[1]; + out[off + 2] = -mc[2]; + } + } + } +} + +/// Corrector right-hand side: `comp_term = −M⁻¹·rc` with +/// `rc = −z + σμ·s̃ − η`, where `η` is the nonsymmetric corrector +/// (Dahl–Andersen eq. 16). For an orthant block `η_i = ds_aff_i·dz_aff_i/s_i` +/// — exactly the Mehrotra second-order term, so the orthant corrector +/// reproduces standard Mehrotra. For an exp block +/// `η = −½ F'''(s)[ds_aff, (∇²F(s))⁻¹ dz_aff]`. If the exp third-derivative +/// FD leaves the cone, `η = 0` for that block (still a valid centered step). +#[allow(clippy::too_many_arguments)] +fn comp_term_corr( + cone: &NsCone, + scalings: &[BlockScaling], + s: &[f64], + z: &[f64], + sigma_mu: f64, + ds_aff: &[f64], + dz_aff: &[f64], + out: &mut [f64], +) { + use crate::cones::nonsym::{chol_solve3, third_dir_apply}; + for (&(off, b), sc) in cone.blocks.iter().zip(scalings) { + let d = b.dim(); + match (b, sc) { + (_, BlockScaling::Orthant { sz_ratio, s_tilde }) => { + for i in 0..d { + let eta = s_tilde[i] * ds_aff[off + i] * dz_aff[off + i]; + let rc = -z[off + i] + sigma_mu * s_tilde[i] - eta; + out[off + i] = -sz_ratio[i] * rc; + } + } + (NsBlock::Nonsym(nscone), BlockScaling::Nonsym { minv, s_tilde }) => { + // η = −½ F'''(s)[ds_aff, H⁻¹ dz_aff], H = ∇²F(s) of *this* cone. + let sb = &s[off..off + 3]; + let mut hl = [0.0; 6]; + nscone.barrier_hess_lower(sb, &mut hl); + let dza = [dz_aff[off], dz_aff[off + 1], dz_aff[off + 2]]; + let hinv_dza = chol_solve3(&hl, &dza).unwrap_or([0.0; 3]); + let u = [ds_aff[off], ds_aff[off + 1], ds_aff[off + 2]]; + let eta = match third_dir_apply(&nscone, sb, &u, &hinv_dza) { + Some(t3) => [-0.5 * t3[0], -0.5 * t3[1], -0.5 * t3[2]], + None => [0.0; 3], + }; + let rc = [ + -z[off] + sigma_mu * s_tilde[0] - eta[0], + -z[off + 1] + sigma_mu * s_tilde[1] - eta[1], + -z[off + 2] + sigma_mu * s_tilde[2] - eta[2], + ]; + let mc = matvec3(minv, &rc); + out[off] = -mc[0]; + out[off + 1] = -mc[1]; + out[off + 2] = -mc[2]; + } + (NsBlock::SecondOrder(_), BlockScaling::SecondOrder { .. }) => { + // Self-scaled corrector: rhs from the Jordan second-order term + // s∘z + ds_aff∘dz_aff − σμe (the cone's own corrector). + let soc = SecondOrderCone::new(d); + let (sb, zb) = (&s[off..off + d], &z[off..off + d]); + let mut r_c = vec![0.0; d]; + soc.comp_residual_corrector( + sb, + zb, + &ds_aff[off..off + d], + &dz_aff[off..off + d], + sigma_mu, + &mut r_c, + ); + soc.rhs_comp_term(sb, zb, &r_c, &mut out[off..off + d]); + } + _ => unreachable!("block/scaling shape mismatch"), + } + } +} + +/// Recover the slack step `Δs = −comp_term − M⁻¹·Δz`. +fn recover_ds(cone: &NsCone, scalings: &[BlockScaling], comp: &[f64], dz: &[f64], ds: &mut [f64]) { + for (&(off, b), sc) in cone.blocks.iter().zip(scalings) { + let d = b.dim(); + match sc { + BlockScaling::Orthant { sz_ratio, .. } => { + for i in 0..d { + ds[off + i] = -comp[off + i] - sz_ratio[i] * dz[off + i]; + } + } + BlockScaling::SecondOrder { diag, u } => { + // Δs = −comp − W²·Δz, with W²·Δz = diag∘Δz + u·(uᵀΔz). + let dzb = &dz[off..off + d]; + let utdz: f64 = u.iter().zip(dzb).map(|(ui, di)| ui * di).sum(); + for i in 0..d { + ds[off + i] = -comp[off + i] - (diag[i] * dzb[i] + u[i] * utdz); + } + } + BlockScaling::Nonsym { minv, .. } => { + let mdz = matvec3(minv, &dz[off..off + 3]); + for i in 0..3 { + ds[off + i] = -comp[off + i] - mdz[i]; + } + } + } + } +} + +/// Largest `α ∈ (0, α_cap]` keeping `s + α ds ∈ int K` and `z + α dz ∈ int K*` +/// for every block, by closed form on orthant blocks and backtracking on exp +/// blocks (no closed-form boundary root). Returns a strictly interior step. +fn max_step( + cone: &NsCone, + s: &[f64], + ds: &[f64], + z: &[f64], + dz: &[f64], + tau: f64, + alpha_cap: f64, +) -> f64 { + let mut alpha = alpha_cap; + // Orthant + second-order cone closed forms first. + for &(off, b) in &cone.blocks { + if let NsBlock::SecondOrder(m) = b { + let soc = SecondOrderCone::new(m); + alpha = alpha.min(soc.max_step(&s[off..off + m], &ds[off..off + m], tau)); + alpha = alpha.min(soc.max_step(&z[off..off + m], &dz[off..off + m], tau)); + } + } + for &(off, b) in &cone.blocks { + if let NsBlock::Orthant(d) = b { + for i in 0..d { + alpha = alpha.min(ray_step(s[off + i], ds[off + i], tau)); + alpha = alpha.min(ray_step(z[off + i], dz[off + i], tau)); + } + } + } + // Backtrack on each non-symmetric block's membership (primal s ∈ K, dual + // z ∈ K*), using that block's own cone. + let interior = |alpha: f64| -> bool { + for &(off, b) in &cone.blocks { + if let NsBlock::Nonsym(nscone) = b { + let sp = [ + s[off] + alpha * ds[off], + s[off + 1] + alpha * ds[off + 1], + s[off + 2] + alpha * ds[off + 2], + ]; + let zp = [ + z[off] + alpha * dz[off], + z[off + 1] + alpha * dz[off + 1], + z[off + 2] + alpha * dz[off + 2], + ]; + if !nscone.in_primal_cone(&sp, 1e-12) || !nscone.in_dual_cone(&zp, 1e-12) { + return false; + } + } + } + true + }; + let mut bt = 0; + while !interior(alpha) && bt < 100 { + alpha *= 0.8; + bt += 1; + } + if bt >= 100 { + 0.0 + } else { + alpha + } +} + +/// Solve `min cᵀx s.t. Ax = b, Gx + s = h, s ∈ K` with `K` a product of +/// orthant and exponential cones, via the non-symmetric HSDE. +fn run_nonsym( + prob: &QpProblem, + specs: &[NsBlock], + opts: &QpOptions, + warm_x: Option<&[f64]>, + mut make_backend: F, + mut hook: Option<&mut dyn DebugHook>, +) -> QpSolution +where + F: FnMut() -> Box, +{ + let n = prob.n; + let m_eq = prob.m_eq(); + let m_ineq = prob.m_ineq(); + let cone = NsCone::new(specs); + debug_assert_eq!(cone.dim, m_ineq, "cone dim must cover all inequality rows"); + let degree = cone.degree; + + let kkt = NsKkt::build(prob, &cone, opts.reg); + let dim = kkt.dim; + + // Seed the factorization at the cone identity (any SPD block works). + let mut e = vec![0.0; m_ineq]; + cone.identity(&mut e); + let mut seed_vals = kkt.values.clone(); + if kkt + .update_blocks(&cone, &e, &e, opts.reg, &mut seed_vals) + .is_none() + { + return failed(prob); + } + let mut fact = match Factorization::new( + dim as Index, + kkt.airn.clone(), + kkt.ajcn.clone(), + seed_vals, + make_backend(), + ) { + Ok(f) => f, + Err(_) => return failed(prob), + }; + + let neg_b: Vec = prob.b.iter().map(|v| -v).collect(); + let neg_h: Vec = prob.h.iter().map(|v| -v).collect(); + let zeros_m = vec![0.0; m_ineq]; + + // Self-dual start: x = y = 0, s = z = e, τ = κ = 1. A warm start seeds the + // **primal** `x` from a previous (nearby) solution while keeping the cones + // centered at `e` — this lowers the initial primal residual without + // destabilizing the embedding. (The HSDE iteration count is start- + // dependent and is not guaranteed to drop, so this is a primal hook, not a + // promised speedup; the solution is start-independent regardless.) + let mut x = match warm_x { + Some(w) if w.len() == n => w.to_vec(), + _ => vec![0.0; n], + }; + let mut y = vec![0.0; m_eq]; + let mut s = e.clone(); + let mut z = e; + let mut tau = 1.0_f64; + let mut kappa = 1.0_f64; + + let mut rho_x = vec![0.0; n]; + let mut rho_y = vec![0.0; m_eq]; + let mut rho_z = vec![0.0; m_ineq]; + let mut px_vec = vec![0.0; n]; + let mut comp = vec![0.0; m_ineq]; + let mut kkt_vals = kkt.values.clone(); + let mut rhs = vec![0.0; dim]; + + let mut p_x = vec![0.0; n]; + let mut p_y = vec![0.0; m_eq]; + let mut p_z = vec![0.0; m_ineq]; + let mut dx = vec![0.0; n]; + let mut dy = vec![0.0; m_eq]; + let mut dz = vec![0.0; m_ineq]; + let mut ds = vec![0.0; m_ineq]; + let mut dz_aff = vec![0.0; m_ineq]; + let mut ds_aff = vec![0.0; m_ineq]; + + let mut status = QpStatus::IterationLimit; + let mut iters = 0; + + // Best iterate seen, by un-homogenized KKT residual. A feasible conic + // program can stall a hair short of `tol` when an iterate rides deep on a + // non-symmetric cone boundary: the barrier Hessian blows up, the + // fraction-to-boundary step collapses, and the duality gap is amplified by + // a small τ even though primal/dual feasibility are already tight. We + // snapshot the lowest-residual iterate so that, if the iteration later + // breaks down or hits the cap, we can return the point we actually reached + // (and judge its accuracy) rather than whatever degenerate iterate we died + // on. See the reduced-accuracy acceptance after the loop. + let mut best_res = f64::INFINITY; + let mut best: Option<(Vec, Vec, Vec, Vec, f64, f64)> = None; + + for it in 0..opts.max_iter { + iters = it; + + for v in px_vec.iter_mut() { + *v = 0.0; + } + prob.p_mul(&x, &mut px_vec); + let xpx = dot(&x, &px_vec); + + // Homogeneous residuals (identical to the symmetric driver). + for (r, (&ci, &pxi)) in rho_x.iter_mut().zip(prob.c.iter().zip(&px_vec)) { + *r = ci * tau + pxi; + } + prob.at_mul(&y, &mut rho_x); + prob.gt_mul(&z, &mut rho_x); + for (r, &bi) in rho_y.iter_mut().zip(&prob.b) { + *r = -bi * tau; + } + prob.a_mul(&x, &mut rho_y); + for i in 0..m_ineq { + rho_z[i] = s[i] - prob.h[i] * tau; + } + prob.g_mul(&x, &mut rho_z); + let ctx = dot(&prob.c, &x); + let bty = dot(&prob.b, &y); + let htz = dot(&prob.h, &z); + let rho_tau = kappa + ctx + bty + htz + xpx / tau; + + let sz = dot(&s, &z); + let mu = (sz + tau * kappa) / (degree as f64 + 1.0); + + // Convergence (un-homogenized). + let pres = inf_norm(&rho_y).max(inf_norm(&rho_z)) / tau; + let dres = inf_norm(&rho_x) / tau; + let gap = (xpx / tau + ctx + bty + htz).abs() / tau; + let res = pres.max(dres).max(gap); + + // Snapshot the best (lowest-residual) iterate for the reduced-accuracy + // fallback. τ > 0 only — the recovery un-homogenizes by 1/τ. + if res < best_res && tau > 0.0 { + best_res = res; + best = Some((x.clone(), y.clone(), z.clone(), s.clone(), tau, kappa)); + } + + // Debugger checkpoint: top of iteration. Same homogeneous-iterate + // view as the symmetric HSDE driver (blocks x/s/y/z + τ/κ). + if hook.is_some() { + let obj_hat = 0.5 * xpx / (tau * tau) + ctx / tau; + let mut st = ConvexDebugState { + cp: Checkpoint::IterStart, + iter: it as i32, + mu, + pinf: pres, + dinf: dres, + res, + obj: obj_hat, + alpha: (0.0, 0.0), + x: &mut x, + s: &mut s, + y: &mut y, + z: &mut z, + dx: &dx, + dy: &dy, + dz: &dz, + ds: &ds, + tau: Some(&mut tau), + kappa: Some(&mut kappa), + status: None, + }; + if fire(&mut hook, &mut st) == DebugAction::Stop { + break; + } + } + + if pres < opts.tol && dres < opts.tol && gap < opts.tol { + status = QpStatus::Optimal; + break; + } + // "Acceptable level": near the cone boundary the barrier Hessian blows + // up (ψ → 0) and the scaling/factorization can break down a hair short + // of `tol`. If that happens while the KKT residuals are already tiny + // (within `~1e3·tol`), the current iterate *is* essentially optimal — + // accept it rather than reporting a spurious NumericalFailure. + let near_opt = res < 1e3 * opts.tol; + // Infeasibility certificate as τ → 0. + if tau < 1e-2 * kappa.max(1.0) { + if let Some(st) = detect_infeasibility(prob, &x, &y, &z, opts) { + status = st; + break; + } + } + + // Refactor M with the dual-aware scaling. + kkt_vals.copy_from_slice(&kkt.values); + let scalings = match kkt.update_blocks(&cone, &s, &z, opts.reg, &mut kkt_vals) { + Some(sc) => sc, + None => { + status = if near_opt { + QpStatus::Optimal + } else { + QpStatus::NumericalFailure + }; + break; + } + }; + if fact.refactor(&kkt_vals).is_err() { + status = if near_opt { + QpStatus::Optimal + } else { + QpStatus::NumericalFailure + }; + break; + } + + // Constant direction p: M p = (−c, b, h). + build_rhs(&prob.c, &neg_b, &neg_h, &zeros_m, n, m_eq, m_ineq, &mut rhs); + if fact.solve_one(&mut rhs).is_err() { + status = if near_opt { + QpStatus::Optimal + } else { + QpStatus::NumericalFailure + }; + break; + } + split_step(&rhs, n, m_eq, m_ineq, &mut p_x, &mut p_y, &mut p_z); + let two_over_tau = 2.0 / tau; + let gtp = dot(&prob.c, &p_x) + + two_over_tau * dot(&px_vec, &p_x) + + dot(&prob.b, &p_y) + + dot(&prob.h, &p_z); + let denom = gtp - kappa / tau - xpx / (tau * tau); + + // Predictor (σ = 0): rc = −z, comp_term = −M⁻¹·rc = M⁻¹·z. + comp_term(&cone, &scalings, &s, &z, 0.0, &mut comp); + build_rhs(&rho_x, &rho_y, &rho_z, &comp, n, m_eq, m_ineq, &mut rhs); + if fact.solve_one(&mut rhs).is_err() { + status = if near_opt { + QpStatus::Optimal + } else { + QpStatus::NumericalFailure + }; + break; + } + split_step(&rhs, n, m_eq, m_ineq, &mut dx, &mut dy, &mut dz); + let gtq = dot(&prob.c, &dx) + + two_over_tau * dot(&px_vec, &dx) + + dot(&prob.b, &dy) + + dot(&prob.h, &dz); + let dtau_aff = (-rho_tau - gtq + kappa) / denom; + for i in 0..m_ineq { + dz_aff[i] = dz[i] + dtau_aff * p_z[i]; + } + let dkappa_aff = (-tau * kappa - kappa * dtau_aff) / tau; + recover_ds(&cone, &scalings, &comp, &dz_aff, &mut ds_aff); + + // Affine step (closed form on τ/κ + orthant, backtracking on exp). + let cap = ray_step(tau, dtau_aff, opts.tau).min(ray_step(kappa, dkappa_aff, opts.tau)); + let alpha_aff = if m_ineq > 0 { + max_step(&cone, &s, &ds_aff, &z, &dz_aff, opts.tau, cap) + } else { + cap + }; + let mut dot_aff = (tau + alpha_aff * dtau_aff) * (kappa + alpha_aff * dkappa_aff); + for i in 0..m_ineq { + dot_aff += (s[i] + alpha_aff * ds_aff[i]) * (z[i] + alpha_aff * dz_aff[i]); + } + let mu_aff = dot_aff / (degree as f64 + 1.0); + let sigma = if mu > 0.0 { + (mu_aff / mu).powi(3).min(1.0) + } else { + 0.0 + }; + let sigma_mu = sigma * mu; + + // Centering + corrector step. rc = −z + σμ·s̃ − η, with the + // nonsymmetric corrector η (Mehrotra second-order for orthant/τκ, + // third-order for exp). `use_corr = false` drops η (a plain centering + // step) — the safeguard fallback when the corrector overshoots. + // Use the corrector in the bulk iterations only. Near convergence its + // marginal benefit is gone and the finite-difference third-derivative + // perturbation can stall the endgame, so fall to pure centering (the + // provably convergent path) once residuals are within ~1e3·tol. + let near_conv = pres.max(dres).max(gap) < 1e3 * opts.tol; + let mut use_corr = !near_conv; + let mut dtau = 0.0_f64; + let mut dkappa = 0.0_f64; + let mut alpha = 0.0_f64; + let mut solve_failed = false; + loop { + if use_corr { + comp_term_corr( + &cone, &scalings, &s, &z, sigma_mu, &ds_aff, &dz_aff, &mut comp, + ); + } else { + comp_term(&cone, &scalings, &s, &z, sigma_mu, &mut comp); + } + build_rhs(&rho_x, &rho_y, &rho_z, &comp, n, m_eq, m_ineq, &mut rhs); + if fact.solve_one(&mut rhs).is_err() { + solve_failed = true; + break; + } + split_step(&rhs, n, m_eq, m_ineq, &mut dx, &mut dy, &mut dz); + let gtq = dot(&prob.c, &dx) + + two_over_tau * dot(&px_vec, &dx) + + dot(&prob.b, &dy) + + dot(&prob.h, &dz); + // τκ second-order term Δτ_aff·Δκ_aff only when the corrector is on. + let r_tk = if use_corr { + tau * kappa + dtau_aff * dkappa_aff + } else { + tau * kappa + }; + dtau = (-rho_tau - gtq - (sigma_mu - r_tk) / tau) / denom; + for i in 0..n { + dx[i] += dtau * p_x[i]; + } + for i in 0..m_eq { + dy[i] += dtau * p_y[i]; + } + for i in 0..m_ineq { + dz[i] += dtau * p_z[i]; + } + dkappa = (sigma_mu - r_tk - kappa * dtau) / tau; + recover_ds(&cone, &scalings, &comp, &dz, &mut ds); + + let cap = ray_step(tau, dtau, opts.tau).min(ray_step(kappa, dkappa, opts.tau)); + alpha = if m_ineq > 0 { + max_step(&cone, &s, &ds, &z, &dz, opts.tau, cap) + } else { + cap + }; + // If the corrector collapses the step, retry once without it. + if use_corr && alpha < 1e-2 { + use_corr = false; + continue; + } + break; + } + if solve_failed { + status = if near_opt { + QpStatus::Optimal + } else { + QpStatus::NumericalFailure + }; + break; + } + if alpha <= 0.0 { + status = if near_opt { + QpStatus::Optimal + } else { + QpStatus::NumericalFailure + }; + break; + } + + // Debugger checkpoint: combined Newton direction + step length known, + // not yet applied (single symmetric α in both slots). + if hook.is_some() { + let obj_hat = 0.5 * xpx / (tau * tau) + ctx / tau; + let mut st = ConvexDebugState { + cp: Checkpoint::AfterSearchDirection, + iter: it as i32, + mu, + pinf: pres, + dinf: dres, + res, + obj: obj_hat, + alpha: (alpha, alpha), + x: &mut x, + s: &mut s, + y: &mut y, + z: &mut z, + dx: &dx, + dy: &dy, + dz: &dz, + ds: &ds, + tau: Some(&mut tau), + kappa: Some(&mut kappa), + status: None, + }; + if fire(&mut hook, &mut st) == DebugAction::Stop { + break; + } + } + + for i in 0..n { + x[i] += alpha * dx[i]; + } + for i in 0..m_eq { + y[i] += alpha * dy[i]; + } + for i in 0..m_ineq { + s[i] += alpha * ds[i]; + z[i] += alpha * dz[i]; + } + tau += alpha * dtau; + kappa += alpha * dkappa; + + // Debugger checkpoint: the new homogeneous iterate is in place. + if hook.is_some() { + let mut pxn = vec![0.0; n]; + prob.p_mul(&x, &mut pxn); + let obj_hat = 0.5 * dot(&x, &pxn) / (tau * tau) + dot(&prob.c, &x) / tau; + let mut st = ConvexDebugState { + cp: Checkpoint::AfterStep, + iter: it as i32, + mu, + pinf: pres, + dinf: dres, + res, + obj: obj_hat, + alpha: (alpha, alpha), + x: &mut x, + s: &mut s, + y: &mut y, + z: &mut z, + dx: &dx, + dy: &dy, + dz: &dz, + ds: &ds, + tau: Some(&mut tau), + kappa: Some(&mut kappa), + status: None, + }; + if fire(&mut hook, &mut st) == DebugAction::Stop { + break; + } + } + } + + // Reduced-accuracy acceptance. If the driver broke down or hit the cap + // (NumericalFailure / IterationLimit) but the best iterate we reached has a + // KKT residual within √tol (e.g. tol=1e-8 → 1e-4), the problem was + // essentially solved — a near-boundary stall on a non-symmetric cone, not a + // genuine failure. Restore that iterate and report Optimal, mirroring the + // "solved to reduced accuracy" outcome of ECOS/Clarabel/SCS. This never + // fires for infeasible/unbounded problems (their residuals never get this + // small — the embedding drives τ → 0 and the certificate path triggers + // first) and never relaxes the clean convergence test above (still `tol`). + if matches!( + status, + QpStatus::NumericalFailure | QpStatus::IterationLimit + ) { + let reduced_acc = opts.tol.sqrt(); + if best_res < reduced_acc { + if let Some((bx, by, bz, bs, btau, _bkappa)) = best.take() { + // κ is not read downstream (the recovery un-homogenizes by + // 1/τ); restoring x/y/z/s/τ is what the solution recovery and + // the post-mortem hook consume. + x = bx; + y = by; + z = bz; + s = bs; + tau = btau; + status = QpStatus::Optimal; + } + } + } + + let inv = if tau.abs() > 0.0 { 1.0 / tau } else { 1.0 }; + let mut x: Vec = x.iter().map(|v| v * inv).collect(); + let mut y: Vec = y.iter().map(|v| v * inv).collect(); + let mut z: Vec = z.iter().map(|v| v * inv).collect(); + let mut px = vec![0.0; n]; + prob.p_mul(&x, &mut px); + let obj = 0.5 * dot(&x, &px) + dot(&prob.c, &x); + + // Debugger post-mortem at the recovered (un-homogenized) solution. + if hook.is_some() { + let status_str = format!("{status:?}"); + let mut st = ConvexDebugState { + cp: Checkpoint::Terminated, + iter: iters as i32, + mu: 0.0, + pinf: 0.0, + dinf: 0.0, + res: 0.0, + obj, + alpha: (0.0, 0.0), + x: &mut x, + s: &mut s, + y: &mut y, + z: &mut z, + dx: &dx, + dy: &dy, + dz: &dz, + ds: &ds, + tau: Some(&mut tau), + kappa: Some(&mut kappa), + status: Some(&status_str), + }; + let _ = fire(&mut hook, &mut st); + } + + QpSolution { + status, + x, + y, + z, + z_lb: vec![0.0; n], + z_ub: vec![0.0; n], + obj, + iters, + iterates: Vec::new(), + } +} + +/// Solve `min cᵀx s.t. Ax = b, Gx + s = h, s ∈ K` with `K` a product of +/// orthant, second-order, exponential, and power cones, via the non-symmetric +/// HSDE (cold self-dual start). +pub fn solve_conic_hsde_nonsym( + prob: &QpProblem, + specs: &[NsBlock], + opts: &QpOptions, + make_backend: F, +) -> QpSolution +where + F: FnMut() -> Box, +{ + run_nonsym(prob, specs, opts, None, make_backend, None) +} + +/// Debug-enabled [`solve_conic_hsde_nonsym`]: fires the interactive +/// [`DebugHook`] at each interior-point checkpoint of the non-symmetric +/// (exponential / power) HSDE solve. The iterate view matches the +/// symmetric HSDE driver (homogeneous `x/s/y/z` plus `τ/κ`). Apart from +/// the hook the result is identical. +pub fn solve_conic_hsde_nonsym_debug( + prob: &QpProblem, + specs: &[NsBlock], + opts: &QpOptions, + hook: &mut dyn DebugHook, + make_backend: F, +) -> QpSolution +where + F: FnMut() -> Box, +{ + run_nonsym(prob, specs, opts, None, make_backend, Some(hook)) +} + +/// Warm-started [`solve_conic_hsde_nonsym`]: seed the primal `x` from `warm_x` +/// (a previous, nearby solution) while keeping the cones centered. The +/// solution is start-independent; warm-starting lowers the initial primal +/// residual but — as for any HSDE embedding — is not guaranteed to reduce the +/// iteration count. `warm_x` is ignored if its length ≠ `prob.n`. +pub fn solve_conic_hsde_nonsym_warm( + prob: &QpProblem, + specs: &[NsBlock], + warm_x: &[f64], + opts: &QpOptions, + make_backend: F, +) -> QpSolution +where + F: FnMut() -> Box, +{ + run_nonsym(prob, specs, opts, Some(warm_x), make_backend, None) +} + +fn failed(prob: &QpProblem) -> QpSolution { + QpSolution { + status: QpStatus::NumericalFailure, + x: vec![0.0; prob.n], + y: vec![0.0; prob.m_eq()], + z: vec![1.0; prob.m_ineq()], + z_lb: vec![0.0; prob.n], + z_ub: vec![0.0; prob.n], + obj: 0.0, + iters: 0, + iterates: Vec::new(), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::qp::Triplet; + use pounce_feral::FeralSolverInterface; + + fn backend() -> Box { + Box::new(FeralSolverInterface::new()) + } + + fn opts() -> QpOptions { + QpOptions { + max_iter: 200, + ..QpOptions::default() + } + } + + /// An exponential cone is always 3 rows. Declaring it over a `G` with + /// only 2 inequality rows is a caller error: the driver must fail + /// cleanly (`NumericalFailure`) instead of indexing past the 2-row + /// slack and panicking — the guard in [`crate::ipm::solve_socp_ipm`]. + #[test] + fn mismatched_cone_dims_fail_cleanly() { + use crate::cones::ConeSpec; + use crate::ipm::solve_socp_ipm; + let prob = QpProblem { + n: 2, + p_lower: vec![], + c: vec![1.0, 0.0], + a: vec![], + b: vec![], + g: vec![Triplet::new(0, 0, -1.0), Triplet::new(1, 1, -1.0)], + h: vec![0.0, 0.0], + lb: vec![], + ub: vec![], + }; + let sol = solve_socp_ipm(&prob, &[ConeSpec::Exponential], &opts(), backend); + assert_eq!(sol.status, QpStatus::NumericalFailure); + } + + /// `min z s.t. x = 1, y = 1, (x,y,z) ∈ K_exp`. The cone forces + /// `z ≥ y·exp(x/y) = e`, so the optimum is `z = e` at `x = y = 1`. + #[test] + fn exp_epigraph_known_optimum() { + let e = std::f64::consts::E; + // Variables v = (x, y, z); slack s = v ∈ K_exp via G = −I, h = 0. + let prob = QpProblem { + n: 3, + p_lower: vec![], + c: vec![0.0, 0.0, 1.0], + a: vec![ + Triplet::new(0, 0, 1.0), // x = 1 + Triplet::new(1, 1, 1.0), // y = 1 + ], + b: vec![1.0, 1.0], + g: vec![ + Triplet::new(0, 0, -1.0), + Triplet::new(1, 1, -1.0), + Triplet::new(2, 2, -1.0), + ], + h: vec![0.0, 0.0, 0.0], + lb: vec![], + ub: vec![], + }; + let sol = solve_conic_hsde_nonsym(&prob, &[NsBlock::exp()], &opts(), backend); + assert_eq!( + sol.status, + QpStatus::Optimal, + "not optimal: {:?}", + sol.status + ); + assert!((sol.x[0] - 1.0).abs() < 1e-5, "x = {}", sol.x[0]); + assert!((sol.x[1] - 1.0).abs() < 1e-5, "y = {}", sol.x[1]); + assert!((sol.x[2] - e).abs() < 1e-5, "z = {} vs e = {e}", sol.x[2]); + assert!((sol.obj - e).abs() < 1e-5, "obj = {} vs e", sol.obj); + } + + /// `log-sum-exp` epigraph: `min t s.t. t ≥ log(e^{x₁} + e^{x₂})` with + /// `x₁ = x₂ = 0`, so the optimum is `t = log 2`. Modeled with two exp + /// cones `(xᵢ − t, 1, uᵢ) ∈ K_exp` (⇒ `uᵢ ≥ e^{xᵢ−t}`) and the orthant + /// row `u₁ + u₂ ≤ 1`. This exercises **multiple exp blocks + an orthant + /// block** in one product cone — the mixed-cone path. + #[test] + fn log_sum_exp_known_optimum() { + // v = (t, u1, u2). Rows: exp1 (0..3), exp2 (3..6), orthant (6). + let prob = QpProblem { + n: 3, + p_lower: vec![], + c: vec![1.0, 0.0, 0.0], // min t + a: vec![], + b: vec![], + g: vec![ + // exp1 slack = (x1 − t, 1, u1) = (−t, 1, u1) + Triplet::new(0, 0, 1.0), // s0 = −t + Triplet::new(2, 1, -1.0), // s2 = u1 + // exp2 slack = (−t, 1, u2) + Triplet::new(3, 0, 1.0), // s3 = −t + Triplet::new(5, 2, -1.0), // s5 = u2 + // orthant: s6 = 1 − u1 − u2 + Triplet::new(6, 1, 1.0), + Triplet::new(6, 2, 1.0), + ], + // middle exp components pinned to 1 via h (G row = 0). + h: vec![0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], + lb: vec![], + ub: vec![], + }; + let specs = [NsBlock::exp(), NsBlock::exp(), NsBlock::Orthant(1)]; + let sol = solve_conic_hsde_nonsym(&prob, &specs, &opts(), backend); + assert_eq!( + sol.status, + QpStatus::Optimal, + "not optimal: {:?}", + sol.status + ); + let want = 2.0_f64.ln(); + assert!( + (sol.x[0] - want).abs() < 1e-5, + "t = {} vs log2 = {want}", + sol.x[0] + ); + // uᵢ = e^{−t} = 1/2 at the optimum. + assert!((sol.x[1] - 0.5).abs() < 1e-4, "u1 = {}", sol.x[1]); + assert!((sol.x[2] - 0.5).abs() < 1e-4, "u2 = {}", sol.x[2]); + } + + /// A tiny **geometric program**: `min x + 1/x` over `x > 0`, whose optimum + /// is `2` at `x = 1`. With `x = e^u` it becomes `min e^u + e^{−u}`, modeled + /// as `min t₁ + t₂` with `(u, 1, t₁) ∈ K_exp` (`t₁ ≥ e^u`) and + /// `(−u, 1, t₂) ∈ K_exp` (`t₂ ≥ e^{−u}`). Optimum `u = 0`, `t₁ = t₂ = 1`. + #[test] + fn geometric_program_known_optimum() { + // v = (u, t1, t2). Rows: exp1 (0..3), exp2 (3..6). + let prob = QpProblem { + n: 3, + p_lower: vec![], + c: vec![0.0, 1.0, 1.0], // min t1 + t2 + a: vec![], + b: vec![], + g: vec![ + // exp1 slack = (u, 1, t1) + Triplet::new(0, 0, -1.0), // s0 = u + Triplet::new(2, 1, -1.0), // s2 = t1 + // exp2 slack = (−u, 1, t2) + Triplet::new(3, 0, 1.0), // s3 = −u + Triplet::new(5, 2, -1.0), // s5 = t2 + ], + h: vec![0.0, 1.0, 0.0, 0.0, 1.0, 0.0], + lb: vec![], + ub: vec![], + }; + let specs = [NsBlock::exp(), NsBlock::exp()]; + let sol = solve_conic_hsde_nonsym(&prob, &specs, &opts(), backend); + assert_eq!( + sol.status, + QpStatus::Optimal, + "not optimal: {:?}", + sol.status + ); + assert!((sol.x[0]).abs() < 1e-4, "u = {} vs 0", sol.x[0]); + assert!((sol.obj - 2.0).abs() < 1e-5, "obj = {} vs 2", sol.obj); + } + + /// The same geometric program routed through the **public** entry + /// `solve_socp_ipm` with `ConeSpec::Exponential` — confirms the routing + /// (exp specs → non-symmetric driver) is wired end-to-end. + #[test] + fn routes_exponential_through_public_entry() { + use crate::cones::ConeSpec; + use crate::ipm::solve_socp_ipm; + let prob = QpProblem { + n: 3, + p_lower: vec![], + c: vec![0.0, 1.0, 1.0], + a: vec![], + b: vec![], + g: vec![ + Triplet::new(0, 0, -1.0), + Triplet::new(2, 1, -1.0), + Triplet::new(3, 0, 1.0), + Triplet::new(5, 2, -1.0), + ], + h: vec![0.0, 1.0, 0.0, 0.0, 1.0, 0.0], + lb: vec![], + ub: vec![], + }; + let specs = [ConeSpec::Exponential, ConeSpec::Exponential]; + let sol = solve_socp_ipm(&prob, &specs, &opts(), backend); + assert_eq!( + sol.status, + QpStatus::Optimal, + "not optimal: {:?}", + sol.status + ); + assert!((sol.obj - 2.0).abs() < 1e-5, "obj = {} vs 2", sol.obj); + } + + /// Power cone known optimum: `max x s.t. (x, 2, 0.5) ∈ K_α`, i.e. + /// `x ≤ 2^α · 0.5^{1−α}`. For α = 0.5 the bound is `√(2·0.5) = 1`. + #[test] + fn power_cone_known_optimum() { + // v = (x, y, z); slack s = v ∈ K_α via G = −I, h = 0; y = 2, z = 0.5. + let prob = QpProblem { + n: 3, + p_lower: vec![], + c: vec![-1.0, 0.0, 0.0], // max x + a: vec![Triplet::new(0, 1, 1.0), Triplet::new(1, 2, 1.0)], + b: vec![2.0, 0.5], + g: vec![ + Triplet::new(0, 0, -1.0), + Triplet::new(1, 1, -1.0), + Triplet::new(2, 2, -1.0), + ], + h: vec![0.0, 0.0, 0.0], + lb: vec![], + ub: vec![], + }; + for alpha in [0.5, 0.3, 0.75] { + let sol = solve_conic_hsde_nonsym(&prob, &[NsBlock::power(alpha)], &opts(), backend); + assert_eq!(sol.status, QpStatus::Optimal, "α={alpha}: {:?}", sol.status); + let want = 2.0_f64.powf(alpha) * 0.5_f64.powf(1.0 - alpha); + assert!( + (sol.x[0] - want).abs() < 1e-5, + "α={alpha}: x = {} vs {want}", + sol.x[0] + ); + } + } + + /// A **second-order cone mixed with an exponential cone** in one problem. + /// `min t + z s.t. (t, 3, 4) ∈ SOC(3)` (⇒ `t ≥ ‖(3,4)‖ = 5`) and + /// `(1, 1, z) ∈ K_exp` (⇒ `z ≥ e`). Optimum `t = 5`, `z = e`, + /// `obj = 5 + e`. Exercises the self-scaled SOC path and the dual-aware + /// exp path together. + #[test] + fn soc_mixed_with_exp() { + let e = std::f64::consts::E; + // v = (t, z). Rows: SOC (0..3) = (t, 3, 4); exp (3..6) = (1, 1, z). + let prob = QpProblem { + n: 2, + p_lower: vec![], + c: vec![1.0, 1.0], // min t + z + a: vec![], + b: vec![], + g: vec![ + Triplet::new(0, 0, -1.0), // SOC s0 = t + Triplet::new(5, 1, -1.0), // exp s5 = z + ], + h: vec![0.0, 3.0, 4.0, 1.0, 1.0, 0.0], + lb: vec![], + ub: vec![], + }; + let specs = [NsBlock::SecondOrder(3), NsBlock::exp()]; + let sol = solve_conic_hsde_nonsym(&prob, &specs, &opts(), backend); + assert_eq!( + sol.status, + QpStatus::Optimal, + "not optimal: {:?}", + sol.status + ); + assert!((sol.x[0] - 5.0).abs() < 1e-5, "t = {} vs 5", sol.x[0]); + assert!((sol.x[1] - e).abs() < 1e-5, "z = {} vs e", sol.x[1]); + assert!( + (sol.obj - (5.0 + e)).abs() < 1e-5, + "obj = {} vs 5+e", + sol.obj + ); + } + + /// Warm-starting is **start-independent**: seeding the primal from the + /// optimum, or from a deliberately wrong point, converges to the same + /// solution. (We verify correctness — the property the warm path must + /// preserve — not an iteration-count reduction, which the HSDE embedding + /// does not guarantee.) + #[test] + fn warm_start_is_start_independent() { + // Geometric program min e^u + e^{−u} = 2 (u, t1, t2). + let prob = QpProblem { + n: 3, + p_lower: vec![], + c: vec![0.0, 1.0, 1.0], + a: vec![], + b: vec![], + g: vec![ + Triplet::new(0, 0, -1.0), + Triplet::new(2, 1, -1.0), + Triplet::new(3, 0, 1.0), + Triplet::new(5, 2, -1.0), + ], + h: vec![0.0, 1.0, 0.0, 0.0, 1.0, 0.0], + lb: vec![], + ub: vec![], + }; + let specs = [NsBlock::exp(), NsBlock::exp()]; + let cold = solve_conic_hsde_nonsym(&prob, &specs, &opts(), backend); + assert_eq!(cold.status, QpStatus::Optimal); + assert!((cold.obj - 2.0).abs() < 1e-5); + + // The objective is the start-independent invariant (the GP minimum is + // flat in `u`, so the coordinate itself is sensitive — the objective + // is what must agree). Warm from the optimum, a bad point, and a + // length-mismatched (ignored) vector all reach the same optimum. + for warm in [cold.x.as_slice(), &[50.0, -30.0, 9.0], &[1.0]] { + let sol = solve_conic_hsde_nonsym_warm(&prob, &specs, warm, &opts(), backend); + assert_eq!(sol.status, QpStatus::Optimal, "warm {warm:?}"); + assert!( + (sol.obj - cold.obj).abs() < 1e-5, + "warm {warm:?} obj {} vs {}", + sol.obj, + cold.obj + ); + } + } + + /// SOC routed through the non-symmetric driver alone matches the known + /// norm-minimization optimum (validates the SOC path in isolation). + /// `min t s.t. (t, x−2, x+1) ∈ SOC` → `x = ?`; simplest: `(t, 3, 4)` → 5. + #[test] + fn soc_only_through_nonsym_driver() { + let prob = QpProblem { + n: 1, + p_lower: vec![], + c: vec![1.0], + a: vec![], + b: vec![], + g: vec![Triplet::new(0, 0, -1.0)], + h: vec![0.0, 3.0, 4.0], + lb: vec![], + ub: vec![], + }; + let sol = solve_conic_hsde_nonsym(&prob, &[NsBlock::SecondOrder(3)], &opts(), backend); + assert_eq!(sol.status, QpStatus::Optimal, "{:?}", sol.status); + assert!((sol.x[0] - 5.0).abs() < 1e-5, "t = {} vs 5", sol.x[0]); + } + + /// Power cone routed through the **public** entry `solve_socp_ipm` with + /// `ConeSpec::Power(α)`. + #[test] + fn routes_power_through_public_entry() { + use crate::cones::ConeSpec; + use crate::ipm::solve_socp_ipm; + let prob = QpProblem { + n: 3, + p_lower: vec![], + c: vec![-1.0, 0.0, 0.0], + a: vec![Triplet::new(0, 1, 1.0), Triplet::new(1, 2, 1.0)], + b: vec![2.0, 0.5], + g: vec![ + Triplet::new(0, 0, -1.0), + Triplet::new(1, 1, -1.0), + Triplet::new(2, 2, -1.0), + ], + h: vec![0.0, 0.0, 0.0], + lb: vec![], + ub: vec![], + }; + let sol = solve_socp_ipm(&prob, &[ConeSpec::Power(0.5)], &opts(), backend); + assert_eq!(sol.status, QpStatus::Optimal, "{:?}", sol.status); + assert!((sol.x[0] - 1.0).abs() < 1e-5, "x = {} vs 1", sol.x[0]); + } +} diff --git a/crates/pounce-convex/src/ipm.rs b/crates/pounce-convex/src/ipm.rs new file mode 100644 index 00000000..12e4967d --- /dev/null +++ b/crates/pounce-convex/src/ipm.rs @@ -0,0 +1,2196 @@ +//! Primal-dual interior-point driver for convex QP. +//! +//! Infeasible-start primal-dual path-following with **Mehrotra +//! predictor-corrector** (adaptive centering σ = (μ_aff/μ)³ plus the +//! second-order `Δs∘Δz` term) and fraction-to-boundary step control. +//! Predictor and corrector share one factorization per iteration. The +//! homogeneous self-dual embedding (for clean infeasibility detection +//! and a self-starting iterate) is the remaining Phase 3 piece and slots +//! into this same scaffolding. +//! +//! On bound/inequality-constrained convex QPs this reaches the solution +//! in materially fewer interior-point iterations than routing the same +//! problem through the NLP filter-IPM — see +//! `crates/pounce-cli/tests/qp_vs_nlp_iterations.rs` (≈41% fewer at +//! n=50), the check behind the plan's 30–50% claim. +//! +//! ## Method +//! +//! For the standard-form QP (see [`crate::qp`]) with slacks `s ≥ 0` on +//! the inequalities (`Gx + s = h`) and multipliers `y` (equality), +//! `z ≥ 0` (inequality), the KKT conditions are +//! +//! ```text +//! P x + c + Aᵀ y + Gᵀ z = 0 (stationarity, r_d) +//! A x − b = 0 (r_p) +//! G x + s − h = 0 (r_g) +//! s ∘ z = 0 (complementarity) +//! ``` +//! +//! Each iteration solves the symmetric indefinite Newton system +//! +//! ```text +//! ⎡ P+δI Aᵀ Gᵀ ⎤ ⎡dx⎤ ⎡ −r_d ⎤ +//! ⎢ A −δI 0 ⎥ ⎢dy⎥ = ⎢ −r_p ⎥ +//! ⎣ G 0 −(S⊘Z)−δI ⎦ ⎣dz⎦ ⎣ −r_g + r_c ⊘ z ⎦ +//! ``` +//! +//! (with `ds` recovered from `dz`) through the shared +//! [`pounce_linsol::Factorization`]. The tiny static regularization `δ` +//! makes the system quasi-definite so the LDLᵀ has a well-defined +//! inertia; because convergence is tested on the *unregularized* +//! residuals, the fixed point is the true QP solution — `δ` only +//! perturbs the search direction. +//! +//! The cone-specific pieces (`μ`, the `S⊘Z` scaling diagonal, the +//! complementarity residual, `ds` recovery, and the fraction-to-boundary +//! step) all route through the [`Cone`](crate::cones::Cone) trait so +//! that Phases 4–6 extend rather than rewrite this driver. + +use crate::cones::{CompositeCone, Cone, ConeBlock, ConeSpec}; +use crate::debug::{fire, ConvexDebugState}; +use crate::qp::{QpIterate, QpProblem, QpSolution, QpStatus}; +use pounce_common::debug::{Checkpoint, DebugAction, DebugHook}; +use pounce_common::types::{Index, Number}; +use pounce_linsol::{Factorization, SparseSymLinearSolverInterface}; +use std::collections::BTreeMap; + +/// Options for the QP interior-point solve. +#[derive(Debug, Clone, Copy)] +pub struct QpOptions { + /// Convergence tolerance on the max KKT residual and duality measure. + pub tol: f64, + /// Maximum iterations. + pub max_iter: usize, + /// Fraction-to-boundary parameter τ ∈ (0, 1). (The centering + /// parameter σ is computed adaptively by the Mehrotra predictor; + /// it is not an option.) + pub tau: f64, + /// Static KKT regularization δ. Added on the (block) diagonal to make + /// the reduced KKT system quasi-definite, so the LDLᵀ has a stable, + /// well-defined inertia. Because convergence is tested on the + /// *unregularized* residuals, δ only perturbs the search direction — but + /// with a full Newton step it also floors the achievable primal residual + /// at `δ·‖dy‖`. On badly-scaled NETLIB LPs the equality multipliers grow + /// large (`adlittle`: `‖dy‖ ≈ 4e8`), so a too-large δ freezes `inf_pr` + /// above the tolerance and the IPM stalls to its iteration cap. The + /// default is sized small enough to clear that floor on such instances + /// while still keeping the factorization quasi-definite (see [`Default`]). + pub reg: f64, + /// Relative tolerance for accepting an infeasibility/unboundedness + /// certificate. A certificate is declared only when its defining + /// inequalities hold to this tolerance *relative to the certificate's + /// own magnitude*, so the status is always backed by a verified + /// proof — there are no false positives, only (rarely) an + /// `IterationLimit` fallback when no certificate is verifiable. + pub infeas_tol: f64, + /// Use the homogeneous self-dual embedding driver ([`crate::hsde`]) rather + /// than the infeasible-start primal–dual method. HSDE self-starts, produces + /// infeasibility/unboundedness certificates natively, and stays stable on + /// badly-conditioned problems where the infeasible-start method diverges + /// (its duality measure blows up — e.g. NETLIB `nl`, where the direct path + /// runs `mu` to ~1e11 and trips a spurious `NumericalFailure`, while HSDE + /// converges). It is also the substrate for the non-symmetric cones + /// (exp/power). This matches Clarabel/ECOS/SCS, which embed precisely for + /// that robustness. **Default `true`.** + /// + /// HSDE does not (yet) exploit warm starts or reuse an external + /// factorization, so the advanced performance paths — [`QpWarmStart`] and + /// the build-once [`QpFactorization`] handle — set this `false` to opt back + /// into the direct solver, which they require. Their callers are doing + /// *nearby reoptimization* (a known-solvable neighborhood), where the + /// direct path's fragility is not a concern. + pub use_hsde: bool, + /// Collect a per-iteration convergence trace into + /// [`crate::QpSolution::iterates`]. Off by default so a normal solve has + /// no recording overhead; turn on when a solve report or benchmark + /// harness wants the per-iteration history. Default `false`. + pub collect_iterates: bool, + /// Ruiz-equilibrate the problem data before solving (see + /// [`crate::equilibrate`]). A conditioning aid for the **direct** + /// infeasible-start IPM, which factorizes the raw KKT system and is fragile + /// on badly-scaled data. It is applied only when [`Self::use_hsde`] is + /// `false` (the direct one-shot path and the warm-start path); the default + /// HSDE driver skips it, conditioning the system internally through its + /// per-cone NT scaling. Applied only on the LP/QP orthant entry points + /// ([`solve_qp_ipm`] / [`solve_qp_ipm_warm`]), where per-row scaling + /// preserves the cone; the SOCP/conic driver never equilibrates, since + /// per-row scaling is unsound for non-orthant cones. Default `true`. + pub equilibrate: bool, +} + +impl Default for QpOptions { + fn default() -> Self { + QpOptions { + tol: 1e-8, + max_iter: 200, + tau: 0.95, + // δ = 1e-10: small enough that the primal-residual floor δ·‖dy‖ + // clears `tol` even when the equality duals are large (badly + // scaled NETLIB LPs such as `adlittle`, which stalls at the cap + // with δ = 1e-8 but converges in ~57 iters here), yet still + // strictly positive so the reduced KKT stays quasi-definite for a + // stable LDLᵀ inertia. The whole 1e-9‥1e-11 band converges the + // LP/QP benchmark suites; 1e-10 is centered in it. + reg: 1e-10, + infeas_tol: 1e-7, + use_hsde: true, + collect_iterates: false, + equilibrate: true, + } + } +} + +/// Solve a convex QP, honoring any per-variable bounds (`lb`/`ub`). +/// +/// Variable bounds are a first-class part of [`QpProblem`] so presolve +/// can reason about boxes; the solver itself expands the *finite* bounds +/// into internal inequality rows, runs the bounds-agnostic Mehrotra core +/// ([`solve_qp_core`]), and splits the returned inequality multipliers +/// back into the original `z` and the bound multipliers `z_lb`/`z_ub`. +/// The iteration math is unchanged by the presence of bounds. +pub fn solve_qp_ipm(prob: &QpProblem, opts: &QpOptions, make_backend: F) -> QpSolution +where + F: FnMut() -> Box, +{ + // Ruiz-equilibrate the data first — but only for the *direct* driver. + // Solving the scaled problem and unscaling the result keeps the direct + // infeasible-start IPM well-conditioned without changing the recovered KKT + // point. The HSDE driver does NOT need (and must not get) this: the + // self-dual embedding conditions the system internally through its per-cone + // NT scaling — exactly as Clarabel/ECOS do, neither of which Ruiz-pre-scales + // — so it solves even badly-scaled data (NETLIB `nl`, ‖c‖~1e6) directly. + // Layering Ruiz on top is not only redundant for HSDE, it composes badly + // with presolve: presolve's reductions plus Ruiz's σ=1/‖c‖ cost scaling + // over-condition the reduced KKT system and trip the factorization near the + // boundary (a `NumericalFailure` that neither transform produces alone). + // See `crate::equilibrate`. + if opts.equilibrate && !opts.use_hsde { + let (scaled, scaling) = crate::equilibrate::equilibrate(prob); + let inner = QpOptions { + equilibrate: false, + ..*opts + }; + let mut sol = solve_qp_ipm_unscaled(&scaled, &inner, make_backend); + scaling.unscale_solution(prob, &mut sol); + return sol; + } + let mut make_backend = make_backend; + let sol = solve_qp_ipm_unscaled(prob, opts, &mut make_backend); + // HSDE robustness fallback. The self-dual driver normally conditions itself + // through its per-cone NT scaling and so deliberately skips Ruiz pre-scaling + // (see the comment above). But on a *severely* ill-scaled system — e.g. the + // spatial-B&B relaxation LPs whose McCormick/division columns and ln/√ + // envelope tangents span `|G| ∈ [1e-7, 1e6]` — the embedded KKT + // factorization can still break down (`NumericalFailure`), discarding an + // otherwise-correct iterate and leaving the B&B node with no lower bound. + // When that happens, retry once *with* Ruiz equilibration. This is sound and + // does not contradict the "Ruiz composes badly with HSDE" note: we only get + // here because the un-equilibrated solve already failed, so there is nothing + // left to regress — equilibration can only recover a usable solve or fail + // the same way (in which case we keep the original result). + if opts.use_hsde && opts.equilibrate && sol.status == QpStatus::NumericalFailure { + let (scaled, scaling) = crate::equilibrate::equilibrate(prob); + let inner = QpOptions { + equilibrate: false, + ..*opts + }; + let mut retry = solve_qp_ipm_unscaled(&scaled, &inner, &mut make_backend); + scaling.unscale_solution(prob, &mut retry); + if retry.status != QpStatus::NumericalFailure { + return retry; + } + } + sol +} + +/// The bounds-aware orthant solve without equilibration (the historical +/// [`solve_qp_ipm`] body). Factored out so [`solve_qp_ipm`] can wrap it with +/// Ruiz scaling. +fn solve_qp_ipm_unscaled(prob: &QpProblem, opts: &QpOptions, make_backend: F) -> QpSolution +where + F: FnMut() -> Box, +{ + if !prob.has_bounds() { + let cone = CompositeCone::single_nonneg(prob.m_ineq()); + return solve_qp_core(prob, &cone, opts, None, make_backend); + } + let (expanded, bound_rows) = expand_bounds(prob); + let cone = CompositeCone::single_nonneg(expanded.m_ineq()); + let sol = solve_qp_core(&expanded, &cone, opts, None, make_backend); + split_bound_duals(prob, &bound_rows, sol) +} + +/// Solve a convex LP / QP with an interactive [`DebugHook`] attached: the +/// hook is fired at each interior-point checkpoint (iteration start, after +/// the Newton step, after the step is applied, and at termination) so a +/// debugger can step, inspect, and break on the solve. +/// +/// Targets the direct (non-HSDE) convex IPM, so the debugged `x` block is +/// the user's variables (finite bounds are expanded into a trailing +/// nonnegative block, as in [`solve_qp_ipm`], and surface in the `s`/`z` +/// blocks). Apart from the hook the result is identical to +/// [`solve_qp_ipm`]. +pub fn solve_qp_ipm_debug( + prob: &QpProblem, + opts: &QpOptions, + hook: &mut dyn DebugHook, + mut make_backend: F, +) -> QpSolution +where + F: FnMut() -> Box, +{ + // Build the factorization and run the core loop directly with the hook + // (mirrors `solve_qp_core`'s non-HSDE path; `solve_qp_core` itself can't + // carry the borrowed hook through its generic plumbing). When the HSDE + // driver is selected, debug it instead — it self-starts and builds its + // own factorization. + let run = |p: &QpProblem, cone: &CompositeCone, mk: &mut F, hook: &mut dyn DebugHook| { + if opts.use_hsde { + return crate::hsde::solve_conic_hsde(p, cone, opts, mk, Some(hook)); + } + match build_factorization(p, cone, opts, mk) { + Ok((kkt, mut fact)) => run_ipm(p, cone, opts, &kkt, &mut fact, None, Some(hook)), + Err(()) => failed_solution( + p, + vec![0.0; p.n], + vec![0.0; p.m_eq()], + vec![1.0; p.m_ineq()], + 0, + ), + } + }; + if !prob.has_bounds() { + let cone = CompositeCone::single_nonneg(prob.m_ineq()); + return run(prob, &cone, &mut make_backend, hook); + } + let (expanded, bound_rows) = expand_bounds(prob); + let cone = CompositeCone::single_nonneg(expanded.m_ineq()); + let sol = run(&expanded, &cone, &mut make_backend, hook); + split_bound_duals(prob, &bound_rows, sol) +} + +/// Solve a convex QP starting from a warm point (typically a previous +/// solution of a nearby problem). See [`QpWarmStart`] for the centering +/// strategy and when warm starting helps. +/// +/// Identical to [`solve_qp_ipm`] except the interior-point iteration is +/// seeded from `warm` instead of the cold default. The *solution* is +/// independent of the start (the IPM converges to the same KKT point); a +/// good warm start only reduces the iteration count. +pub fn solve_qp_ipm_warm( + prob: &QpProblem, + opts: &QpOptions, + warm: &QpWarmStart, + make_backend: F, +) -> QpSolution +where + F: FnMut() -> Box, +{ + // Warm-starting requires the direct infeasible-start solver: HSDE + // self-starts and ignores a warm point (see `QpOptions::use_hsde`). So this + // path always runs the direct method, independent of the (HSDE) default — + // otherwise the warm start would silently do nothing. A caller that + // warm-starts is doing nearby reoptimization (a known-solvable + // neighborhood), where the direct path's fragility is not a concern. + let direct = QpOptions { + use_hsde: false, + equilibrate: false, + ..*opts + }; + // Equilibrate (default on) just as the cold path does, mapping the + // warm-start point into the scaled coordinates so the warm benefit is + // preserved and the two paths run on identically-conditioned data. + if opts.equilibrate { + let (scaled, scaling) = crate::equilibrate::equilibrate(prob); + let scaled_warm = scaling.scale_warm_start(warm); + let mut sol = solve_qp_ipm_warm(&scaled, &direct, &scaled_warm, make_backend); + scaling.unscale_solution(prob, &mut sol); + return sol; + } + if !prob.has_bounds() { + let w = WarmStart { + x: warm.x.clone(), + y: warm.y.clone(), + z: warm.z.clone(), + }; + let cone = CompositeCone::single_nonneg(prob.m_ineq()); + return solve_qp_core(prob, &cone, &direct, Some(&w), make_backend); + } + let (expanded, bound_rows) = expand_bounds(prob); + let w = WarmStart { + x: warm.x.clone(), + y: warm.y.clone(), + z: merge_bound_duals(prob, &bound_rows, warm), + }; + let cone = CompositeCone::single_nonneg(expanded.m_ineq()); + let sol = solve_qp_core(&expanded, &cone, &direct, Some(&w), make_backend); + split_bound_duals(prob, &bound_rows, sol) +} + +/// Solve a standard-form **SOCP** (or mixed LP/QP + second-order cones): +/// `min ½xᵀPx+cᵀx s.t. Ax=b, Gx ⪯_K h`, where the inequality block `Gx ≤ h` +/// is partitioned into the cones `K` described by `cones` (in row order; +/// each `s = h − Gx` block must lie in its cone). `cones` must cover the +/// `m_ineq` rows. Variable bounds (`lb`/`ub`) are appended as a trailing +/// nonnegative block. +pub fn solve_socp_ipm( + prob: &QpProblem, + cones: &[ConeSpec], + opts: &QpOptions, + make_backend: F, +) -> QpSolution +where + F: FnMut() -> Box, +{ + // The cones must partition the inequality rows exactly; otherwise the + // cone vectors and the `m_ineq` slack disagree and the driver would read + // out of bounds (an exp/power cone is always 3 rows). Fail cleanly here. + if !cone_dims_cover(cones, prob.m_ineq()) { + return failed_solution( + prob, + vec![0.0; prob.n], + vec![0.0; prob.m_eq()], + vec![0.0; prob.m_ineq()], + 0, + ); + } + // Non-symmetric cones (exponential / power) route to the dedicated HSDE + // driver; self-scaled cones (orthant / SOC / PSD) stay on the symmetric + // path below. Mixing the two families in one problem is not supported. + let has_nonsym = cones + .iter() + .any(|c| matches!(c, ConeSpec::Exponential | ConeSpec::Power(_))); + let has_psd = cones.iter().any(|c| matches!(c, ConeSpec::Psd(_))); + if has_nonsym && has_psd { + return failed_solution( + prob, + vec![0.0; prob.n], + vec![0.0; prob.m_eq()], + vec![0.0; prob.m_ineq()], + 0, + ); + } + if has_nonsym { + return solve_nonsym(prob, cones, opts, make_backend, None); + } + // Sparsity: split any block-diagonal PSD cone into independent smaller + // cones (one dense O(m²) KKT block → several small ones, exploited by the + // sparse factorization). The transform is solution-equivalent; the dual + // `z` is scattered back to the original row layout afterward. + if has_psd { + // First the cheap block-diagonal split (disjoint blocks → no new + // variables); then chordal range-space decomposition of any still + // connected-but-sparse PSD cone (introduces clique blocks + overlap + // consistency equalities). Reconstruct the dual through both layers. + let (prob1, cones1, row_map) = decompose_psd(prob, cones); + let (prob2, cones2, recon) = chordal_decompose(&prob1, &cones1); + let sol2 = solve_socp_symmetric(&prob2, &cones2, opts, make_backend); + let sol1 = chordal_reconstruct(sol2, &recon, &prob1); + return remap_decomposed_z(sol1, &row_map, prob.m_ineq()); + } + solve_socp_symmetric(prob, cones, opts, make_backend) +} + +/// Debug-enabled [`solve_socp_ipm`]: fires the interactive [`DebugHook`] at +/// each interior-point checkpoint. Exponential / power cones run on the +/// non-symmetric HSDE driver; all other cones (orthant / SOC / PSD) run on +/// the direct symmetric IPM. Under the debugger a PSD cone is solved +/// *directly* (no chordal decomposition) so the debugged `x`/`s`/`y`/`z` +/// blocks correspond to the user's problem; the solution is unchanged. +pub fn solve_socp_ipm_debug( + prob: &QpProblem, + cones: &[ConeSpec], + opts: &QpOptions, + hook: &mut dyn DebugHook, + mut make_backend: F, +) -> QpSolution +where + F: FnMut() -> Box, +{ + if !cone_dims_cover(cones, prob.m_ineq()) { + return failed_solution( + prob, + vec![0.0; prob.n], + vec![0.0; prob.m_eq()], + vec![0.0; prob.m_ineq()], + 0, + ); + } + let has_nonsym = cones + .iter() + .any(|c| matches!(c, ConeSpec::Exponential | ConeSpec::Power(_))); + let has_psd = cones.iter().any(|c| matches!(c, ConeSpec::Psd(_))); + if has_nonsym && has_psd { + return failed_solution( + prob, + vec![0.0; prob.n], + vec![0.0; prob.m_eq()], + vec![0.0; prob.m_ineq()], + 0, + ); + } + if has_nonsym { + return solve_nonsym(prob, cones, opts, make_backend, Some(hook)); + } + // Symmetric cones: debug the direct IPM (build the factorization and run + // the core loop with the hook), bound-expanded as in + // `solve_socp_symmetric`. PSD is solved directly here (no decomposition). + let run = |p: &QpProblem, cone: &CompositeCone, mk: &mut F, hook: &mut dyn DebugHook| { + match build_factorization(p, cone, opts, mk) { + Ok((kkt, mut fact)) => run_ipm(p, cone, opts, &kkt, &mut fact, None, Some(hook)), + Err(()) => failed_solution( + p, + vec![0.0; p.n], + vec![0.0; p.m_eq()], + vec![1.0; p.m_ineq()], + 0, + ), + } + }; + if !prob.has_bounds() { + let cone = CompositeCone::from_specs(cones); + return run(prob, &cone, &mut make_backend, hook); + } + let (expanded, bound_rows) = expand_bounds(prob); + let mut specs = cones.to_vec(); + specs.push(ConeSpec::Nonneg(bound_rows.len())); + let cone = CompositeCone::from_specs(&specs); + let sol = run(&expanded, &cone, &mut make_backend, hook); + split_bound_duals(prob, &bound_rows, sol) +} + +/// The symmetric-cone solve (orthant / SOC / PSD): expand finite bounds into +/// a trailing orthant block, run the Mehrotra core, and split the bound +/// duals back out. Shared by [`solve_socp_ipm`] and the PSD-decomposed path. +fn solve_socp_symmetric( + prob: &QpProblem, + cones: &[ConeSpec], + opts: &QpOptions, + make_backend: F, +) -> QpSolution +where + F: FnMut() -> Box, +{ + if !prob.has_bounds() { + let cone = CompositeCone::from_specs(cones); + return solve_qp_core(prob, &cone, opts, None, make_backend); + } + // Bounds expand into a trailing nonnegative block after the user cones. + let (expanded, bound_rows) = expand_bounds(prob); + let mut specs = cones.to_vec(); + specs.push(ConeSpec::Nonneg(bound_rows.len())); + let cone = CompositeCone::from_specs(&specs); + let sol = solve_qp_core(&expanded, &cone, opts, None, make_backend); + split_bound_duals(prob, &bound_rows, sol) +} + +/// Scatter the inequality dual `z` of a PSD-decomposed solve back to the +/// original inequality-row layout: new row `r` maps to `row_map[r]`, and the +/// dropped cross-block rows (structurally zero; their `G` rows are empty so +/// they carry no stationarity term) take dual `0`. Everything else +/// (`x`/`y`/bound duals/objective) is unchanged by the decomposition. +fn remap_decomposed_z(sol: QpSolution, row_map: &[usize], orig_m_ineq: usize) -> QpSolution { + let mut z = vec![0.0; orig_m_ineq]; + for (new_r, &orig_r) in row_map.iter().enumerate() { + z[orig_r] = sol.z[new_r]; + } + QpSolution { z, ..sol } +} + +/// Split each block-diagonal `Psd(n)` cone into independent PSD cones over +/// the connected components of its aggregate sparsity graph. +/// +/// A `Psd(n)` cone occupies `n(n+1)/2` `svec` rows of `(G, h)`. Treating the +/// matrix indices `0..n` as graph vertices and adding an edge `(i,j)` for +/// every *structurally present* off-diagonal `svec` row (nonzero `h` or a +/// non-empty `G` row), the connected components partition the matrix into +/// diagonal blocks: cross-component entries are structurally zero, so +/// `smat(s)` is block-diagonal and `⪰ 0` iff each block is. The cone is then +/// replaced by one `Psd(|C|)` per component `C` (its lower triangle pulled +/// from the original rows, in `svec` order), and the cross-component rows are +/// dropped. Non-PSD cones and undecomposable PSD cones pass through unchanged. +/// +/// Returns `(transformed problem, transformed cones, new→original ineq-row +/// map)`. This turns one dense `O((n(n+1)/2)²)` KKT block into several small +/// ones — the first (non-overlapping) rung of chordal sparsity for SDPs. +pub(crate) fn decompose_psd( + prob: &QpProblem, + cones: &[ConeSpec], +) -> (QpProblem, Vec, Vec) { + use crate::qp::Triplet; + let m_ineq = prob.m_ineq(); + let mut rows_of_g: Vec> = vec![Vec::new(); m_ineq]; + for t in &prob.g { + rows_of_g[t.row].push(*t); + } + + let mut new_g: Vec = Vec::new(); + let mut new_h: Vec = Vec::new(); + let mut new_cones: Vec = Vec::new(); + let mut row_map: Vec = Vec::new(); + + // Copy original ineq row `r` to a fresh row at the end of `new_g`/`new_h`. + let emit = + |r: usize, new_g: &mut Vec, new_h: &mut Vec, row_map: &mut Vec| { + let nr = new_h.len(); + for t in &rows_of_g[r] { + new_g.push(Triplet::new(nr, t.col, t.val)); + } + new_h.push(prob.h[r]); + row_map.push(r); + }; + + let mut off = 0usize; + for c in cones { + let d = c.dim(); + match c { + ConeSpec::Psd(n) => { + let n = *n; + // svec local order: (i,j) for j in 0..n, i in j..n. + let mut kij: Vec<(usize, usize)> = Vec::with_capacity(d); + for j in 0..n { + for i in j..n { + kij.push((i, j)); + } + } + // Union-find over the matrix indices. + let mut parent: Vec = (0..n).collect(); + fn find(parent: &mut [usize], x: usize) -> usize { + let mut r = x; + while parent[r] != r { + r = parent[r]; + } + let mut cur = x; + while parent[cur] != r { + let nxt = parent[cur]; + parent[cur] = r; + cur = nxt; + } + r + } + for (k, &(i, j)) in kij.iter().enumerate() { + if i != j { + let r = off + k; + let present = prob.h[r] != 0.0 || !rows_of_g[r].is_empty(); + if present { + let (ri, rj) = (find(&mut parent, i), find(&mut parent, j)); + if ri != rj { + parent[ri] = rj; + } + } + } + } + // Components, in ascending-vertex order. + let mut comps: BTreeMap> = BTreeMap::new(); + for v in 0..n { + let root = find(&mut parent, v); + comps.entry(root).or_default().push(v); + } + if comps.len() <= 1 { + // Nothing to split: copy the cone's rows through unchanged. + for k in 0..d { + emit(off + k, &mut new_g, &mut new_h, &mut row_map); + } + new_cones.push(ConeSpec::Psd(n)); + } else { + // Global (i,j) → local svec index `k`. + let mut idx = std::collections::HashMap::with_capacity(d); + for (k, &(i, j)) in kij.iter().enumerate() { + idx.insert((i, j), k); + } + for comp in comps.values() { + let cn = comp.len(); + // Each component's own lower triangle, in svec order. + for jj in 0..cn { + for ii in jj..cn { + // comp is ascending, so comp[ii] ≥ comp[jj]. + let k = idx[&(comp[ii], comp[jj])]; + emit(off + k, &mut new_g, &mut new_h, &mut row_map); + } + } + new_cones.push(ConeSpec::Psd(cn)); + } + // Cross-component rows are structurally zero → dropped. + } + } + _ => { + for k in 0..d { + emit(off + k, &mut new_g, &mut new_h, &mut row_map); + } + new_cones.push(*c); + } + } + off += d; + } + + let new_prob = QpProblem { + g: new_g, + h: new_h, + ..prob.clone() + }; + (new_prob, new_cones, row_map) +} + +/// Where a (post-block-split) inequality row's dual comes from after the +/// chordal range-space reformulation. +enum ZSrc { + /// A row copied verbatim — its dual is `z[aug_ineq_row]`. + Ineq(usize), + /// A PSD entry that became a consistency equality — its dual is the + /// equality multiplier `y[aug_eq_row]`. + Eq(usize), + /// A dropped (out-of-pattern) entry — dual `0`. + Zero, +} + +/// Bookkeeping to map an augmented solve back to the pre-chordal layout. +pub(crate) struct ChordalRecon { + orig_n: usize, + orig_m_eq: usize, + orig_m_ineq: usize, + z_src: Vec, +} + +/// Range-space chordal decomposition of any connected-but-sparse PSD cone. +/// +/// For a `Psd(n)` cone whose sparsity pattern is chordal with overlapping +/// maximal cliques `C₁…C_p`, the slack `s ⪰ 0` is rewritten as +/// `s = Σ_k Tᵀ_{C_k} S_k T_{C_k}` with each `S_k ⪰ 0` (Agler et al.). This +/// introduces clique matrix variables `w_k = svec(S_k)` (appended to `x`, +/// each constrained `⪰ 0` by a small `Psd(|C_k|)` cone), and one **consistency +/// equality** per clique-covered entry — `(h − Gx)ᵢⱼ = Σ_{k∋(i,j)} (S_k)ᵢⱼ` — +/// replacing the one dense `O(m²)` block with several small ones. Entries +/// outside every clique are structurally zero and dropped. +/// +/// Dense or already-decomposed PSD cones (and all non-PSD cones) pass through +/// unchanged. Returns `(augmented problem, augmented cones, reconstruction)`. +pub(crate) fn chordal_decompose( + prob: &QpProblem, + cones: &[ConeSpec], +) -> (QpProblem, Vec, ChordalRecon) { + use crate::cones::chordal; + use crate::cones::psd::svec_index; + use crate::qp::Triplet; + use std::collections::HashMap; + + let orig_n = prob.n; + let orig_m_eq = prob.m_eq(); + let orig_m_ineq = prob.m_ineq(); + + let mut rows_of_g: Vec> = vec![Vec::new(); orig_m_ineq]; + for t in &prob.g { + rows_of_g[t.row].push(*t); + } + + let mut aug_g: Vec = Vec::new(); + let mut aug_h: Vec = Vec::new(); + let mut aug_cones: Vec = Vec::new(); + let mut aug_a: Vec = prob.a.clone(); + let mut aug_b: Vec = prob.b.clone(); + let mut z_src: Vec = (0..orig_m_ineq).map(|_| ZSrc::Zero).collect(); + let mut aug_n = orig_n; + let mut eq_row = orig_m_eq; // next augmented equality row index + + let mut off = 0usize; + for c in cones { + let d = c.dim(); + let decompose = match c { + ConeSpec::Psd(n) if *n >= 2 => Some(*n), + _ => None, + }; + let cliques = decompose.and_then(|n| { + let mut edges = Vec::new(); + for j in 0..n { + for i in (j + 1)..n { + let r = off + svec_index(n, i, j); + if prob.h[r] != 0.0 || !rows_of_g[r].is_empty() { + edges.push((i, j)); + } + } + } + let ch = chordal::analyze(n, &edges); + // Only worth it when it genuinely splits into >1 clique. + (ch.cliques.len() > 1).then_some((n, ch.cliques)) + }); + + match cliques { + None => { + // Copy this cone's rows verbatim. + for k in 0..d { + let nr = aug_h.len(); + for t in &rows_of_g[off + k] { + aug_g.push(Triplet::new(nr, t.col, t.val)); + } + aug_h.push(prob.h[off + k]); + z_src[off + k] = ZSrc::Ineq(nr); + } + aug_cones.push(*c); + } + Some((n, cl_list)) => { + // Allocate a clique block per maximal clique and a Psd cone + // (s = w_k via G = −I) enforcing S_k ⪰ 0. + let mut clique_cols: Vec<(Vec, usize)> = Vec::new(); + for cl in &cl_list { + let cn = cl.len(); + let wbase = aug_n; + aug_n += cn * (cn + 1) / 2; + for jj in 0..cn { + for ii in jj..cn { + let nr = aug_h.len(); + aug_g.push(Triplet::new(nr, wbase + svec_index(cn, ii, jj), -1.0)); + aug_h.push(0.0); + } + } + aug_cones.push(ConeSpec::Psd(cn)); + clique_cols.push((cl.clone(), wbase)); + } + // Position of each vertex within each clique. + let pos: Vec> = cl_list + .iter() + .map(|cl| cl.iter().enumerate().map(|(p, &v)| (v, p)).collect()) + .collect(); + // One consistency equality per clique-covered entry. + for j in 0..n { + for i in j..n { + let k = svec_index(n, i, j); + let r = off + k; + // Cliques containing both i and j contribute (S_k)ᵢⱼ. + let mut w_terms: Vec = Vec::new(); + for (ci, (cl, wbase)) in clique_cols.iter().enumerate() { + if let (Some(&pi), Some(&pj)) = (pos[ci].get(&i), pos[ci].get(&j)) { + let (a, b) = if pi >= pj { (pi, pj) } else { (pj, pi) }; + let _ = cl; + w_terms.push(wbase + svec_index(cl.len(), a, b)); + } + } + if w_terms.is_empty() { + continue; // out-of-pattern entry: dropped (s = 0) + } + // (h − Gx)_r = Σ w ⇔ Gx + Σ w = h_r (equality `eq_row`). + for t in &rows_of_g[r] { + aug_a.push(Triplet::new(eq_row, t.col, t.val)); + } + for &wc in &w_terms { + aug_a.push(Triplet::new(eq_row, wc, 1.0)); + } + aug_b.push(prob.h[r]); + z_src[r] = ZSrc::Eq(eq_row); + eq_row += 1; + } + } + } + } + off += d; + } + + // Augmented variable vector x' = (x, w): objective and Hessian carry no + // `w` terms, bounds (if any) extend as free. + let mut c_aug = prob.c.clone(); + c_aug.resize(aug_n, 0.0); + let (lb, ub) = if prob.has_bounds() { + let mut lb = prob.lb.clone(); + let mut ub = prob.ub.clone(); + lb.resize(aug_n, crate::qp::NEG_INF); + ub.resize(aug_n, crate::qp::POS_INF); + (lb, ub) + } else { + (Vec::new(), Vec::new()) + }; + let aug_prob = QpProblem { + n: aug_n, + p_lower: prob.p_lower.clone(), + c: c_aug, + a: aug_a, + b: aug_b, + g: aug_g, + h: aug_h, + lb, + ub, + }; + let recon = ChordalRecon { + orig_n, + orig_m_eq, + orig_m_ineq, + z_src, + }; + (aug_prob, aug_cones, recon) +} + +/// Map a solve of the chordal-augmented problem back to the pre-chordal +/// layout: the primal/objective are unchanged on the original variables, and +/// each PSD dual entry is recovered from its consistency-equality multiplier +/// (a clique-covered entry), a copied row's dual, or `0` (dropped entry). +fn chordal_reconstruct(sol: QpSolution, recon: &ChordalRecon, _prob1: &QpProblem) -> QpSolution { + let mut z = vec![0.0; recon.orig_m_ineq]; + for (r, src) in recon.z_src.iter().enumerate() { + z[r] = match *src { + ZSrc::Ineq(ar) => sol.z[ar], + ZSrc::Eq(er) => sol.y[er], + ZSrc::Zero => 0.0, + }; + } + QpSolution { + status: sol.status, + x: sol.x[..recon.orig_n].to_vec(), + y: sol.y[..recon.orig_m_eq].to_vec(), + z, + z_lb: sol.z_lb[..recon.orig_n].to_vec(), + z_ub: sol.z_ub[..recon.orig_n].to_vec(), + obj: sol.obj, + iters: sol.iters, + iterates: sol.iterates, + } +} + +/// Warm-started [`solve_socp_ipm`]: seed the iteration from `warm` (a nearby +/// SOCP's solution). The warm `(s, z)` are projected into each cone's +/// interior (orthant positivity / SOC `λ_min` floor); the solution is +/// start-independent, so warm starting only reduces the iteration count. +/// `prob` must be bound-free (use `G`/`h` rows for all constraints). +pub fn solve_socp_ipm_warm( + prob: &QpProblem, + cones: &[ConeSpec], + warm: &QpWarmStart, + opts: &QpOptions, + make_backend: F, +) -> QpSolution +where + F: FnMut() -> Box, +{ + assert!( + !prob.has_bounds(), + "solve_socp_ipm_warm: encode bounds as G/h rows (bound expansion + warm not combined)" + ); + if !cone_dims_cover(cones, prob.m_ineq()) { + return failed_solution( + prob, + vec![0.0; prob.n], + vec![0.0; prob.m_eq()], + vec![0.0; prob.m_ineq()], + 0, + ); + } + let cone = CompositeCone::from_specs(cones); + let w = WarmStart { + x: warm.x.clone(), + y: warm.y.clone(), + z: warm.z.clone(), + }; + solve_qp_core(prob, &cone, opts, Some(&w), make_backend) +} + +/// Route a problem whose cone product contains an **exponential** cone to the +/// non-symmetric HSDE driver ([`crate::hsde_nonsym`]). Orthant, second-order, +/// exponential, and power blocks are all supported (a second-order cone may be +/// mixed with a non-symmetric one). Variable bounds expand into a trailing +/// orthant block exactly as in the symmetric path. +fn solve_nonsym( + prob: &QpProblem, + cones: &[ConeSpec], + opts: &QpOptions, + make_backend: F, + hook: Option<&mut dyn DebugHook>, +) -> QpSolution +where + F: FnMut() -> Box, +{ + use crate::hsde_nonsym::{solve_conic_hsde_nonsym, solve_conic_hsde_nonsym_debug, NsBlock}; + + fn blocks_of(cones: &[ConeSpec], extra_orthant: usize) -> Vec { + let mut blocks = Vec::with_capacity(cones.len() + 1); + for c in cones { + match c { + ConeSpec::Nonneg(n) => blocks.push(NsBlock::Orthant(*n)), + ConeSpec::SecondOrder(m) => blocks.push(NsBlock::SecondOrder(*m)), + ConeSpec::Exponential => blocks.push(NsBlock::exp()), + ConeSpec::Power(a) => blocks.push(NsBlock::power(*a)), + // PSD is self-scaled and runs on the symmetric driver; the + // PSD-with-exp/power mix is rejected upstream in + // `solve_socp_ipm`, so this arm is never reached. + ConeSpec::Psd(_) => { + unreachable!("PSD cone routes to the symmetric driver, not hsde_nonsym") + } + } + } + if extra_orthant > 0 { + blocks.push(NsBlock::Orthant(extra_orthant)); + } + blocks + } + + if !prob.has_bounds() { + let blocks = blocks_of(cones, 0); + return match hook { + Some(h) => solve_conic_hsde_nonsym_debug(prob, &blocks, opts, h, make_backend), + None => solve_conic_hsde_nonsym(prob, &blocks, opts, make_backend), + }; + } + let (expanded, bound_rows) = expand_bounds(prob); + let blocks = blocks_of(cones, bound_rows.len()); + let sol = match hook { + Some(h) => solve_conic_hsde_nonsym_debug(&expanded, &blocks, opts, h, make_backend), + None => solve_conic_hsde_nonsym(&expanded, &blocks, opts, make_backend), + }; + split_bound_duals(prob, &bound_rows, sol) +} + +/// Expand a problem's finite variable bounds into extra `G` rows +/// (`x_i ≤ ub_i` and `−x_i ≤ −lb_i`), returning the bounds-free expanded +/// problem and the `(row, var, is_upper)` provenance of each appended row +/// so the bound multipliers can be split back out. +fn expand_bounds(prob: &QpProblem) -> (QpProblem, Vec<(usize, usize, bool)>) { + let mut g = prob.g.clone(); + let mut h = prob.h.clone(); + let mut bound_rows: Vec<(usize, usize, bool)> = Vec::new(); + for i in 0..prob.n { + let ub = prob.ub_of(i); + if ub < crate::qp::BOUND_INF { + let r = h.len(); + g.push(crate::qp::Triplet::new(r, i, 1.0)); + h.push(ub); + bound_rows.push((r, i, true)); + } + let lb = prob.lb_of(i); + if lb > -crate::qp::BOUND_INF { + let r = h.len(); + g.push(crate::qp::Triplet::new(r, i, -1.0)); + h.push(-lb); + bound_rows.push((r, i, false)); + } + } + let expanded = QpProblem { + n: prob.n, + p_lower: prob.p_lower.clone(), + c: prob.c.clone(), + a: prob.a.clone(), + b: prob.b.clone(), + g, + h, + lb: Vec::new(), + ub: Vec::new(), + }; + (expanded, bound_rows) +} + +/// A warm-start iterate: a previous primal/dual solution to seed the +/// interior-point iteration for a *nearby* problem (same structure, mildly +/// perturbed `c`/`b`/`h`/bounds). Its fields mirror [`QpSolution`], so the +/// idiomatic use is to feed back the prior solve's solution. +/// +/// ## Why warm starting an IPM needs care +/// +/// Unlike active-set/simplex methods, a primal-dual interior-point method +/// converges *to* the complementarity boundary (`s∘z → 0`). A converged +/// warm point therefore lies essentially **on** that boundary — the worst +/// place to restart, since the IPM needs a well-centered interior iterate. +/// Seeding `(x, s, z)` verbatim typically stalls. +/// +/// [`solve_qp_ipm_warm`] handles this with a Mehrotra-style recentering +/// ([`init_iterate`]): it keeps the warm primal `x` (whose slack pattern +/// `h − Gx` encodes the active set) but pushes the slacks `s` and +/// multipliers `z` back into the interior with a **scale-aware floor**, so +/// the start is genuinely interior and centered while still benefiting +/// from the warm `x`. The benefit is real but bounded — it is largest when +/// the active set is stable across the perturbation, and modest or absent +/// when it changes substantially (a known property of IPM warm starts). +#[derive(Debug, Clone)] +pub struct QpWarmStart { + /// Primal iterate (length `n`). + pub x: Vec, + /// Equality multipliers (length `m_eq`). + pub y: Vec, + /// Inequality multipliers for the original `G` rows (length `m_ineq`). + pub z: Vec, + /// Lower-bound multipliers (length `n`). + pub z_lb: Vec, + /// Upper-bound multipliers (length `n`). + pub z_ub: Vec, +} + +impl QpWarmStart { + /// Build a warm start from a previous [`QpSolution`]. + pub fn from_solution(sol: &QpSolution) -> Self { + QpWarmStart { + x: sol.x.clone(), + y: sol.y.clone(), + z: sol.z.clone(), + z_lb: sol.z_lb.clone(), + z_ub: sol.z_ub.clone(), + } + } +} + +/// Internal warm start expressed in the *expanded* space (variable bounds +/// already folded into the inequality block, so `z` covers `G`-rows then +/// the appended bound rows). +struct WarmStart { + x: Vec, + y: Vec, + z: Vec, +} + +/// Build the expanded-space `z` for a warm start: the original `G`-row +/// multipliers followed by each appended bound row's `z_lb`/`z_ub` value, +/// in the same append order as [`expand_bounds`]. Inverse of +/// [`split_bound_duals`]'s `z` handling. +fn merge_bound_duals( + prob: &QpProblem, + bound_rows: &[(usize, usize, bool)], + warm: &QpWarmStart, +) -> Vec { + let base_m = prob.m_ineq(); + let mut z = vec![0.0; base_m + bound_rows.len()]; + let copy = base_m.min(warm.z.len()); + z[..copy].copy_from_slice(&warm.z[..copy]); + for &(r, var, is_upper) in bound_rows { + let v = if is_upper { + warm.z_ub.get(var).copied().unwrap_or(0.0) + } else { + warm.z_lb.get(var).copied().unwrap_or(0.0) + }; + if r < z.len() { + z[r] = v; + } + } + z +} + +/// Move the appended bound rows' multipliers from the expanded solution's +/// `z` into `z_lb`/`z_ub`, and trim `z` back to the original rows. +fn split_bound_duals( + prob: &QpProblem, + bound_rows: &[(usize, usize, bool)], + mut sol: QpSolution, +) -> QpSolution { + let base_m = prob.m_ineq(); + let mut z = vec![0.0; base_m]; + z.copy_from_slice(&sol.z[..base_m]); + let mut z_lb = vec![0.0; prob.n]; + let mut z_ub = vec![0.0; prob.n]; + for &(r, var, is_upper) in bound_rows { + if is_upper { + z_ub[var] = sol.z[r]; + } else { + z_lb[var] = sol.z[r]; + } + } + sol.z = z; + sol.z_lb = z_lb; + sol.z_ub = z_ub; + sol +} + +/// Bounds-agnostic Mehrotra predictor-corrector core. `prob.lb`/`ub` are +/// ignored here; the public [`solve_qp_ipm`] handles bound expansion. +fn solve_qp_core( + prob: &QpProblem, + cone: &CompositeCone, + opts: &QpOptions, + warm: Option<&WarmStart>, + mut make_backend: F, +) -> QpSolution +where + F: FnMut() -> Box, +{ + // Opt-in homogeneous self-dual embedding driver. It builds its own + // factorization and self-starts, so it bypasses the warm-start / + // factor-reuse plumbing below (warm is ignored — it cannot change the + // solution, only the iteration count, which HSDE does not exploit yet). + if opts.use_hsde { + return crate::hsde::solve_conic_hsde(prob, cone, opts, make_backend, None); + } + + // Build the fixed KKT pattern and an initial factorization, then run + // the iteration. The pattern is constant across iterations (only the + // cone scaling block changes), so the loop `refactor`s rather than + // re-analyzing. Build-once / solve-many across *instances* with the + // same pattern is exposed via [`QpFactorization`]. + let (kkt, mut fact) = match build_factorization(prob, cone, opts, &mut make_backend) { + Ok(pair) => pair, + Err(()) => { + let n = prob.n; + return failed_solution( + prob, + vec![0.0; n], + vec![0.0; prob.m_eq()], + vec![1.0; prob.m_ineq()], + 0, + ); + } + }; + run_ipm(prob, cone, opts, &kkt, &mut fact, warm, None) +} + +/// Build the constant KKT pattern for `prob` and a `Factorization` over +/// it (seeded with the initial scaling). Shared by the single-shot path +/// and the reusable [`QpFactorization`] handle. `Err(())` ⇒ the initial +/// factorization failed. +pub(crate) fn build_factorization( + prob: &QpProblem, + cone: &CompositeCone, + opts: &QpOptions, + make_backend: &mut F, +) -> Result<(KktStructure, Factorization), ()> +where + F: FnMut() -> Box, +{ + // Seed the scaling at the cone identity (s = z = e ⇒ block = I). + let mut e = vec![0.0; prob.m_ineq()]; + cone.identity(&mut e); + + let kkt = KktStructure::build(prob, cone, opts.reg); + let dim = kkt.dim; // base rows + per-SOC auxiliary variables + let mut kkt_vals = kkt.values.clone(); + kkt.update_blocks(cone, &e, &e, opts.reg, &mut kkt_vals); + let fact = Factorization::new( + dim as Index, + kkt.airn.clone(), + kkt.ajcn.clone(), + kkt_vals, + make_backend(), + ) + .map_err(|_| ())?; + Ok((kkt, fact)) +} + +/// Build the starting iterate `(x, y, z, s)` for [`run_ipm`]. +/// +/// With no warm start (`warm = None`) this is the cold default +/// `x = 0, y = 0, z = 1, s = 1` — a perfectly centered interior point +/// (`s∘z = 1`) — preserving the established cold-start behavior exactly. +/// +/// With a warm start it applies a **Mehrotra-style recentering** seeded +/// from the warm point (Mehrotra 1992, §7, adapted for warm starting): +/// +/// 1. Keep the warm primal `x` and equality multipliers `y`. +/// 2. Take the implied slacks `s̃ = h − Gx` (their signs encode which +/// inequalities the warm `x` makes active/violated) and the warm `z`. +/// 3. Shift both into the strict interior by `δ = max(−1.5·min(·), floor)`. +/// The `floor` is **adaptive**: it is the warm point's KKT residual `ρ` +/// on *this* problem, clamped to `[1e-9·scale, 0.1·scale]` with +/// `scale = max(1, ‖s̃‖∞, ‖z‖∞)`. A converged warm point sits on the +/// complementarity boundary (`s̃ᵢ` or `zᵢ ≈ 0`), so a floor is required +/// to keep the restart interior — but a *fixed* floor overwrites the +/// warm dual structure and degrades to a primal-only warm start. +/// Sizing the floor to `ρ` keeps `s`/`z` near their warm (correctly +/// structured) values when the problem is nearby (small `ρ`), so the +/// IPM exploits the warm duals — and softens toward the conservative +/// `0.1·scale` when the active set has moved (large `ρ`). This both +/// deepens the benefit on nearby problems and keeps it from ever doing +/// worse than a centered start. +/// 4. A final centering shift `½(s·z)/Σz`, `½(s·z)/Σs` balances `s` and +/// `z` (Mehrotra's second step). +/// +/// The returned iterate always satisfies `s > 0, z > 0`. If `warm`'s +/// dimensions don't match the (expanded) problem it is ignored and the +/// cold start is used, so a stale warm start can never corrupt a solve. +fn init_iterate( + prob: &QpProblem, + cone: &CompositeCone, + n: usize, + m_eq: usize, + m_ineq: usize, + warm: Option<&WarmStart>, +) -> (Vec, Vec, Vec, Vec) { + // Cold start at the cone identity e (orthant: all ones; SOC: (1,0,…)), + // a perfectly centered interior point (s∘z = e). + let cold = || { + let mut e = vec![0.0; m_ineq]; + cone.identity(&mut e); + (vec![0.0; n], vec![0.0; m_eq], e.clone(), e) + }; + // A matching primal `x` is enough to warm start; `y`/`z` fall back to + // the cold values when they don't match (so a primal-only warm start — + // e.g. feeding back just the previous primal — is supported). + let w = match warm { + Some(w) if w.x.len() == n => w, + _ => return cold(), + }; + + let x = w.x.clone(); + let y = if w.y.len() == m_eq { + w.y.clone() + } else { + vec![0.0; m_eq] + }; + let mut z = if w.z.len() == m_ineq { + w.z.clone() + } else { + let mut e = vec![0.0; m_ineq]; + cone.identity(&mut e); + e + }; + + // No cone: x/y are the whole iterate, s/z are empty. + if m_ineq == 0 { + return (x, y, z, Vec::new()); + } + + // Implied slacks s̃ = h − Gx. + let mut gx = vec![0.0; m_ineq]; + prob.g_mul(&x, &mut gx); + let mut s: Vec = (0..m_ineq).map(|i| prob.h[i] - gx[i]).collect(); + + let scale = 1.0_f64.max(inf_norm(&s)).max(inf_norm(&z)); + + // Adaptive interior floor sized to the warm point's KKT residual ρ on + // *this* problem. ρ measures how far the warm point is from satisfying + // the new KKT system: a small ρ (nearby problem, stable active set) + // lets the slacks/multipliers stay near their warm — correctly + // structured — values, so the IPM exploits the warm duals and needs + // few steps; a large ρ (the active set moved, so the warm point is + // badly infeasible) softens the floor toward the conservative cold + // level `0.1·scale`. This self-corrects: warm starting never does + // worse than a centered start, and gains the most when it can. + let floor = { + let mut rd = prob.c.clone(); + prob.p_mul_add(&x, &mut rd); + prob.at_mul_add(&y, &mut rd); + prob.gt_mul_add(&z, &mut rd); + let mut rp: Vec = prob.b.iter().map(|b| -b).collect(); + prob.a_mul_add(&x, &mut rp); + // Inequality infeasibility of the warm point: max(0, Gx − h) = −s̃. + let viol = s.iter().fold(0.0_f64, |m, &si| m.max((-si).max(0.0))); + let rho = inf_norm(&rd).max(inf_norm(&rp)).max(viol); + rho.clamp(1e-9 * scale, 0.1 * scale) + }; + // Project (s, z) into the strict interior of each cone block and + // rebalance (orthant: positivity + Mehrotra; SOC: lift λ_min). + cone.recenter_warm(&mut s, &mut z, floor); + (x, y, z, s) +} + +/// Run the Mehrotra predictor-corrector iteration for `prob` given an +/// already-built KKT pattern (`kkt`) and a live `Factorization` (`fact`) +/// over that pattern. The factorization is re-numeric-factored each +/// iteration (symbolic reuse); when `fact` is reused across instances +/// with the *same pattern*, the AMD ordering / symbolic factor is reused +/// across instances too. +fn run_ipm( + prob: &QpProblem, + cone: &CompositeCone, + opts: &QpOptions, + kkt: &KktStructure, + fact: &mut Factorization, + warm: Option<&WarmStart>, + mut hook: Option<&mut dyn DebugHook>, +) -> QpSolution { + let n = prob.n; + let m_eq = prob.m_eq(); + let m_ineq = prob.m_ineq(); + + let (mut x, mut y, mut z, mut s) = init_iterate(prob, cone, n, m_eq, m_ineq, warm); + + let mut r_d = vec![0.0; n]; + let mut r_p = vec![0.0; m_eq]; + let mut r_g = vec![0.0; m_ineq]; + let mut r_c = vec![0.0; m_ineq]; + let mut rhs_term = vec![0.0; m_ineq]; + // The KKT system carries one auxiliary variable per second-order cone; + // the rhs is sized to it (auxiliary rows are zero). + let mut rhs = vec![0.0; kkt.dim]; + let mut dx = vec![0.0; n]; + let mut dy = vec![0.0; m_eq]; + let mut dz = vec![0.0; m_ineq]; + let mut ds = vec![0.0; m_ineq]; + let mut ds_aff = vec![0.0; m_ineq]; + let mut dz_aff = vec![0.0; m_ineq]; + let mut kkt_vals = kkt.values.clone(); + + let mut iters = 0; + let mut status = QpStatus::IterationLimit; + let mut iterates: Vec = Vec::new(); + + for it in 0..opts.max_iter { + iters = it; + + // --- residuals (unregularized; this is the convergence test) --- + // r_d = P x + c + Aᵀ y + Gᵀ z + r_d.iter_mut().zip(&prob.c).for_each(|(r, c)| *r = *c); + prob.p_mul_add(&x, &mut r_d); + prob.at_mul_add(&y, &mut r_d); + prob.gt_mul_add(&z, &mut r_d); + // r_p = A x − b + r_p.iter_mut().zip(&prob.b).for_each(|(r, b)| *r = -*b); + prob.a_mul_add(&x, &mut r_p); + // r_g = G x + s − h + for i in 0..m_ineq { + r_g[i] = s[i] - prob.h[i]; + } + prob.g_mul_add(&x, &mut r_g); + + let mu = cone.mu(&s, &z); + let pinf = inf_norm(&r_p).max(inf_norm(&r_g)); + let dinf = inf_norm(&r_d); + let res = dinf.max(pinf).max(mu); + // Per-iteration objective, needed for the trace and for the + // debugger's `objective()` accessor. + let obj_it = if opts.collect_iterates || hook.is_some() { + let mut px = vec![0.0; n]; + prob.p_mul_add(&x, &mut px); + (0..n).map(|i| 0.5 * x[i] * px[i] + prob.c[i] * x[i]).sum() + } else { + 0.0 + }; + + // Debugger checkpoint: top of iteration — residuals and the + // accepted iterate from the previous step are in place; the + // search direction (`dx`/…`) is the previous iteration's (zero on + // the first), as on the NLP path. + if hook.is_some() { + let mut st = ConvexDebugState { + cp: Checkpoint::IterStart, + iter: it as i32, + mu, + pinf, + dinf, + res, + obj: obj_it, + alpha: (0.0, 0.0), + x: &mut x, + s: &mut s, + y: &mut y, + z: &mut z, + dx: &dx, + dy: &dy, + dz: &dz, + ds: &ds, + tau: None, + kappa: None, + status: None, + }; + if fire(&mut hook, &mut st) == DebugAction::Stop { + break; + } + } + + if res < opts.tol { + status = QpStatus::Optimal; + // Record the converged iterate so the trace *ends* at the + // optimum, matching the NLP path's N+1 convention (a problem + // solved in N steps logs N+1 records: the cold start through the + // converged point). Every other record is pushed at the bottom of + // the loop with the step that was taken *from* it; the converged + // iterate takes no step, so its `alpha`s are zero. Without this a + // solve that converges immediately (e.g. a tiny well-conditioned + // QP in one step) would leave only the pre-step cold start in the + // trace, and the trace's final objective would not be the optimum. + if opts.collect_iterates { + iterates.push(QpIterate { + iter: it, + objective: obj_it, + primal_infeasibility: pinf, + dual_infeasibility: dinf, + mu, + alpha_primal: 0.0, + alpha_dual: 0.0, + }); + } + break; + } + + // Verified infeasibility / unboundedness detection. Checked + // (not assumed), so a positive result is a proof and a false + // positive is impossible; this is the HSDE benefit without the + // homogeneous-embedding rewrite. Cheap (a few matvecs). + if let Some(infeas) = detect_infeasibility_cone(prob, &x, &y, &z, opts, cone) { + status = infeas; + break; + } + + // --- update the cone scaling block(s) and refactor (numeric-only; + // the symbolic factor / ordering is reused). The one factorization + // then backs both the predictor and corrector solves. --- + kkt.update_blocks(cone, &s, &z, opts.reg, &mut kkt_vals); + if fact.refactor(&kkt_vals).is_err() { + status = QpStatus::NumericalFailure; + break; + } + + // === Predictor (affine-scaling) step: σ = 0 === + // r_c = s∘z (affine target). + cone.comp_residual(&s, &z, 0.0, &mut r_c); + cone.rhs_comp_term(&s, &z, &r_c, &mut rhs_term); + build_rhs(&r_d, &r_p, &r_g, &rhs_term, n, m_eq, m_ineq, &mut rhs); + if fact.solve_one(&mut rhs).is_err() { + status = QpStatus::NumericalFailure; + break; + } + split_step(&rhs, n, m_eq, m_ineq, &mut dx, &mut dy, &mut dz); + cone.recover_ds(&s, &z, &r_c, &dz, &mut ds_aff); + dz_aff.copy_from_slice(&dz); + + // Affine step lengths and the predicted duality measure μ_aff. + let (alpha_p_aff, alpha_d_aff) = + step_lengths(cone, &s, &ds_aff, &z, &dz_aff, opts.tau, m_ineq); + let sigma = if m_ineq == 0 { + 0.0 + } else { + // μ_aff = ⟨s + αp ds_aff, z + αd dz_aff⟩ / m + let mut dot = 0.0; + for i in 0..m_ineq { + dot += (s[i] + alpha_p_aff * ds_aff[i]) * (z[i] + alpha_d_aff * dz_aff[i]); + } + let mu_aff = dot / m_ineq as f64; + // Mehrotra's heuristic centering parameter σ = (μ_aff/μ)³. + (mu_aff / mu).powi(3) + }; + + // === Corrector step: centered target + second-order term === + // Compute the step direction (`dx`/`dy`/`dz`/`ds`) and the step + // lengths taken this iteration, but defer *applying* it until after + // the `AfterSearchDirection` checkpoint. With no cone the predictor + // is already the full Newton step (`dz`/`ds` empty, full step). + let (mut step_p, mut step_d) = (1.0_f64, 1.0_f64); + if m_ineq != 0 { + let sigma_mu = sigma * mu; + cone.comp_residual_corrector(&s, &z, &ds_aff, &dz_aff, sigma_mu, &mut r_c); + cone.rhs_comp_term(&s, &z, &r_c, &mut rhs_term); + build_rhs(&r_d, &r_p, &r_g, &rhs_term, n, m_eq, m_ineq, &mut rhs); + if fact.solve_one(&mut rhs).is_err() { + status = QpStatus::NumericalFailure; + break; + } + split_step(&rhs, n, m_eq, m_ineq, &mut dx, &mut dy, &mut dz); + cone.recover_ds(&s, &z, &r_c, &dz, &mut ds); + + let (alpha_p, alpha_d) = step_lengths(cone, &s, &ds, &z, &dz, opts.tau, m_ineq); + step_p = alpha_p; + step_d = alpha_d; + } + + // Debugger checkpoint: the Newton step and its fraction-to-boundary + // lengths are known but not yet applied. + if hook.is_some() { + let mut st = ConvexDebugState { + cp: Checkpoint::AfterSearchDirection, + iter: it as i32, + mu, + pinf, + dinf, + res, + obj: obj_it, + alpha: (step_p, step_d), + x: &mut x, + s: &mut s, + y: &mut y, + z: &mut z, + dx: &dx, + dy: &dy, + dz: &dz, + ds: &ds, + tau: None, + kappa: None, + status: None, + }; + if fire(&mut hook, &mut st) == DebugAction::Stop { + break; + } + } + + // Apply the step (the no-cone full step is `step_p = step_d = 1`). + for i in 0..n { + x[i] += step_p * dx[i]; + } + for i in 0..m_eq { + y[i] += step_d * dy[i]; + } + for i in 0..m_ineq { + s[i] += step_p * ds[i]; + z[i] += step_d * dz[i]; + } + + // Debugger checkpoint: the new iterate is in place. + if hook.is_some() { + let mut st = ConvexDebugState { + cp: Checkpoint::AfterStep, + iter: it as i32, + mu, + pinf, + dinf, + res, + obj: obj_it, + alpha: (step_p, step_d), + x: &mut x, + s: &mut s, + y: &mut y, + z: &mut z, + dx: &dx, + dy: &dy, + dz: &dz, + ds: &ds, + tau: None, + kappa: None, + status: None, + }; + if fire(&mut hook, &mut st) == DebugAction::Stop { + break; + } + } + + if opts.collect_iterates { + iterates.push(QpIterate { + iter: it, + objective: obj_it, + primal_infeasibility: pinf, + dual_infeasibility: dinf, + mu, + alpha_primal: step_p, + alpha_dual: step_d, + }); + } + } + + // Objective ½ xᵀP x + cᵀx. + let mut px = vec![0.0; n]; + prob.p_mul_add(&x, &mut px); + let mut obj = 0.0; + for i in 0..n { + obj += 0.5 * x[i] * px[i] + prob.c[i] * x[i]; + } + + // Debugger post-mortem at the final iterate (the returned action is + // ignored — the solve is over). + if hook.is_some() { + let status_str = format!("{status:?}"); + let mut st = ConvexDebugState { + cp: Checkpoint::Terminated, + iter: iters as i32, + mu: cone.mu(&s, &z), + pinf: inf_norm(&r_p).max(inf_norm(&r_g)), + dinf: inf_norm(&r_d), + res: 0.0, + obj, + alpha: (0.0, 0.0), + x: &mut x, + s: &mut s, + y: &mut y, + z: &mut z, + dx: &dx, + dy: &dy, + dz: &dz, + ds: &ds, + tau: None, + kappa: None, + status: Some(&status_str), + }; + let _ = fire(&mut hook, &mut st); + } + + let nn = n; + QpSolution { + status, + x, + y, + z, + z_lb: vec![0.0; nn], + z_ub: vec![0.0; nn], + obj, + iters, + iterates, + } +} + +/// A reusable convex-QP factorization: build the KKT symbolic factor +/// (AMD ordering) **once** for a fixed problem *structure*, then solve +/// many instances that share that structure, paying the symbolic +/// analysis only on construction. This is the build-once / solve-many +/// handle (cf. the JAX `JaxProblem` from pounce#75) at the convex-QP +/// level. +/// +/// "Same structure" means: same `n`, same `A`/`G`/`P` sparsity pattern, +/// and the same *set* of finite variable bounds (so the bound-expanded +/// KKT pattern is identical). Only the numeric data — `c`, `b`, `h`, and +/// the bound *values* — may change between solves. A solve whose problem +/// does not match the captured structure returns +/// [`QpStatus::NumericalFailure`] rather than silently producing a wrong +/// answer; use the one-shot [`solve_qp_ipm`] for heterogeneous problems. +pub struct QpFactorization { + fact: Factorization, + opts: QpOptions, + /// The (orthant) inequality cone of the expanded problem; reused for + /// the KKT pattern check and the per-solve scaling. + cone: CompositeCone, + /// Captured structure fingerprint for the per-solve compatibility + /// check (same `n` and same expanded KKT pattern). + n: usize, + airn: Vec, + ajcn: Vec, +} + +impl QpFactorization { + /// Build the reusable factor from a representative `base` problem. + /// Returns `None` if the initial factorization fails (e.g. a + /// structurally singular KKT system). + pub fn build(base: &QpProblem, opts: &QpOptions, mut make_backend: F) -> Option + where + F: FnMut() -> Box, + { + let expanded = if base.has_bounds() { + expand_bounds(base).0 + } else { + base.clone() + }; + let cone = CompositeCone::single_nonneg(expanded.m_ineq()); + let (kkt, fact) = build_factorization(&expanded, &cone, opts, &mut make_backend).ok()?; + Some(QpFactorization { + airn: kkt.airn, + ajcn: kkt.ajcn, + n: base.n, + fact, + cone, + opts: *opts, + }) + } + + /// Solve `prob`, reusing the captured symbolic factor. `prob` must + /// share the captured structure (see the type docs); otherwise a + /// `NumericalFailure` solution is returned. + pub fn solve(&mut self, prob: &QpProblem) -> QpSolution { + self.solve_inner(prob, None) + } + + /// Solve `prob` reusing the captured symbolic factor **and** warm + /// starting from `warm` (a nearby problem's solution). Combines the + /// two reuse axes: the symbolic factorization is paid once at `build`, + /// and the interior-point iteration is seeded from the warm point (see + /// [`QpWarmStart`]). Same structure requirement as [`Self::solve`]. + pub fn solve_warm(&mut self, prob: &QpProblem, warm: &QpWarmStart) -> QpSolution { + let (expanded_z, _) = if prob.has_bounds() { + // `merge_bound_duals` needs the bound-row provenance. + let (_, bound_rows) = expand_bounds(prob); + (merge_bound_duals(prob, &bound_rows, warm), ()) + } else { + (warm.z.clone(), ()) + }; + let w = WarmStart { + x: warm.x.clone(), + y: warm.y.clone(), + z: expanded_z, + }; + self.solve_inner(prob, Some(&w)) + } + + fn solve_inner(&mut self, prob: &QpProblem, warm: Option<&WarmStart>) -> QpSolution { + let (expanded, bound_rows) = if prob.has_bounds() { + expand_bounds(prob) + } else { + (prob.clone(), Vec::new()) + }; + // Rebuild this instance's pattern and require it to match the + // captured one exactly (same nnz, same row/col indices). + let kkt = KktStructure::build(&expanded, &self.cone, self.opts.reg); + if prob.n != self.n || kkt.airn != self.airn || kkt.ajcn != self.ajcn { + return failed_solution( + prob, + vec![0.0; prob.n], + vec![0.0; prob.m_eq()], + vec![1.0; prob.m_ineq()], + 0, + ); + } + // Reuse the live factorization (it carries the symbolic analysis; + // `run_ipm` refactors numerically per iteration). The same factor + // object is reused across solves, so the AMD ordering / symbolic + // factor is paid once at `build`. + let sol = run_ipm( + &expanded, + &self.cone, + &self.opts, + &kkt, + &mut self.fact, + warm, + None, + ); + split_bound_duals(prob, &bound_rows, sol) + } +} + +/// Whether the cone specs partition exactly `m_ineq` inequality rows — the +/// invariant the conic drivers assume (each `s = h − Gx` block sits in one +/// cone, with an exp/power cone occupying exactly 3 rows). A mismatch is a +/// caller error that would otherwise index past the slack vector. +fn cone_dims_cover(cones: &[ConeSpec], m_ineq: usize) -> bool { + cones.iter().map(|c| c.dim()).sum::() == m_ineq +} + +/// Build a `NumericalFailure` solution from the current iterate (used +/// when the *initial* factorization fails before the loop starts). +fn failed_solution( + prob: &QpProblem, + x: Vec, + y: Vec, + z: Vec, + iters: usize, +) -> QpSolution { + let mut px = vec![0.0; prob.n]; + prob.p_mul_add(&x, &mut px); + let mut obj = 0.0; + for i in 0..prob.n { + obj += 0.5 * x[i] * px[i] + prob.c[i] * x[i]; + } + QpSolution { + status: QpStatus::NumericalFailure, + x, + y, + z, + z_lb: vec![0.0; prob.n], + z_ub: vec![0.0; prob.n], + obj, + iters, + iterates: Vec::new(), + } +} + +/// Build the Newton RHS `[−r_d; −r_p; −r_g + r_c ⊘ z]` for a given +/// complementarity residual `r_c` (predictor or corrector). +#[allow(clippy::too_many_arguments)] +/// Assemble the reduced KKT right-hand side `[-r_d; -r_p; -r_g + comp_term]`. +/// `comp_term` is the cone's contribution at the `(z)` rows (the orthant's +/// is `r_c ⊘ z`), computed by the caller via [`Cone::rhs_comp_term`] so the +/// block is cone-specific rather than baked in here. +pub(crate) fn build_rhs( + r_d: &[f64], + r_p: &[f64], + r_g: &[f64], + comp_term: &[f64], + n: usize, + m_eq: usize, + m_ineq: usize, + rhs: &mut [f64], +) { + for i in 0..n { + rhs[i] = -r_d[i]; + } + for i in 0..m_eq { + rhs[n + i] = -r_p[i]; + } + for i in 0..m_ineq { + rhs[n + m_eq + i] = -r_g[i] + comp_term[i]; + } + // Auxiliary-variable rows (per second-order cone, appended after the + // base rows) have zero right-hand side; re-zero them since `solve_one` + // overwrote the buffer with the previous step. + for v in rhs.iter_mut().skip(n + m_eq + m_ineq) { + *v = 0.0; + } +} + +/// Copy the solved RHS into the (dx, dy, dz) step components. +pub(crate) fn split_step( + rhs: &[f64], + n: usize, + m_eq: usize, + m_ineq: usize, + dx: &mut [f64], + dy: &mut [f64], + dz: &mut [f64], +) { + dx.copy_from_slice(&rhs[0..n]); + dy.copy_from_slice(&rhs[n..n + m_eq]); + dz.copy_from_slice(&rhs[n + m_eq..n + m_eq + m_ineq]); +} + +/// Separate fraction-to-boundary step lengths for the primal slack `s` +/// (via `ds`) and dual `z` (via `dz`). Returns `(alpha_primal, +/// alpha_dual)`; both are 1 when there is no cone. +fn step_lengths( + cone: &CompositeCone, + s: &[f64], + ds: &[f64], + z: &[f64], + dz: &[f64], + tau: f64, + m_ineq: usize, +) -> (f64, f64) { + if m_ineq == 0 { + return (1.0, 1.0); + } + (cone.max_step(s, ds, tau), cone.max_step(z, dz, tau)) +} + +/// Bench-only re-export of the KKT assembly so the `scaling` example can +/// time it in isolation. Not part of the public solving API. +#[doc(hidden)] +pub fn assemble_kkt_for_bench( + prob: &QpProblem, + scaling: &[f64], + reg: f64, + _dim: usize, +) -> (Vec, Vec, Vec) { + let cone = CompositeCone::single_nonneg(prob.m_ineq()); + let kkt = KktStructure::build(prob, &cone, reg); + let mut vals = kkt.values.clone(); + // Orthant block s/z = scaling at z = 1. + let ones = vec![1.0; prob.m_ineq()]; + kkt.update_blocks(&cone, scaling, &ones, reg, &mut vals); + (kkt.airn, kkt.ajcn, vals) +} + +/// Fixed-pattern KKT structure for the QP augmented system. +/// +/// The KKT *sparsity pattern* is identical across all IPM iterations — +/// only the `(z, z)` diagonal (the cone scaling block) changes from step +/// to step. This struct captures the pattern (`airn`/`ajcn`, 1-based +/// lower triangle) and the constant part of the values once, plus the +/// positions of the scaling-dependent diagonal entries, so each +/// iteration recomputes only `O(m_ineq)` values and the solver can +/// `refactor` (numeric-only, reusing the symbolic factor / fill-reducing +/// ordering) instead of rebuilding the factorization from scratch. This +/// is the constant-pattern symbolic reuse called for in +/// `dev-notes/performance-engineering.md`; without it the per-iteration +/// cost is dominated by repeated symbolic analysis on large sparse QPs. +/// Value-array positions of one cone's `(z, z)` scaling block, aligned with +/// the cone's [`CompositeCone::blocks`] order. +enum ZBlockPos { + /// One value position per row (orthant diagonal). + Diagonal(Vec), + /// A second-order cone in **diagonal + rank-1** form, represented with + /// one auxiliary variable `ξ`: the `(z,z)` diagonal entries, the + /// coupling column `(z_i, ξ) = u_i`, and the `(ξ,ξ) = +1` entry. Its + /// Schur complement reproduces the dense block `diag(d) + uuᵀ`, keeping + /// the factorization sparse (ECOS/Clarabel sparse-SOC trick). + DiagRank1 { + diag_pos: Vec, + u_pos: Vec, + aux_pos: usize, + }, + /// A fully dense symmetric block (the PSD cone's `W ⊗ₛ W`): the + /// value-array positions of its lower triangle, row-major + /// `[(0,0),(1,0),(1,1),…]`, aligned with [`ConeBlock::DenseLower`]. + Dense { pos: Vec }, +} + +/// How a cone block enters the `(z,z)` position of the KKT system. +#[derive(Clone, Copy, PartialEq)] +enum BlockShape { + /// Orthant: one diagonal entry per row. + Diagonal, + /// Second-order cone: diagonal + rank-1 via an auxiliary variable. + DiagRank1, + /// PSD cone: a fully dense symmetric lower-triangle block. + Dense, +} + +pub(crate) struct KktStructure { + pub(crate) airn: Vec, + pub(crate) ajcn: Vec, + /// Constant values (everything except the scaling block; the `(z, z)` + /// diagonal entries hold their `-reg` term here). + pub(crate) values: Vec, + /// Total KKT dimension, including the per-SOC auxiliary variables. + pub(crate) dim: usize, + /// Per-cone `(z, z)` block positions, in `cone.blocks()` order. + z_blocks: Vec, +} + +impl KktStructure { + /// Build the pattern and constant values once for `prob`'s inequality + /// cone `cone`. Each cone block contributes either a diagonal entry per + /// row (orthant) or a dense lower-triangle block (SOC) at its `(z, z)` + /// position; all seeded with `-reg` on the diagonal. The pattern is + /// constant across iterations — only the scaling values change — so the + /// solver `refactor`s rather than re-analyzing. + pub(crate) fn build(prob: &QpProblem, cone: &CompositeCone, reg: f64) -> Self { + let n = prob.n; + let m_eq = prob.m_eq(); + let mut entries: BTreeMap<(usize, usize), f64> = BTreeMap::new(); + let mut add = |r: usize, c: usize, v: f64| { + let (r, c) = if r >= c { (r, c) } else { (c, r) }; + *entries.entry((r, c)).or_insert(0.0) += v; + }; + + // (x,x): P + δI. + for t in &prob.p_lower { + add(t.row, t.col, t.val); + } + for i in 0..n { + add(i, i, reg); + } + // (y,x): A; (y,y): −δI. + for t in &prob.a { + add(n + t.row, t.col, t.val); + } + for i in 0..m_eq { + add(n + i, n + i, -reg); + } + // (z,x): G. + for t in &prob.g { + add(n + m_eq + t.row, t.col, t.val); + } + // (z,z): per cone block, seeded with −δI. SOC blocks get an + // auxiliary variable (appended after the base rows) carrying the + // rank-1 term. The scaling values are written by `update_blocks`. + let base_dim = n + m_eq + prob.m_ineq(); + let shapes = block_shapes(cone); + let mut aux = base_dim; // next auxiliary-variable index + for ((off, k), shape) in cone.blocks().iter().zip(&shapes) { + let d = k.dim(); + let zbase = n + m_eq + off; + for i in 0..d { + add(zbase + i, zbase + i, -reg); // diagonal (filled per iter) + } + match shape { + BlockShape::Diagonal => {} + BlockShape::DiagRank1 => { + // Aux: coupling (z_i, ξ) = u_i and (ξ, ξ) = +1. + for i in 0..d { + add(aux, zbase + i, 0.0); + } + add(aux, aux, 1.0); + aux += 1; + } + BlockShape::Dense => { + // Reserve the strict lower triangle of the (z,z) block; + // the diagonal was already added above. + for i in 0..d { + for j in 0..i { + add(zbase + i, zbase + j, 0.0); + } + } + } + } + } + let dim = aux; + + let nnz = entries.len(); + let mut airn = Vec::with_capacity(nnz); + let mut ajcn = Vec::with_capacity(nnz); + let mut values = Vec::with_capacity(nnz); + let mut coord_to_pos: BTreeMap<(usize, usize), usize> = BTreeMap::new(); + for (pos, ((r, c), v)) in entries.into_iter().enumerate() { + airn.push((r + 1) as Index); + ajcn.push((c + 1) as Index); + values.push(v); + coord_to_pos.insert((r, c), pos); + } + + // Record each cone block's positions in `blocks()` order. + let mut z_blocks = Vec::with_capacity(cone.blocks().len()); + let mut aux = base_dim; + for ((off, k), shape) in cone.blocks().iter().zip(&shapes) { + let d = k.dim(); + let zbase = n + m_eq + off; + match shape { + BlockShape::Diagonal => { + let diag_pos = (0..d) + .map(|i| coord_to_pos[&(zbase + i, zbase + i)]) + .collect(); + z_blocks.push(ZBlockPos::Diagonal(diag_pos)); + } + BlockShape::DiagRank1 => { + let diag_pos = (0..d) + .map(|i| coord_to_pos[&(zbase + i, zbase + i)]) + .collect(); + let u_pos = (0..d).map(|i| coord_to_pos[&(aux, zbase + i)]).collect(); + let aux_pos = coord_to_pos[&(aux, aux)]; + z_blocks.push(ZBlockPos::DiagRank1 { + diag_pos, + u_pos, + aux_pos, + }); + aux += 1; + } + BlockShape::Dense => { + // Lower triangle, row-major — matching ConeBlock::DenseLower. + let mut pos = Vec::with_capacity(d * (d + 1) / 2); + for i in 0..d { + for j in 0..=i { + pos.push(coord_to_pos[&(zbase + i, zbase + j)]); + } + } + z_blocks.push(ZBlockPos::Dense { pos }); + } + } + } + + KktStructure { + airn, + ajcn, + values, + dim, + z_blocks, + } + } + + /// Write the per-iteration cone scaling into `out` (a copy of + /// `self.values`): each block's `(z, z)` entries become `-(block) - + /// reg·I`, from the cone's [`Cone::kkt_block`]. + pub(crate) fn update_blocks( + &self, + cone: &CompositeCone, + s: &[f64], + z: &[f64], + reg: f64, + out: &mut [Number], + ) { + for ((off, k), zb) in cone.blocks().iter().zip(&self.z_blocks) { + let d = k.dim(); + let block = k.kkt_block(&s[*off..off + d], &z[*off..off + d]); + match (zb, block) { + (ZBlockPos::Diagonal(pos), ConeBlock::Diagonal(vals)) => { + for (i, &p) in pos.iter().enumerate() { + out[p] = -vals[i] - reg; + } + } + ( + ZBlockPos::DiagRank1 { + diag_pos, + u_pos, + aux_pos, + }, + ConeBlock::DiagPlusRank1 { diag, u }, + ) => { + // (z,z) block = −(diag(d) + uuᵀ) − reg, with the rank-1 + // carried by the aux variable ξ: diagonal −dᵢ − reg, the + // coupling (z_i, ξ) = uᵢ, and (ξ, ξ) = +1. Its Schur + // complement is −diag(d) − reg − uuᵀ = −(W²) − reg. + for i in 0..d { + out[diag_pos[i]] = -diag[i] - reg; + out[u_pos[i]] = u[i]; + } + out[*aux_pos] = 1.0; + } + (ZBlockPos::Dense { pos }, ConeBlock::DenseLower { dim: _, lower }) => { + // (z,z) block = −H − reg·I, H = W⊗ₛW dense. Lower triangle + // row-major; reg only on the diagonal (i == j). + let mut idx = 0; + for i in 0..d { + for j in 0..=i { + out[pos[idx]] = -lower[idx] - if i == j { reg } else { 0.0 }; + idx += 1; + } + } + } + _ => unreachable!("cone block shape changed between build and update"), + } + } + } +} + +/// How each cone block enters the `(z,z)` position — diagonal (orthant), +/// diag-plus-rank-1 (SOC), or fully dense (PSD) — probed via `kkt_block` at +/// the cone identity. +fn block_shapes(cone: &CompositeCone) -> Vec { + cone.blocks() + .iter() + .map(|(_, k)| { + let d = k.dim(); + let mut e = vec![0.0; d]; + k.identity(&mut e); + match k.kkt_block(&e, &e) { + ConeBlock::Diagonal(_) => BlockShape::Diagonal, + ConeBlock::DiagPlusRank1 { .. } => BlockShape::DiagRank1, + ConeBlock::DenseLower { .. } => BlockShape::Dense, + } + }) + .collect() +} + +pub(crate) fn inf_norm(v: &[f64]) -> f64 { + v.iter().fold(0.0_f64, |m, &x| m.max(x.abs())) +} + +pub(crate) fn dot(a: &[f64], b: &[f64]) -> f64 { + a.iter().zip(b).map(|(x, y)| x * y).sum() +} + +/// Check the current iterate for a *verified* infeasibility certificate. +/// +/// Returns `Some(PrimalInfeasible | DualInfeasible)` **only** when the +/// certificate's defining (in)equalities hold to `opts.infeas_tol` +/// relative to the certificate's own magnitude. Because the certificate +/// is checked, not assumed, a positive result is a genuine proof and a +/// false positive is impossible; an unverifiable iterate returns `None` +/// and the solve keeps going (ultimately `IterationLimit`). +/// +/// This recovers HSDE's headline benefit — clean infeasible/unbounded +/// status instead of silently exhausting the iteration budget — without +/// the homogeneous embedding's full rewrite of the iteration. When the +/// problem is primal-infeasible the IPM's dual iterate `(y, z)` diverges +/// along a Farkas ray, so its normalization satisfies the primal +/// certificate; when the problem is unbounded the primal iterate `x` +/// diverges along a recession direction satisfying the dual certificate. +/// +/// Certificates (for `min ½xᵀPx + cᵀx s.t. Ax = b, Gx ≤ h`): +/// - **Primal infeasible:** `(y, z ≥ 0)` with `Aᵀy + Gᵀz ≈ 0` and +/// `bᵀy + hᵀz < 0` (Farkas). `z ≥ 0` is maintained by the IPM. +/// - **Dual infeasible / unbounded:** direction `d` (= `x`) with +/// `Pd ≈ 0, Ad ≈ 0, Gd ≤ 0, cᵀd < 0`. +pub(crate) fn detect_infeasibility( + prob: &QpProblem, + x: &[f64], + y: &[f64], + z: &[f64], + opts: &QpOptions, +) -> Option { + // Default dual-cone test: componentwise `zᵢ ≥ −tol`, exact for the + // nonnegative orthant (LP/QP) and the non-symmetric Farkas paths. The + // cone-aware entry point is [`detect_infeasibility_cone`]. + detect_infeasibility_with(prob, x, y, z, opts, |z, tol| z.iter().all(|&zi| zi >= -tol)) +} + +/// Cone-aware variant of [`detect_infeasibility`]: validates the Farkas +/// dual multiplier `z` against the **actual** dual cone `K*` (orthant: `z ≥ +/// 0`; SOC: `z₀ ≥ ‖z₁‖`; PSD: `smat(z) ⪰ 0`). The componentwise default is +/// correct only for the orthant — for SOC/PSD blocks a primal-infeasibility +/// certificate must have its multiplier *in the cone*, not merely +/// componentwise nonnegative, or the "proof" is not a proof. +pub(crate) fn detect_infeasibility_cone( + prob: &QpProblem, + x: &[f64], + y: &[f64], + z: &[f64], + opts: &QpOptions, + cone: &CompositeCone, +) -> Option { + detect_infeasibility_with(prob, x, y, z, opts, |z, tol| cone.in_dual_cone(z, tol)) +} + +fn detect_infeasibility_with( + prob: &QpProblem, + x: &[f64], + y: &[f64], + z: &[f64], + opts: &QpOptions, + dual_cone_ok: impl Fn(&[f64], f64) -> bool, +) -> Option { + let n = prob.n; + let ctol = opts.infeas_tol; + + // --- Primal infeasibility (Farkas certificate) --- + let dual_norm = inf_norm(y).max(inf_norm(z)); + if dual_norm > 0.0 { + let mut resid = vec![0.0; n]; // Aᵀy + Gᵀz + prob.at_mul(y, &mut resid); + prob.gt_mul(z, &mut resid); + let cert = dot(&prob.b, y) + dot(&prob.h, z); // bᵀy + hᵀz + let z_ok = dual_cone_ok(z, ctol * dual_norm); + if cert < -ctol * dual_norm && inf_norm(&resid) <= ctol * dual_norm && z_ok { + return Some(QpStatus::PrimalInfeasible); + } + } + + // --- Dual infeasibility / unboundedness (recession direction d = x) --- + let x_norm = inf_norm(x); + if x_norm > 0.0 { + let mut pd = vec![0.0; n]; + prob.p_mul(x, &mut pd); + let mut ad = vec![0.0; prob.m_eq()]; + prob.a_mul(x, &mut ad); + let mut gd = vec![0.0; prob.m_ineq()]; + prob.g_mul(x, &mut gd); + let cd = dot(&prob.c, x); + let gd_max = gd.iter().fold(0.0_f64, |m, &v| m.max(v)); + if cd < -ctol * x_norm + && inf_norm(&pd) <= ctol * x_norm + && inf_norm(&ad) <= ctol * x_norm + && gd_max <= ctol * x_norm + { + return Some(QpStatus::DualInfeasible); + } + } + + None +} diff --git a/crates/pounce-convex/src/lib.rs b/crates/pounce-convex/src/lib.rs new file mode 100644 index 00000000..673b11b1 --- /dev/null +++ b/crates/pounce-convex/src/lib.rs @@ -0,0 +1,47 @@ +//! `pounce-convex` — interior-point solvers for POUNCE's convex problem +//! classes. +//! +//! Phase 2 of the LP/QP routing plan (see `dev-notes/lp-qp-routing.md`): +//! a bare primal-dual interior-point solver for convex QP (and LP, which +//! is the `P = 0` case), built over a [`cones::Cone`] abstraction with +//! only the nonnegative orthant implemented so that later phases +//! (Mehrotra + HSDE, SOCP, exponential/power cones, SDP) extend rather +//! than rewrite the driver. +//! +//! The augmented-system factorization is shared with the NLP path via +//! [`pounce_linsol::Factorization`]; this crate adds no new linear-solver +//! dependency. +//! +//! Entry points: +//! - [`solve_qp_ipm`] — solve a [`qp::QpProblem`] (covers LP via an empty +//! `P`). + +#![cfg_attr(test, allow(clippy::unwrap_used, clippy::expect_used))] + +pub mod batch; +pub mod cones; +pub(crate) mod debug; +pub(crate) mod equilibrate; +pub mod hsde; +pub mod hsde_nonsym; +pub mod ipm; +pub mod presolve; +pub mod qp; +pub mod sensitivity; +pub mod sos; + +pub use batch::{ + solve_qp_batch, solve_qp_batch_parallel, solve_qp_batch_parallel_warm, solve_qp_multi_rhs, + solve_qp_multi_rhs_parallel, +}; +pub use cones::ConeSpec; +pub use ipm::{ + solve_qp_ipm, solve_qp_ipm_debug, solve_qp_ipm_warm, solve_socp_ipm, solve_socp_ipm_debug, + solve_socp_ipm_warm, QpFactorization, QpOptions, QpWarmStart, +}; +pub use qp::{QpIterate, QpProblem, QpResiduals, QpSolution, QpStatus, Triplet, NEG_INF, POS_INF}; +pub use sensitivity::{QpSensitivity, ReducedHessian, SensError}; +pub use sos::{ + sos_constrained_lower_bound, sos_lower_bound, sos_minimize, PolyProblem, Polynomial, SosBound, + SosSolution, +}; diff --git a/crates/pounce-convex/src/presolve.rs b/crates/pounce-convex/src/presolve.rs new file mode 100644 index 00000000..b54e0339 --- /dev/null +++ b/crates/pounce-convex/src/presolve.rs @@ -0,0 +1,1793 @@ +//! Presolve for convex QP and LP (Phase 3.5). +//! +//! Reduces a [`QpProblem`] before the interior-point solve and maps the +//! reduced solution back to the original problem space, recovering both +//! the primal `x` and the duals `(y, z)`. The contract is correctness of +//! the recovered KKT point: a presolved-then-postsolved solve yields a +//! valid primal–dual solution of the *original* problem (see +//! `tests/presolve_roundtrip.rs` and `tests/presolve_reductions.rs`). +//! +//! This is the architectural seam the dev note calls the "missing +//! piece": a **transaction stack** of [`Reduction`]s, each carrying the +//! data needed to undo itself (primal *and* dual). Postsolve replays the +//! stack in reverse. The catalog is small but the postsolve is complete, +//! so richer reductions can be added without reworking the recovery path. +//! +//! Reductions implemented: +//! - **Empty rows** (equality / inequality with no nonzeros): a +//! feasibility check, then drop. Their dual is zero. Detects trivial +//! primal infeasibility (`0 = b≠0` or `0 ≤ h<0`). +//! - **Fixed-variable elimination** from a singleton equality row +//! (`a·x_k = b ⇒ x_k = b/a`): substitute `x_k` out of `P`, `c`, `A`, +//! `G` (adjusting the objective constant and the row right-hand +//! sides), and recover the fixing row's multiplier from stationarity +//! at the postsolved point. The QP-aware reduction (the Hessian +//! coupling moves into the linear term and the dual must be recovered +//! consistently with `P`). +//! - **Empty/free-column elimination**: a variable absent from `P`, `A`, +//! and `G` is free and unconstrained, so its only objective effect is +//! `c_k x_k`. If `c_k = 0` the variable is irrelevant (set to 0, drop); +//! if `c_k ≠ 0` the problem is unbounded below (detected as +//! [`PresolveOutcome::Unbounded`]). +//! - **Parallel-row removal** (equality / inequality): rows that are +//! **scalar multiples** of one another (after substitution) — exact +//! duplicates being the unit-scale case — are redundant or expose +//! infeasibility. Detection normalizes each row by a canonical pivot and +//! uses rayon-parallel per-row hashing (PaPILO's hashing-based pairing), +//! confirming candidates with a tolerance so a wrong merge is +//! impossible (a quantization split only ever *misses* a pair). +//! Parallel equalities with inconsistent (scaled) right-hand sides ⇒ +//! infeasible; parallel inequalities (positive multiples — same +//! direction) keep the most restrictive row. Dual recovery stays +//! trivial because the *kept* row is an original one in its own frame +//! and every dropped row's multiplier is zero — a valid KKT point. +//! - **Free column singleton substitution**: an unbounded variable, +//! absent from `P` and `G`, that appears in exactly one (multi-entry) +//! equality row is substituted out via `x_col = (b_r − Σ_{j≠col} a_j +//! x_j) / a_col`, eliminating both the variable *and* the row. The +//! substitution shifts cost onto the surviving variables; the consumed +//! row's multiplier is the unique value `y_r = −c_col / a_col`. This is +//! a clean PaPILO reduction (uniquely determined dual), unlike forcing +//! constraints / bound tightening. +//! - **Activity-bound reductions** (need the variable box): for each +//! inequality `g·x ≤ h`, compute the activity range `[min, max]` over +//! the box. If `max ≤ h` the row is always satisfied → **redundant**, +//! drop it (dual 0). If `min > h` the row can never hold → +//! **infeasible**. For each equality `a·x = b`, infeasible when `b` +//! lies outside `[min, max]`. +//! - **Dominated columns**: a variable absent from `P` and the equalities +//! that appears in inequalities `Gx ≤ h` with sign-definite coefficients +//! matching its cost sign is optimal at a bound (pushing it there raises +//! neither the objective nor any row's activity), so it is fixed and +//! dropped. Its bound multiplier is its reduced cost `c_k + Σᵢ aᵢₖ zᵢ`, +//! which the sign conditions make nonnegative — a valid dual by +//! construction. (PaPILO's dominated-column reduction, restricted to the +//! clean sign-guaranteed case.) +//! - **Forcing constraints**: when a row's activity range *touches* its +//! right-hand side it can hold only at one vertex of the box, pinning +//! every involved variable to a bound (inequality `g·x ≤ h` with +//! `min = h`; equality `a·x = b` with `min = b` or `max = b`). The row +//! is dropped and each variable fixed. The dual recovery — the reason +//! this was the hard PaPILO postsolve — is exact: the forcing row's +//! multiplier is the tightest value making every pinned variable's bound +//! multiplier correctly signed (`max`/`min` over `−gradⱼ/coefⱼ`, clamped +//! `≥ 0` for inequalities), and each pinned variable's bound multiplier +//! is then its full reduced cost. The multiplier is generally *not +//! unique* (it ranges over an interval), so postsolve emits a valid +//! representative; correctness is checked as KKT validity, not dual +//! equality (`tests/presolve_forcing.rs`). Forcing rows are required to +//! have disjoint column sets so the recovery stays independent. +//! +//! # Relationship to PaPILO +//! +//! [PaPILO](https://github.com/scipopt/papilo) (Gleixner, Gottwald & +//! Hoen; the presolving library SCIP uses) is the reference architecture +//! for this module. It is C++ and Apache-2.0, so POUNCE does **not** wrap +//! it — that would break the pure-Rust guarantee — but ports its ideas: +//! +//! - the **transaction/reduction-stack** model with reversible postsolve +//! (the [`Reduction`] enum + `stack` + [`Presolve::postsolve`]); +//! - **hashing-based pairing** for duplicate detection, parallelized +//! (PaPILO uses Intel TBB; we use rayon). +//! +//! PaPILO is the catalog to mine for the next reductions — singleton / +//! doubleton rows, dominated columns, coefficient strengthening, probing +//! — and, importantly, for each one's *postsolve transform*, since the +//! dual recovery is the hard part. +//! +//! Implemented from that catalog so far: the transaction stack, fixed / +//! free / free-singleton columns, empty + duplicate rows, activity-based +//! redundancy/feasibility, and **forcing constraints** (above) — which +//! capture the dual-safe slice of activity/bound reasoning, since a +//! forcing row is exactly a model-changing bound deduction whose dual +//! re-attributes to the source row. +//! +//! - **Bound tightening** (domain propagation): each live row implies +//! bounds on its variables (`a_k x_k ≤ h − amin_{−k}`, etc.); where one +//! is strictly tighter than the declared box, the box is shrunk in the +//! reduced problem (the variable is *kept*). The subtle dual — when a +//! tightened bound is active at the optimum while the original bound is +//! slack, its multiplier is not a real bound multiplier but belongs to +//! the row that implied it — is handled in postsolve by **global bound +//! recovery**: every row multiplier is recovered first (re-attributing +//! each active tightened bound to its source row), then every variable's +//! bound multipliers are read off the complete reduced cost by +//! complementarity. To keep the re-attributions independent, tightening +//! sources are restricted to column-disjoint rows untouched by other +//! reductions (the same conservative rule as forcing). A single pass +//! (not iterated to a fixpoint), validated by randomized KKT roundtrips +//! (`tests/presolve_bound_tightening.rs`). +//! +//! The full deferred catalog — forcing constraints, parallel rows, +//! dominated columns, and bound tightening — is implemented, each with a +//! dual recovery proven correct (and KKT-validated in tests). +//! +//! [`presolve`] iterates the single-pass catalog ([`presolve_once`]) to a +//! **fixpoint**, so deductions cascade across rounds (a fixing exposes a +//! new singleton; a tightened bound makes a row forcing). Because each pass +//! is a correct solution-space transform, the iterate is their composition +//! and reuses every pass's proven dual recovery — no new dual math. +//! +//! This is also how the disjoint-source restriction on forcing / tightening +//! is *lifted*. Within one round, overlapping forcing / tightening sources +//! must stay column-disjoint so their dual re-attributions don't couple. +//! But the fixpoint resolves the overlap across rounds: a source claims its +//! columns only when it actually fires, so the round after it reaches its +//! own fixpoint it stops blocking its neighbours, which then fire — and the +//! *composed* postsolve recovers the shared variable's bound multiplier +//! with **both** rows' contributions present (each layer's global bound +//! recovery sees the inner layers' row multipliers mapped through). The +//! effect is a coupled re-attribution, achieved by composition rather than +//! a within-round coupled solve, and validated by randomized KKT roundtrips +//! over *overlapping* constraint chains +//! (`tests/presolve_bound_tightening.rs`). + +use crate::cones::ConeSpec; +use crate::qp::{QpProblem, QpSolution, QpStatus, Triplet, BOUND_INF}; +use rayon::prelude::*; +use std::collections::hash_map::DefaultHasher; +use std::collections::HashMap; +use std::hash::{Hash, Hasher}; + +/// Outcome of presolve. +// `Reduced` carries the full reduced problem and is by far the common case; +// boxing it to shrink the two rare unit variants would just add an +// allocation + deref on the hot path and ripple through every caller's match. +#[allow(clippy::large_enum_variant)] +pub enum PresolveOutcome { + /// Problem reduced; solve `reduced`, then call [`Presolve::postsolve`]. + Reduced(Presolve), + /// Presolve proved the problem primal-infeasible (e.g. an empty row + /// `0 = b` with `b ≠ 0`, contradictory fixed bounds, or duplicate + /// equality rows with different right-hand sides). + Infeasible, + /// Presolve proved the problem unbounded below (a free column with a + /// nonzero objective coefficient). + Unbounded, +} + +/// A reversible presolve transaction. Each variant stores exactly what +/// postsolve needs to reconstruct the eliminated primal and dual. +/// +/// Dropped *rows* (empty rows, duplicate rows) need no stack entry: they +/// are simply absent from the kept-row maps, so postsolve leaves their +/// dual at the zero initialization, which is the correct multiplier. +enum Reduction { + /// Variable `col` was fixed to `value` by the singleton equality row + /// `eq_row` (coefficient `a_coef`). Postsolve restores `x[col] = + /// value` and computes the row's multiplier from stationarity. + FixedVar { + col: usize, + value: f64, + eq_row: usize, + a_coef: f64, + }, + /// A column absent from `P`, `A`, `G` (linear-only) was fixed at + /// `value` — its optimal box position given the sign of `c_col` — + /// and dropped. Its reduced cost equals `c_col` (carried by the + /// active variable bound). + FreeColumnFixed { col: usize, value: f64 }, + /// A *free column singleton*: variable `col` is unbounded, absent + /// from `P` and `G`, and appears in exactly one equality row + /// `eq_row` (coefficient `a_coef`). It is substituted out via + /// `x_col = (b_r − Σ_{j≠col} a_j x_j) / a_coef`, consuming the row. + /// Postsolve recovers `x_col` from that expression and sets the + /// consumed row's multiplier to the unique value `y_r = −c_col / a_coef`. + FreeColSingleton { + col: usize, + eq_row: usize, + a_coef: f64, + /// `c_col`, used to recover `y_eq_row = −c_col / a_coef`. + c_col: f64, + }, + /// A **forcing constraint**: a row whose activity range touches its + /// right-hand side, so the row can only hold at one vertex of the box, + /// pinning every involved variable to a bound. The row is dropped and + /// each variable fixed; postsolve recovers the row's multiplier and the + /// pinned variables' bound multipliers (see [`Presolve::postsolve`]). + ForcingRow { + /// Original row index. + row: usize, + /// Equality row? (else inequality.) + is_equality: bool, + /// The forced-to vertex is the *max*-activity one (only possible + /// for equalities); else the min-activity vertex. + at_max: bool, + /// Each pinned variable: `(col, coef, value, at_upper)`. + cols: Vec<(usize, f64, f64, bool)>, + }, + /// A **dominated column**: a variable absent from `P` and the + /// equalities, appearing in inequalities `Gx ≤ h` with sign-definite + /// coefficients that match the sign of its cost, so pushing it to one + /// bound never hurts the objective *or* feasibility — it is optimal + /// there. Fixed and dropped; its bound multiplier is its reduced cost, + /// which the sign conditions make valid by construction (recovered in + /// the global bound pass from where the variable lands). + DominatedColumn { col: usize, value: f64 }, + /// A **tightened bound**: row `row` implies a bound on `col` strictly + /// inside its declared box, so the box is shrunk in the reduced problem + /// (the variable is *kept*, not removed). Postsolve handles the dual: + /// if the tightened bound is active at the optimum while the original + /// bound is slack, its multiplier is re-attributed to the source row + /// (the multiplier on a non-real bound belongs to the constraint that + /// implied it). See [`Presolve::postsolve`]'s global bound recovery. + BoundTightening { + col: usize, + row: usize, + is_equality: bool, + /// Source-row coefficient `a_{row,col}`. + coef: f64, + /// Tightened the upper bound? (else lower.) + is_upper: bool, + }, +} + +/// Captured presolve state: the reduced problem plus the transaction +/// stack and the index maps needed to expand a reduced solution back to +/// the original space. +pub struct Presolve { + /// The reduced problem to hand to the solver. + pub reduced: QpProblem, + /// Constant added to the objective by variable substitutions; the + /// reduced objective plus this equals the original objective. + pub obj_offset: f64, + /// Original problem dimensions. + orig_n: usize, + orig_m_eq: usize, + orig_m_ineq: usize, + /// `kept_cols[reduced_col] = orig_col`. + kept_cols: Vec, + /// `kept_eq[reduced_eq_row] = orig_eq_row`. + kept_eq: Vec, + /// `kept_ineq[reduced_ineq_row] = orig_ineq_row`. + kept_ineq: Vec, + /// Original problem data, retained for fixing-row dual recovery. + orig: QpProblem, + stack: Vec, + /// For an *iterated* presolve, the ordered single-pass layers + /// (`L0, L1, …`) whose composition this object represents; empty for a + /// single pass. `reduced` is then the final layer's reduced problem and + /// `postsolve` folds the layers in reverse. The single-pass fields + /// above are unused in that case. + chain: Vec, +} + +/// Coefficients are treated as nonzero unless exactly 0.0. +const ZERO_TOL: f64 = 0.0; +/// Slack allowed when checking a fixed value against its variable box. +const BOUND_FEAS_TOL: f64 = 1e-9; +/// Slack allowed in activity-bound comparisons (redundancy / feasibility). +const ACTIVITY_TOL: f64 = 1e-9; +/// How close `x_i` must be to a box bound to count it *active* when +/// recovering bound multipliers. Looser than [`BOUND_FEAS_TOL`] because an +/// interior-point solve only drives a variable to within ~1e-8 of a bound, +/// not to machine zero; interior variables sit far further away. +const ACTIVE_BOUND_TOL: f64 = 1e-6; + +/// Group nonzero entries by row index: `out[row] = [(col, val), …]`. +fn group_by_row(triplets: &[Triplet], m: usize) -> Vec> { + let mut out = vec![Vec::new(); m]; + for t in triplets { + if t.val != ZERO_TOL { + out[t.row].push((t.col, t.val)); + } + } + out +} + +/// Minimum and maximum of `Σ a_j x_j` over the variable box, given each +/// variable's effective lower/upper bound. An infinite contribution +/// makes the corresponding extreme `±∞`. +fn activity(row: &[(usize, f64)], lb: &L, ub: &U) -> (f64, f64) +where + L: Fn(usize) -> f64, + U: Fn(usize) -> f64, +{ + let mut amin = 0.0; + let mut amax = 0.0; + for &(c, a) in row { + let (lo, hi) = (lb(c), ub(c)); + if a > 0.0 { + amin += a * lo; // a>0: min at lower bound + amax += a * hi; + } else { + amin += a * hi; // a<0: min at upper bound + amax += a * lo; + } + } + (amin, amax) +} + +/// A single constraint row in the reduced column space, tagged with its +/// original row index. Used for duplicate detection and final assembly. +struct Row { + /// `(reduced_col, value)` pairs, sorted by column, duplicates merged. + coeffs: Vec<(usize, f64)>, + rhs: f64, + orig: usize, +} + +/// Run presolve on `prob`, iterating the reduction passes to a **fixpoint** +/// so deductions cascade (a fixing exposes a new singleton, a tightened +/// bound makes a row forcing, …). Each pass is a correct solution-space +/// transform, so the iterate is the composition of the per-pass transforms +/// — postsolve folds them back in reverse — and inherits each pass's proven +/// dual recovery with no new dual math. +pub fn presolve(prob: &QpProblem) -> PresolveOutcome { + // Cap rounds defensively; in practice it converges in a few. + const MAX_ROUNDS: usize = 32; + let mut chain: Vec = Vec::new(); + let mut current = prob.clone(); + loop { + match presolve_once(¤t, &[]) { + PresolveOutcome::Infeasible => return PresolveOutcome::Infeasible, + PresolveOutcome::Unbounded => return PresolveOutcome::Unbounded, + PresolveOutcome::Reduced(ps) => { + if !ps.changed() { + // Fixpoint: this round did nothing. + if chain.is_empty() { + return PresolveOutcome::Reduced(ps); // plain single pass + } + break; + } + current = ps.reduced.clone(); + chain.push(ps); + if chain.len() >= MAX_ROUNDS { + break; + } + } + } + } + if chain.len() == 1 { + return PresolveOutcome::Reduced(chain.pop().unwrap()); + } + let reduced = chain.last().expect("chain non-empty").reduced.clone(); + PresolveOutcome::Reduced(Presolve { + reduced, + obj_offset: 0.0, + orig_n: prob.n, + orig_m_eq: prob.m_eq(), + orig_m_ineq: prob.m_ineq(), + kept_cols: Vec::new(), + kept_eq: Vec::new(), + kept_ineq: Vec::new(), + orig: prob.clone(), + stack: Vec::new(), + chain, + }) +} + +/// Cone-aware presolve for a problem whose inequality block is partitioned +/// by `cones`. Applies only the cone-safe reductions (equality singletons, +/// free columns / free-column singletons, fixed-variable substitution; and +/// the orthant `≤`-row reductions on the *nonnegative* blocks), leaving +/// second-order-cone rows and the columns coupled to them untouched. A +/// **single pass** (the fixpoint loop is orthant-only), so the reduced cone +/// partition is recoverable from the kept rows — see +/// [`Presolve::reduced_cones`]. +pub fn presolve_conic(prob: &QpProblem, cones: &[ConeSpec]) -> PresolveOutcome { + // SOC rows are the inequality rows belonging to a non-`Nonneg` block. + let mut soc_row = vec![false; prob.m_ineq()]; + let mut row = 0; + for spec in cones { + let d = spec.dim(); + if matches!(spec, ConeSpec::SecondOrder(_)) { + for r in row..row + d { + if r < soc_row.len() { + soc_row[r] = true; + } + } + } + row += d; + } + presolve_once(prob, &soc_row) +} + +/// A single presolve pass (the reduction catalog applied once). [`presolve`] +/// iterates this to a fixpoint. +/// +/// `soc_row` (length `m_ineq`, or empty for the all-orthant QP path) marks +/// inequality rows that belong to a *non-orthant* cone (e.g. a second-order +/// cone). Such rows are coupled, so the `≤`-row reductions (empty-row, +/// activity, forcing, bound-tightening, parallel/duplicate) must not touch +/// them, and columns appearing in them are not eligible for the dominated- +/// column reduction. The cone-safe reductions (equality singletons, free +/// columns, free-column singletons, fixed-variable substitution) apply +/// regardless. Marked rows are never dropped, so the conic partition is +/// recoverable from the kept rows. +fn presolve_once(prob: &QpProblem, soc_row: &[bool]) -> PresolveOutcome { + let n = prob.n; + let m_eq = prob.m_eq(); + let m_ineq = prob.m_ineq(); + let is_soc_row = |i: usize| soc_row.get(i).copied().unwrap_or(false); + // A column is conic-coupled if it appears in any SOC inequality row. + let mut soc_col = vec![false; n]; + if !soc_row.is_empty() { + for t in &prob.g { + if is_soc_row(t.row) && t.val != ZERO_TOL { + soc_col[t.col] = true; + } + } + } + + let mut stack: Vec = Vec::new(); + + // --- per-row / per-column nonzero structure --- + let mut eq_nnz = vec![0usize; m_eq]; + let mut eq_single: Vec> = vec![None; m_eq]; + // Finer per-column appearance counts: total (`col_nnz`), and split + // by where the variable appears, so we can recognize a free *column + // singleton* (a variable in exactly one equality row, nowhere else). + let mut col_nnz = vec![0usize; n]; + let mut a_col_count = vec![0usize; n]; + let mut g_col_count = vec![0usize; n]; + let mut p_col_present = vec![false; n]; + // For a column singleton: which equality row holds it, with coef. + let mut col_eq_single: Vec> = vec![None; n]; + for t in &prob.a { + if t.val != ZERO_TOL { + eq_nnz[t.row] += 1; + eq_single[t.row] = Some((t.col, t.val)); + col_nnz[t.col] += 1; + a_col_count[t.col] += 1; + col_eq_single[t.col] = Some((t.row, t.val)); + } + } + let mut ineq_nnz = vec![0usize; m_ineq]; + for t in &prob.g { + if t.val != ZERO_TOL { + ineq_nnz[t.row] += 1; + col_nnz[t.col] += 1; + g_col_count[t.col] += 1; + } + } + for t in &prob.p_lower { + if t.val != ZERO_TOL { + col_nnz[t.row] += 1; + p_col_present[t.row] = true; + if t.row != t.col { + col_nnz[t.col] += 1; + p_col_present[t.col] = true; + } + } + } + + // --- empty equality rows + singleton-equality fixings --- + let mut fixed: Vec> = vec![None; n]; + let mut eq_dropped = vec![false; m_eq]; + for row in 0..m_eq { + match eq_nnz[row] { + 0 => { + if prob.b[row] != 0.0 { + return PresolveOutcome::Infeasible; + } + eq_dropped[row] = true; + } + 1 => { + let (col, a) = eq_single[row].expect("singleton has an entry"); + if fixed[col].is_none() { + let value = prob.b[row] / a; + // The fixed value must satisfy the variable's box. + if value < prob.lb_of(col) - BOUND_FEAS_TOL + || value > prob.ub_of(col) + BOUND_FEAS_TOL + { + return PresolveOutcome::Infeasible; + } + fixed[col] = Some(value); + eq_dropped[row] = true; + stack.push(Reduction::FixedVar { + col, + value, + eq_row: row, + a_coef: a, + }); + } + } + _ => {} + } + } + + // --- free column singletons --- + // A free variable (unbounded both ways), absent from P and G, that + // appears in exactly one equality row whose row has ≥ 2 nonzeros, is + // substituted out: `x_col = (b_r − Σ_{j≠col} a_j x_j) / a_col`. This + // consumes both the variable and the row. The substitution shifts the + // cost of the row's other variables (`c_adjust`) and a constant into + // the objective offset; the consumed row's dual is the unique value + // `−c_col / a_col`, recovered in postsolve. + let mut substituted = vec![false; n]; + let mut c_adjust = vec![0.0; n]; + let mut subst_offset = 0.0; + for col in 0..n { + if fixed[col].is_some() || substituted[col] { + continue; + } + let free = prob.lb_of(col) <= -BOUND_INF && prob.ub_of(col) >= BOUND_INF; + let only_in_one_eq = a_col_count[col] == 1 && g_col_count[col] == 0 && !p_col_present[col]; + if !(free && only_in_one_eq) { + continue; + } + let (row, a_col) = col_eq_single[col].expect("column singleton entry"); + // The row must still be live and non-trivial (≥ 2 vars: a plain + // singleton row was already turned into a FixedVar above). + if eq_dropped[row] || eq_nnz[row] < 2 { + continue; + } + // Substitute: c_col·x_col = (c_col·b_r/a_col) − Σ_{j≠col} + // (c_col·a_jr/a_col)·x_j. + let c_col = prob.c[col]; + subst_offset += c_col * prob.b[row] / a_col; + for t in &prob.a { + if t.row == row && t.col != col && t.val != ZERO_TOL { + c_adjust[t.col] -= c_col * t.val / a_col; + } + } + substituted[col] = true; + eq_dropped[row] = true; + stack.push(Reduction::FreeColSingleton { + col, + eq_row: row, + a_coef: a_col, + c_col, + }); + } + + // --- empty inequality rows --- + // (SOC rows are coupled — an "empty" SOC row is part of a cone block and + // must be kept; skip.) + let mut ineq_dropped = vec![false; m_ineq]; + for row in 0..m_ineq { + if !is_soc_row(row) && ineq_nnz[row] == 0 { + if prob.h[row] < 0.0 { + return PresolveOutcome::Infeasible; + } + ineq_dropped[row] = true; + } + } + + // --- activity-bound reductions (need the variable box) --- + // Effective bounds: a fixed variable contributes its exact value; + // others contribute their declared box (±∞ when absent). + let eff_lb = |c: usize| fixed[c].unwrap_or_else(|| prob.lb_of(c)); + let eff_ub = |c: usize| fixed[c].unwrap_or_else(|| prob.ub_of(c)); + + // Group nonzeros by row once, reused for inequalities and equalities. + let g_by_row = group_by_row(&prob.g, m_ineq); + let a_by_row = group_by_row(&prob.a, m_eq); + + // Inequality `g·x ≤ h`: + // max-activity ≤ h ⇒ redundant (always satisfied) → drop; + // min-activity > h ⇒ infeasible. + for row in 0..m_ineq { + if ineq_dropped[row] || is_soc_row(row) { + continue; + } + let (amin, amax) = activity(&g_by_row[row], &eff_lb, &eff_ub); + if amin > prob.h[row] + ACTIVITY_TOL { + return PresolveOutcome::Infeasible; + } + if amax <= prob.h[row] + ACTIVITY_TOL { + ineq_dropped[row] = true; + } + } + + // Equality `a·x = b`: feasible only if `b` lies in the activity + // range `[min, max]`. Out of range ⇒ infeasible. (A redundant + // equality whose range is the single point `b` is left in place; its + // dual is genuine, unlike a dropped inequality's zero multiplier.) + for row in 0..m_eq { + if eq_dropped[row] { + continue; + } + let (amin, amax) = activity(&a_by_row[row], &eff_lb, &eff_ub); + if prob.b[row] < amin - ACTIVITY_TOL || prob.b[row] > amax + ACTIVITY_TOL { + return PresolveOutcome::Infeasible; + } + } + + // --- forcing constraints --- + // A row whose activity range touches its RHS can hold only at one + // vertex of the box, pinning every involved variable to a bound: + // inequality g·x ≤ h with min-activity == h ⇒ pin to the min vertex; + // equality a·x = b with min-activity == b ⇒ pin to the min vertex; + // equality a·x = b with max-activity == b ⇒ pin to the max vertex. + // Each pinned variable becomes fixed (substituted out like any fixed + // var); the row is dropped. Dual recovery (the reason this is subtle) + // is handled in postsolve. We require each forcing row's columns to be + // disjoint from every other forcing row's, so the multiplier recovery + // stays independent (a conservative but always-correct restriction). + let eff_lb_at = |fixed: &[Option], c: usize| fixed[c].unwrap_or_else(|| prob.lb_of(c)); + let eff_ub_at = |fixed: &[Option], c: usize| fixed[c].unwrap_or_else(|| prob.ub_of(c)); + let mut forced_touched = vec![false; n]; + + // Pin the variables of one forcing row to `at_max` vertex (or the min + // vertex when `at_max` is false), recording the reduction. Returns + // false (skipped) if any column is already fixed/substituted/forced. + // `row_entries` is the row's `(col, coef)` list, all coefficients nonzero. + let try_force = |row_entries: &[(usize, f64)], + orig_row: usize, + is_equality: bool, + at_max: bool, + fixed: &mut [Option], + forced_touched: &mut [bool], + stack: &mut Vec| + -> bool { + // Every involved column must be free to fix and not shared with + // another forcing row. + for &(c, _) in row_entries { + if fixed[c].is_some() || substituted[c] || forced_touched[c] { + return false; + } + } + let mut cols = Vec::with_capacity(row_entries.len()); + for &(c, coef) in row_entries { + // Vertex bound: min-activity puts coef>0 at lb, coef<0 at + // ub; max-activity is the mirror. + let at_upper = if at_max { coef > 0.0 } else { coef < 0.0 }; + let value = if at_upper { + prob.ub_of(c) + } else { + prob.lb_of(c) + }; + // A forcing vertex requires finite bounds; guard anyway. + if !value.is_finite() || value.abs() >= BOUND_INF { + return false; + } + cols.push((c, coef, value, at_upper)); + } + for &(c, _, value, _) in &cols { + fixed[c] = Some(value); + forced_touched[c] = true; + } + stack.push(Reduction::ForcingRow { + row: orig_row, + is_equality, + at_max, + cols, + }); + true + }; + + for row in 0..m_ineq { + if ineq_dropped[row] || is_soc_row(row) || g_by_row[row].is_empty() { + continue; + } + let (amin, _) = activity(&g_by_row[row], &|c| eff_lb_at(&fixed, c), &|c| { + eff_ub_at(&fixed, c) + }); + if amin.is_finite() + && (prob.h[row] - amin).abs() <= ACTIVITY_TOL + && try_force( + &g_by_row[row], + row, + false, + false, + &mut fixed, + &mut forced_touched, + &mut stack, + ) + { + ineq_dropped[row] = true; + } + } + + for row in 0..m_eq { + if eq_dropped[row] || a_by_row[row].len() < 2 { + continue; + } + let (amin, amax) = activity(&a_by_row[row], &|c| eff_lb_at(&fixed, c), &|c| { + eff_ub_at(&fixed, c) + }); + let b = prob.b[row]; + let at_max = if amin.is_finite() && (b - amin).abs() <= ACTIVITY_TOL { + Some(false) + } else if amax.is_finite() && (amax - b).abs() <= ACTIVITY_TOL { + Some(true) + } else { + None + }; + if let Some(at_max) = at_max { + if try_force( + &a_by_row[row], + row, + true, + at_max, + &mut fixed, + &mut forced_touched, + &mut stack, + ) { + eq_dropped[row] = true; + } + } + } + + // --- dominated columns --- + // A variable absent from P and the equalities, present only in + // inequalities `Gx ≤ h`, whose live G-coefficients are sign-definite in + // a way that matches its cost sign, is optimal at a bound: pushing it + // there never raises the objective nor tightens a `≤` row, so an + // optimal solution with it at that bound always exists. Fix and drop + // it. Its bound multiplier is its reduced cost `c_k + Σᵢ aᵢₖ zᵢ`, which + // the sign conditions (`aᵢₖ ≥ 0, c_k ≥ 0` for the lower bound; mirror + // for the upper) make nonnegative — so the recovered dual is valid by + // construction. This is PaPILO's dominated-column reduction, restricted + // to the case with a clean, sign-guaranteed dual. + { + // Per-column G-coefficient sign summary over *live* inequality rows. + let mut g_all_nonneg = vec![true; n]; + let mut g_all_nonpos = vec![true; n]; + for t in &prob.g { + if t.val == ZERO_TOL || ineq_dropped[t.row] { + continue; + } + if t.val < 0.0 { + g_all_nonneg[t.col] = false; + } else if t.val > 0.0 { + g_all_nonpos[t.col] = false; + } + } + for col in 0..n { + if fixed[col].is_some() + || substituted[col] + || p_col_present[col] + || a_col_count[col] != 0 + || g_col_count[col] == 0 + || soc_col[col] + { + continue; + } + let c_k = prob.c[col]; + let lb = prob.lb_of(col); + let ub = prob.ub_of(col); + if g_all_nonneg[col] && c_k >= 0.0 && lb > -BOUND_INF { + fixed[col] = Some(lb); + stack.push(Reduction::DominatedColumn { col, value: lb }); + } else if g_all_nonpos[col] && c_k <= 0.0 && ub < BOUND_INF { + fixed[col] = Some(ub); + stack.push(Reduction::DominatedColumn { col, value: ub }); + } + } + } + + // --- bound tightening (domain propagation, single pass) --- + // From each live row, derive implied bounds on its variables and shrink + // the box where strictly tighter. The variable is *kept* (only its box + // changes); the subtle dual — re-attributing an active tightened + // bound's multiplier to the source row — is handled by postsolve's + // global bound recovery. A single pass (not iterated to a fixpoint), + // so it tightens but does not cascade into further reductions here. + let mut tlb: Vec = (0..n).map(|c| prob.lb_of(c)).collect(); + let mut tub: Vec = (0..n).map(|c| prob.ub_of(c)).collect(); + for c in 0..n { + if let Some(v) = fixed[c] { + tlb[c] = v; + tub[c] = v; + } + } + // Source row (and its coef / kind) of each variable's tightened bound. + let mut ub_src: Vec> = vec![None; n]; + let mut lb_src: Vec> = vec![None; n]; + + // Re-attributing an active tightened bound's multiplier to its source + // row is only *independent* when source rows share no columns (and + // touch no already-reduced column); otherwise the re-attributions + // couple. So a row may serve as a tightening source only if all its + // columns are kept (not fixed/substituted) and disjoint from every + // other accepted source row — a conservative but always-correct + // restriction, exactly like forcing's disjoint-column rule. + let reduction_touched: Vec = (0..n) + .map(|c| fixed[c].is_some() || substituted[c]) + .collect(); + let mut bt_col_used = vec![false; n]; + let row_is_clean = |entries: &[(usize, f64)], used: &[bool]| { + entries + .iter() + .all(|&(c, _)| !reduction_touched[c] && !used[c]) + }; + + // Tighten variable boxes from one row whose activity lies in `[lo, hi]` + // (inequality `≤ h`: `lo = −∞, hi = h`; equality: `lo = hi = b`). + // `None` ⇒ a detected empty domain (infeasible); `Some(k)` ⇒ `k` bounds + // were tightened. + let tighten_from_row = |entries: &[(usize, f64)], + lo: f64, + hi: f64, + row_idx: usize, + is_eq: bool, + tlb: &mut [f64], + tub: &mut [f64], + ub_src: &mut [Option<(usize, f64, bool)>], + lb_src: &mut [Option<(usize, f64, bool)>]| + -> Option { + let (amin, amax) = activity(entries, &|c| tlb[c], &|c| tub[c]); + // Compute all implied bounds against the row-start state, then + // apply (so within-row order doesn't matter). + let mut updates: Vec<(usize, bool, f64, f64)> = Vec::new(); // (col,is_upper,val,coef) + for &(k, a) in entries { + if fixed[k].is_some() || a == 0.0 { + continue; + } + let contrib_min = if a > 0.0 { a * tlb[k] } else { a * tub[k] }; + let contrib_max = if a > 0.0 { a * tub[k] } else { a * tlb[k] }; + let amin_mk = amin - contrib_min; + let amax_mk = amax - contrib_max; + if hi.is_finite() { + let val = (hi - amin_mk) / a; + if val.is_finite() { + if a > 0.0 { + if val < tub[k] - BOUND_FEAS_TOL { + updates.push((k, true, val, a)); + } + } else if val > tlb[k] + BOUND_FEAS_TOL { + updates.push((k, false, val, a)); + } + } + } + if lo.is_finite() { + let val = (lo - amax_mk) / a; + if val.is_finite() { + if a > 0.0 { + if val > tlb[k] + BOUND_FEAS_TOL { + updates.push((k, false, val, a)); + } + } else if val < tub[k] - BOUND_FEAS_TOL { + updates.push((k, true, val, a)); + } + } + } + } + let mut tightened = 0usize; + for (k, is_upper, val, a) in updates { + if is_upper { + if val < tub[k] - BOUND_FEAS_TOL { + tub[k] = val; + ub_src[k] = Some((row_idx, a, is_eq)); + tightened += 1; + } + } else if val > tlb[k] + BOUND_FEAS_TOL { + tlb[k] = val; + lb_src[k] = Some((row_idx, a, is_eq)); + tightened += 1; + } + if tlb[k] > tub[k] + BOUND_FEAS_TOL { + return None; + } + } + Some(tightened) + }; + + // A source row claims its columns (blocking overlapping sources, so the + // re-attributions stay independent) only when it *actually* tightens — + // a clean row that tightens nothing must not block its neighbours, or a + // pair of overlapping rows where only one is useful would deadlock + // across fixpoint rounds. With this, the fixpoint progressively fires + // overlapping tightenings (each round the previous round's sources are + // at their fixpoint and no longer claim columns). + for row in 0..m_ineq { + if ineq_dropped[row] + || is_soc_row(row) + || g_by_row[row].is_empty() + || !row_is_clean(&g_by_row[row], &bt_col_used) + { + continue; + } + match tighten_from_row( + &g_by_row[row], + f64::NEG_INFINITY, + prob.h[row], + row, + false, + &mut tlb, + &mut tub, + &mut ub_src, + &mut lb_src, + ) { + None => return PresolveOutcome::Infeasible, + Some(0) => {} + Some(_) => { + for &(c, _) in &g_by_row[row] { + bt_col_used[c] = true; + } + } + } + } + for row in 0..m_eq { + if eq_dropped[row] + || a_by_row[row].is_empty() + || !row_is_clean(&a_by_row[row], &bt_col_used) + { + continue; + } + let b = prob.b[row]; + match tighten_from_row( + &a_by_row[row], + b, + b, + row, + true, + &mut tlb, + &mut tub, + &mut ub_src, + &mut lb_src, + ) { + None => return PresolveOutcome::Infeasible, + Some(0) => {} + Some(_) => { + for &(c, _) in &a_by_row[row] { + bt_col_used[c] = true; + } + } + } + } + + // Record a reduction for each variable whose box was strictly tightened. + for k in 0..n { + if fixed[k].is_some() { + continue; + } + if tub[k] < prob.ub_of(k) - BOUND_FEAS_TOL { + if let Some((row, coef, is_eq)) = ub_src[k] { + stack.push(Reduction::BoundTightening { + col: k, + row, + is_equality: is_eq, + coef, + is_upper: true, + }); + } + } + if tlb[k] > prob.lb_of(k) + BOUND_FEAS_TOL { + if let Some((row, coef, is_eq)) = lb_src[k] { + stack.push(Reduction::BoundTightening { + col: k, + row, + is_equality: is_eq, + coef, + is_upper: false, + }); + } + } + } + + // --- free / linear-only columns --- + // A column absent from P, A, G contributes only `c_k x_k`, so its + // optimum is at a bound dictated by the sign of c_k: + // c_k > 0 → minimize by pushing to lb (unbounded if lb = −∞) + // c_k < 0 → push to ub (unbounded if ub = +∞) + // c_k = 0 → irrelevant; pin to lb if finite else ub if finite else 0 + let mut dropped_col = vec![false; n]; + for c in 0..n { + if fixed[c].is_some() || substituted[c] { + dropped_col[c] = true; // fixed / substituted columns are removed + continue; + } + if col_nnz[c] == 0 { + let (lb, ub) = (prob.lb_of(c), prob.ub_of(c)); + let value = if prob.c[c] > 0.0 { + if lb <= -BOUND_INF { + return PresolveOutcome::Unbounded; + } + lb + } else if prob.c[c] < 0.0 { + if ub >= BOUND_INF { + return PresolveOutcome::Unbounded; + } + ub + } else if lb > -BOUND_INF { + lb + } else if ub < BOUND_INF { + ub + } else { + 0.0 + }; + dropped_col[c] = true; + stack.push(Reduction::FreeColumnFixed { col: c, value }); + } + } + + // --- column map over surviving columns --- + let mut kept_cols = Vec::new(); + let mut col_new = vec![usize::MAX; n]; + for c in 0..n { + if !dropped_col[c] { + col_new[c] = kept_cols.len(); + kept_cols.push(c); + } + } + let fixval = |c: usize| fixed[c].unwrap_or(0.0); + + // --- objective: P, c, offset with fixed vars substituted --- + // Surviving variables' linear cost is their original `c` plus any + // cost shifted onto them by a free-column-singleton substitution. + let mut new_c = vec![0.0; kept_cols.len()]; + for (newc, &oldc) in kept_cols.iter().enumerate() { + new_c[newc] = prob.c[oldc] + c_adjust[oldc]; + } + let mut offset = subst_offset; + for (c, &fixed_c) in fixed.iter().enumerate() { + if let Some(v) = fixed_c { + offset += prob.c[c] * v; + } + } + // Free/linear-only columns fixed to a bound contribute `c_k · value`. + for r in &stack { + if let Reduction::FreeColumnFixed { col, value } = r { + offset += prob.c[*col] * value; + } + } + let mut new_p: Vec = Vec::new(); + for t in &prob.p_lower { + let (i, j, v) = (t.row, t.col, t.val); + match (fixed[i].is_some(), fixed[j].is_some()) { + (false, false) => new_p.push(Triplet::new(col_new[i], col_new[j], v)), + (true, true) => { + // both fixed → constant. Off-diagonal counts twice. + if i == j { + offset += 0.5 * v * fixval(i) * fixval(j); + } else { + offset += v * fixval(i) * fixval(j); + } + } + (true, false) => new_c[col_new[j]] += v * fixval(i), + (false, true) => new_c[col_new[i]] += v * fixval(j), + } + } + + // --- build reduced rows (after substitution), then dedup --- + let eq_rows = match build_rows(&prob.a, m_eq, &eq_dropped, &prob.b, &fixed, &col_new, true) { + Ok(rows) => rows, + Err(()) => return PresolveOutcome::Infeasible, + }; + let ineq_rows = match build_rows( + &prob.g, + m_ineq, + &ineq_dropped, + &prob.h, + &fixed, + &col_new, + false, + ) { + Ok(rows) => rows, + Err(()) => return PresolveOutcome::Infeasible, + }; + + let eq_rows = match dedup_rows(eq_rows, true, &[]) { + Ok(rows) => rows, + Err(()) => return PresolveOutcome::Infeasible, + }; + // SOC rows are coupled and must survive verbatim — exclude them from + // parallel/duplicate merging. + let ineq_rows = dedup_rows(ineq_rows, false, soc_row).expect("ineq dedup never infeasible"); + + // --- flatten surviving rows to triplets + kept-row maps --- + let mut kept_eq = Vec::with_capacity(eq_rows.len()); + let mut new_a = Vec::new(); + let mut new_b = vec![0.0; eq_rows.len()]; + for (newr, row) in eq_rows.iter().enumerate() { + kept_eq.push(row.orig); + new_b[newr] = row.rhs; + for &(c, v) in &row.coeffs { + new_a.push(Triplet::new(newr, c, v)); + } + } + let mut kept_ineq = Vec::with_capacity(ineq_rows.len()); + let mut new_g = Vec::new(); + let mut new_h = vec![0.0; ineq_rows.len()]; + for (newr, row) in ineq_rows.iter().enumerate() { + kept_ineq.push(row.orig); + new_h[newr] = row.rhs; + for &(c, v) in &row.coeffs { + new_g.push(Triplet::new(newr, c, v)); + } + } + + // Carry the kept columns' (possibly tightened) bounds into the reduced + // problem. Emit bounds when the original had them or bound tightening + // produced a finite bound on a kept variable; otherwise leave empty. + let need_bounds = prob.has_bounds() + || kept_cols + .iter() + .any(|&c| tlb[c] > -BOUND_INF || tub[c] < BOUND_INF); + let (new_lb, new_ub) = if need_bounds { + ( + kept_cols.iter().map(|&c| tlb[c]).collect(), + kept_cols.iter().map(|&c| tub[c]).collect(), + ) + } else { + (Vec::new(), Vec::new()) + }; + + let reduced = QpProblem { + n: kept_cols.len(), + p_lower: new_p, + c: new_c, + a: new_a, + b: new_b, + g: new_g, + h: new_h, + lb: new_lb, + ub: new_ub, + }; + + PresolveOutcome::Reduced(Presolve { + reduced, + obj_offset: offset, + orig_n: n, + orig_m_eq: m_eq, + orig_m_ineq: m_ineq, + kept_cols, + kept_eq, + kept_ineq, + orig: prob.clone(), + stack, + chain: Vec::new(), + }) +} + +/// Build per-row coefficient lists in the reduced column space, +/// substituting fixed variables into the right-hand side. Rows that +/// become empty after substitution trigger a feasibility check: +/// `0 = rhs` (equality) requires `rhs == 0`; `0 ≤ rhs` (inequality) +/// requires `rhs ≥ 0`. Returns `Err(())` on detected infeasibility. +fn build_rows( + triplets: &[Triplet], + m: usize, + dropped: &[bool], + base_rhs: &[f64], + fixed: &[Option], + col_new: &[usize], + is_equality: bool, +) -> Result, ()> { + let mut acc: Vec> = (0..m) + .map(|r| { + if dropped[r] { + None + } else { + Some(Row { + coeffs: Vec::new(), + rhs: base_rhs[r], + orig: r, + }) + } + }) + .collect(); + + for t in triplets { + if dropped[t.row] || t.val == ZERO_TOL { + continue; + } + let row = acc[t.row].as_mut().expect("non-dropped row"); + if let Some(v) = fixed[t.col] { + row.rhs -= t.val * v; + } else { + row.coeffs.push((col_new[t.col], t.val)); + } + } + + let mut out = Vec::new(); + for row in acc.into_iter().flatten() { + let mut row = row; + merge_sort_coeffs(&mut row.coeffs); + if row.coeffs.is_empty() { + // Row reduced to `0 (cmp) rhs`: a feasibility check. + if is_equality { + if row.rhs.abs() > 0.0 { + return Err(()); + } + } else if row.rhs < 0.0 { + return Err(()); + } + // Feasible empty row: drop it (no coefficients, no dual). + continue; + } + out.push(row); + } + Ok(out) +} + +/// Sort coefficients by column and merge any duplicate columns (a +/// variable appearing twice in one row). Drops entries that cancel to 0. +fn merge_sort_coeffs(coeffs: &mut Vec<(usize, f64)>) { + coeffs.sort_by_key(|&(c, _)| c); + let mut merged: Vec<(usize, f64)> = Vec::with_capacity(coeffs.len()); + for &(c, v) in coeffs.iter() { + if let Some(last) = merged.last_mut() { + if last.0 == c { + last.1 += v; + continue; + } + } + merged.push((c, v)); + } + merged.retain(|&(_, v)| v != 0.0); + *coeffs = merged; +} + +/// Relative tolerance for confirming two rows are scalar multiples. +const PARALLEL_TOL: f64 = 1e-9; + +/// Canonical pivot used to normalize a row for *parallel* (scalar- +/// multiple) detection: its first coefficient (the rows' coeffs are +/// sorted by column). For inequalities we divide by the pivot's +/// **magnitude** so only *positive* multiples — same inequality direction +/// — normalize alike; for equalities we divide by the **signed** pivot so +/// `±` multiples (the same constraint either way) match. +fn pivot_divisor(row: &Row, is_equality: bool) -> f64 { + let p = row.coeffs[0].1; + if is_equality { + p + } else { + p.abs() + } +} + +/// Normalized coefficient values (parallel detection): `coeffs / divisor`. +fn normalized_coeffs(row: &Row, is_equality: bool) -> Vec<(usize, f64)> { + let d = pivot_divisor(row, is_equality); + row.coeffs.iter().map(|&(c, v)| (c, v / d)).collect() +} + +/// Hash a normalized coefficient pattern. Values are quantized so exact +/// scalar multiples hash together; the hash is only a *filter* (a quantize +/// boundary can split a true pair into different buckets, which merely +/// misses a reduction — never a wrong merge, since membership is confirmed +/// by [`approx_parallel`]). +fn parallel_signature(norm: &[(usize, f64)]) -> u64 { + let mut h = DefaultHasher::new(); + norm.len().hash(&mut h); + for &(c, v) in norm { + c.hash(&mut h); + ((v / PARALLEL_TOL).round() as i64).hash(&mut h); + } + h.finish() +} + +/// Confirm two normalized patterns are equal to `PARALLEL_TOL` (same +/// columns, matching values). Conservative: only true scalar multiples +/// pass, so a wrong merge is impossible. +fn approx_parallel(a: &[(usize, f64)], b: &[(usize, f64)]) -> bool { + a.len() == b.len() + && a.iter().zip(b).all(|(&(ca, va), &(cb, vb))| { + ca == cb && (va - vb).abs() <= PARALLEL_TOL * (1.0 + va.abs().max(vb.abs())) + }) +} + +/// Remove **parallel** rows (scalar multiples of one another), the +/// generalization of exact-duplicate removal (PaPILO's parallel-row +/// reduction). Normalized signatures are computed in parallel (rayon); +/// grouping and the per-group decision are serial and cheap. +/// +/// Dual recovery stays trivial because we always keep an *original* row in +/// its own frame and set every dropped row's multiplier to 0 (the kept row +/// carries the constraint): +/// - equalities — all scalar multiples represent one constraint; their +/// *normalized* right-hand sides must agree, else the system is +/// infeasible. Keep the first; drop the rest. +/// - inequalities — positive multiples of one direction; keep the **most +/// restrictive** original row (smallest normalized rhs `h / |pivot|`) +/// and drop the looser ones, which it implies. +fn dedup_rows(rows: Vec, is_equality: bool, protected: &[bool]) -> Result, ()> { + if rows.len() < 2 { + return Ok(rows); + } + // A row is protected (never merged) when its *original* index is marked + // — used to keep coupled cone rows verbatim. + let is_protected = |i: usize| protected.get(rows[i].orig).copied().unwrap_or(false); + + // Parallel: normalize + hash each row (PaPILO-style hashing-based + // pairing, generalized to scalar multiples). + let norms: Vec> = rows + .par_iter() + .map(|r| normalized_coeffs(r, is_equality)) + .collect(); + let sigs: Vec = norms.par_iter().map(|n| parallel_signature(n)).collect(); + + // Group row indices by signature (serial; small). Protected rows are + // excluded from grouping, so they are never dropped and never drop + // others. + let mut buckets: HashMap> = HashMap::new(); + for (i, &s) in sigs.iter().enumerate() { + if !is_protected(i) { + buckets.entry(s).or_default().push(i); + } + } + + // Normalized rhs of a row, for the tightness / consistency decisions. + let norm_rhs = |i: usize| rows[i].rhs / pivot_divisor(&rows[i], is_equality); + + let mut keep = vec![true; rows.len()]; + for idxs in buckets.values() { + if idxs.len() < 2 { + continue; + } + // Within a signature bucket, partition into confirmed-parallel + // groups (guards against quantization collisions). + let mut handled = vec![false; idxs.len()]; + for a in 0..idxs.len() { + if handled[a] { + continue; + } + let mut group = vec![idxs[a]]; + handled[a] = true; + for b in (a + 1)..idxs.len() { + if !handled[b] && approx_parallel(&norms[idxs[a]], &norms[idxs[b]]) { + handled[b] = true; + group.push(idxs[b]); + } + } + if group.len() < 2 { + continue; + } + if is_equality { + // Parallel equalities: normalized rhs must agree, else the + // two scaled-identical constraints are contradictory. + let r0 = norm_rhs(group[0]); + for &g in &group[1..] { + if (norm_rhs(g) - r0).abs() > PARALLEL_TOL * (1.0 + r0.abs()) { + return Err(()); + } + } + for &g in &group[1..] { + keep[g] = false; + } + } else { + // Parallel inequalities: keep the most restrictive original + // row (smallest normalized rhs); it implies the rest. + let tightest = *group + .iter() + .min_by(|&&p, &&q| norm_rhs(p).partial_cmp(&norm_rhs(q)).unwrap()) + .unwrap(); + for &g in &group { + if g != tightest { + keep[g] = false; + } + } + } + } + } + + Ok(rows + .into_iter() + .zip(keep) + .filter_map(|(r, k)| if k { Some(r) } else { None }) + .collect()) +} + +/// Summary of what presolve removed, for logging and tests. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub struct PresolveStats { + /// Variables in the original problem. + pub orig_vars: usize, + /// Variables in the reduced problem. + pub reduced_vars: usize, + /// Equality + inequality rows in the original problem. + pub orig_rows: usize, + /// Equality + inequality rows in the reduced problem. + pub reduced_rows: usize, + /// Variables fixed by a singleton equality row. + pub fixed_vars: usize, + /// Free / linear-only columns pinned to a bound and dropped. + pub free_cols_fixed: usize, + /// Free column singletons substituted out (each also removes a row). + pub free_col_singletons: usize, + /// Forcing rows: each pins all its variables to a bound and is dropped. + pub forcing_rows: usize, + /// Dominated columns fixed to a bound and dropped. + pub dominated_cols: usize, + /// Variable bounds tightened by domain propagation. + pub tightened_bounds: usize, +} + +impl PresolveStats { + /// Did presolve remove anything? + pub fn reduced_anything(&self) -> bool { + self.reduced_vars < self.orig_vars || self.reduced_rows < self.orig_rows + } +} + +impl Presolve { + /// The cone partition of the *reduced* inequality block, given the + /// original `cones`. Walks the kept inequality rows (a cone-aware + /// presolve never drops or reorders a second-order-cone block, so each + /// cone's surviving rows stay contiguous) and run-length-encodes them by + /// source cone. Orthant blocks may shrink (or vanish); SOC blocks keep + /// their full dimension. Use after [`presolve_conic`] (a single pass). + pub fn reduced_cones(&self, cones: &[ConeSpec]) -> Vec { + // Original inequality row → cone index. + let mut row_cone = vec![usize::MAX; self.orig_m_ineq]; + let mut r = 0; + for (ci, spec) in cones.iter().enumerate() { + for _ in 0..spec.dim() { + if r < row_cone.len() { + row_cone[r] = ci; + } + r += 1; + } + } + let mut out = Vec::new(); + let mut i = 0; + while i < self.kept_ineq.len() { + let ci = row_cone[self.kept_ineq[i]]; + let mut j = i; + while j < self.kept_ineq.len() && row_cone[self.kept_ineq[j]] == ci { + j += 1; + } + let count = j - i; + out.push(match cones[ci] { + ConeSpec::Nonneg(_) => ConeSpec::Nonneg(count), + ConeSpec::SecondOrder(_) => ConeSpec::SecondOrder(count), + // Non-symmetric cones are fixed at 3 rows and are not split or + // merged by presolve. + ConeSpec::Exponential => ConeSpec::Exponential, + ConeSpec::Power(a) => ConeSpec::Power(a), + // PSD blocks are structurally coupled (svec of a fixed n×n) + // and likewise pass through unchanged. + ConeSpec::Psd(n) => ConeSpec::Psd(n), + }); + i = j; + } + out + } + + /// Did this single pass change anything (a reduction, or a dropped + /// row)? Used by [`presolve`] to detect the fixpoint. + fn changed(&self) -> bool { + !self.stack.is_empty() + || self.reduced.n < self.orig_n + || self.reduced.m_eq() + self.reduced.m_ineq() < self.orig_m_eq + self.orig_m_ineq + } + + /// Reduction summary (sizes before/after and counts by reduction). For + /// an iterated presolve, counts aggregate over the rounds. + pub fn stats(&self) -> PresolveStats { + if self.chain.is_empty() { + return self.stats_once(); + } + let mut s = PresolveStats { + orig_vars: self.orig_n, + reduced_vars: self.reduced.n, + orig_rows: self.orig_m_eq + self.orig_m_ineq, + reduced_rows: self.reduced.m_eq() + self.reduced.m_ineq(), + ..Default::default() + }; + for layer in &self.chain { + let ls = layer.stats_once(); + s.fixed_vars += ls.fixed_vars; + s.free_cols_fixed += ls.free_cols_fixed; + s.free_col_singletons += ls.free_col_singletons; + s.forcing_rows += ls.forcing_rows; + s.dominated_cols += ls.dominated_cols; + s.tightened_bounds += ls.tightened_bounds; + } + s + } + + fn stats_once(&self) -> PresolveStats { + let mut s = PresolveStats { + orig_vars: self.orig_n, + reduced_vars: self.reduced.n, + orig_rows: self.orig_m_eq + self.orig_m_ineq, + reduced_rows: self.reduced.m_eq() + self.reduced.m_ineq(), + ..Default::default() + }; + for r in &self.stack { + match r { + Reduction::FixedVar { .. } => s.fixed_vars += 1, + Reduction::FreeColumnFixed { .. } => s.free_cols_fixed += 1, + Reduction::FreeColSingleton { .. } => s.free_col_singletons += 1, + Reduction::ForcingRow { .. } => s.forcing_rows += 1, + Reduction::DominatedColumn { .. } => s.dominated_cols += 1, + Reduction::BoundTightening { .. } => s.tightened_bounds += 1, + } + } + s + } + + /// Expand a reduced-problem solution back to the original space, + /// recovering primal `x` and duals `(y, z)`. For an iterated presolve, + /// folds the per-round postsolves in reverse. + pub fn postsolve(&self, red: &QpSolution) -> QpSolution { + if self.chain.is_empty() { + return self.postsolve_once(red); + } + let mut sol = red.clone(); + for layer in self.chain.iter().rev() { + sol = layer.postsolve_once(&sol); + } + sol + } + + /// Expand a single pass's reduced solution back to its original space. + fn postsolve_once(&self, red: &QpSolution) -> QpSolution { + let mut x = vec![0.0; self.orig_n]; + let mut y = vec![0.0; self.orig_m_eq]; + let mut z = vec![0.0; self.orig_m_ineq]; + + // Primal: kept columns from the reduced solution. + for (newc, &oldc) in self.kept_cols.iter().enumerate() { + x[oldc] = red.x[newc]; + } + // Duals: kept rows from the reduced solution. Dropped rows + // (empty / duplicate) stay 0, which is their correct multiplier. + for (newr, &oldr) in self.kept_eq.iter().enumerate() { + y[oldr] = red.y[newr]; + } + for (newr, &oldr) in self.kept_ineq.iter().enumerate() { + z[oldr] = red.z[newr]; + } + + // Restore eliminated primals in two passes, ordered by dependency. + // + // A free-column-singleton recovers `x_col = (b_r − Σ_{j≠col} a_jr + // x_j) / a_col`, so it *reads* the values of the other variables in + // its consumed row. Those neighbours may themselves have been + // eliminated by a **constant-valued** reduction (a fixed / free-fixed + // / dominated / forced variable) earlier in the same pass — earlier, + // hence *lower* on the stack. A plain reverse-LIFO replay would + // restore the singleton (higher on the stack) before its constant + // neighbour, reading a stale 0 for it and producing an infeasible + // recovered point (the capri LP wrong-answer bug). The neighbours are + // never themselves singletons (a free-column-singleton variable + // appears in exactly one equality row — its own consumed row — so it + // cannot appear in another singleton's row), so two passes suffice: + // 1. all constant-valued primal restorations (any order — they + // depend on nothing); then + // 2. the formula-based free-column-singletons, which now read fully + // restored neighbours. + for r in self.stack.iter().rev() { + match r { + Reduction::FixedVar { col, value, .. } => x[*col] = *value, + Reduction::FreeColumnFixed { col, value } => x[*col] = *value, + Reduction::ForcingRow { cols, .. } => { + // Each forced variable sits at the stored bound value. + for &(col, _, value, _) in cols { + x[col] = value; + } + } + Reduction::DominatedColumn { col, value, .. } => x[*col] = *value, + // Restored in the second pass (depends on its neighbours). + Reduction::FreeColSingleton { .. } => {} + // The variable is kept; only its box changed, so its primal + // comes from the reduced solution (already mapped above). + Reduction::BoundTightening { .. } => {} + } + } + for r in &self.stack { + if let Reduction::FreeColSingleton { + col, + eq_row, + a_coef, + .. + } = r + { + // x_col = (b_r − Σ_{j≠col} a_jr x_j) / a_col. + let mut acc = self.orig.b[*eq_row]; + for t in &self.orig.a { + if t.row == *eq_row && t.col != *col { + acc -= t.val * x[t.col]; + } + } + x[*col] = acc / a_coef; + } + } + + // Free-column-singleton consumed-row multipliers have the unique + // value y_r = −c_col / a_col (from stationarity of the eliminated + // free variable, which has no P/G terms). + for r in &self.stack { + if let Reduction::FreeColSingleton { + eq_row, + a_coef, + c_col, + .. + } = r + { + y[*eq_row] = -c_col / a_coef; + } + } + + // Recover each fixing row's multiplier from stationarity for its + // variable: with all primals and other duals known, + // (Px)_k + c_k + (Aᵀy)_k + (Gᵀz)_k + a·y_fix = 0 + // ⇒ y_fix = −[(Px)_k + c_k + (Aᵀy)_k + (Gᵀz)_k] / a. + let n = self.orig_n; + let mut grad = vec![0.0; n]; + grad[..n].copy_from_slice(&self.orig.c[..n]); + self.orig.p_mul(&x, &mut grad); + self.orig.at_mul(&y, &mut grad); + self.orig.gt_mul(&z, &mut grad); + for r in &self.stack { + if let Reduction::FixedVar { + col, + eq_row, + a_coef, + .. + } = r + { + y[*eq_row] = -grad[*col] / a_coef; + } + } + + // Forcing-row multipliers. `grad` (above, = grad0) is each pinned + // variable's reduced cost *excluding* the forcing row (its + // multiplier is still 0). The row multiplier is the tightest value + // making every pinned variable's bound multiplier correctly signed: + // min-vertex ⇒ mult = maxⱼ(−gradⱼ/coefⱼ) (clamped ≥ 0 if ≤-row); + // max-vertex ⇒ mult = minⱼ(−gradⱼ/coefⱼ) (equalities only). + // (The pinned variables' bound multipliers themselves come out of + // the global recovery below.) + for r in &self.stack { + if let Reduction::ForcingRow { + row, + is_equality, + at_max, + cols, + } = r + { + let mut mult = if *at_max { + f64::INFINITY + } else { + f64::NEG_INFINITY + }; + for &(col, coef, _, _) in cols { + let t = -grad[col] / coef; + mult = if *at_max { mult.min(t) } else { mult.max(t) }; + } + if !*is_equality { + mult = mult.max(0.0); // inequality multiplier ≥ 0 + } + if !mult.is_finite() { + mult = 0.0; + } + if *is_equality { + y[*row] = mult; + } else { + z[*row] = mult; + } + } + } + + // Re-attribute active tightened-bound multipliers to their source + // rows. A tightened bound that is active in the reduced solve while + // the *original* bound is slack is not a real bound — its + // multiplier belongs to the row that implied it. Because tightening + // sources are column-disjoint, these moves are independent. + let mut col_reduced = vec![usize::MAX; n]; + for (newc, &oldc) in self.kept_cols.iter().enumerate() { + col_reduced[oldc] = newc; + } + for r in &self.stack { + if let Reduction::BoundTightening { + col, + row, + is_equality, + coef, + is_upper, + } = r + { + let newc = col_reduced[*col]; + if newc == usize::MAX { + continue; + } + let delta = if *is_upper { + let m = red.z_ub.get(newc).copied().unwrap_or(0.0); + if m > 0.0 && x[*col] < self.orig.ub_of(*col) - BOUND_FEAS_TOL { + m / coef + } else { + 0.0 + } + } else { + let m = red.z_lb.get(newc).copied().unwrap_or(0.0); + if m > 0.0 && x[*col] > self.orig.lb_of(*col) + BOUND_FEAS_TOL { + -m / coef + } else { + 0.0 + } + }; + if *is_equality { + y[*row] += delta; + } else { + z[*row] += delta; + } + } + } + + // Global bound-multiplier recovery. With every row multiplier now in + // place, recompute the full reduced cost and read off each + // variable's bound multipliers by complementarity against its + // *original* box: at the lower bound `z_lb = max(0, grad)`, at the + // upper `z_ub = max(0, −grad)`, interior ⇒ both 0. This single rule + // subsumes the per-reduction bound recovery (fixed, free-fixed, + // forcing, dominated — each lands at a real bound or interior with + // the right reduced cost) and correctly zeroes a tightened + // variable's bound dual (it sits interior to its real box, the force + // having moved to the source row above). + let mut grad = vec![0.0; n]; + grad[..n].copy_from_slice(&self.orig.c[..n]); + self.orig.p_mul(&x, &mut grad); + self.orig.at_mul(&y, &mut grad); + self.orig.gt_mul(&z, &mut grad); + let mut z_lb = vec![0.0; n]; + let mut z_ub = vec![0.0; n]; + for i in 0..n { + let lb = self.orig.lb_of(i); + let ub = self.orig.ub_of(i); + let at_lb = lb > -BOUND_INF && (x[i] - lb).abs() <= ACTIVE_BOUND_TOL; + let at_ub = ub < BOUND_INF && (ub - x[i]).abs() <= ACTIVE_BOUND_TOL; + if at_lb && grad[i] > 0.0 { + z_lb[i] = grad[i]; + } else if at_ub && grad[i] < 0.0 { + z_ub[i] = -grad[i]; + } + } + + // Objective in the original problem. + let mut px = vec![0.0; n]; + self.orig.p_mul(&x, &mut px); + let mut obj = 0.0; + for i in 0..n { + obj += 0.5 * x[i] * px[i] + self.orig.c[i] * x[i]; + } + + QpSolution { + status: red.status, + x, + y, + z, + z_lb, + z_ub, + obj, + iters: red.iters, + iterates: red.iterates.clone(), + } + } +} + +/// Convenience: presolve, solve the reduced problem with `solve`, and +/// postsolve — returning a solution in the *original* problem space. On a +/// presolve-detected infeasibility / unboundedness, returns the matching +/// status without invoking the solver. +pub fn solve_with_presolve(prob: &QpProblem, solve: S) -> QpSolution +where + S: FnOnce(&QpProblem) -> QpSolution, +{ + let trivial = |status| QpSolution { + status, + x: vec![0.0; prob.n], + y: vec![0.0; prob.m_eq()], + z: vec![0.0; prob.m_ineq()], + z_lb: vec![0.0; prob.n], + z_ub: vec![0.0; prob.n], + obj: 0.0, + iters: 0, + iterates: Vec::new(), + }; + match presolve(prob) { + PresolveOutcome::Infeasible => trivial(QpStatus::PrimalInfeasible), + PresolveOutcome::Unbounded => trivial(QpStatus::DualInfeasible), + PresolveOutcome::Reduced(ps) => { + let red = solve(&ps.reduced); + ps.postsolve(&red) + } + } +} diff --git a/crates/pounce-convex/src/qp.rs b/crates/pounce-convex/src/qp.rs new file mode 100644 index 00000000..de912d54 --- /dev/null +++ b/crates/pounce-convex/src/qp.rs @@ -0,0 +1,443 @@ +//! Convex QP problem data in standard form. +//! +//! ```text +//! minimize ½ xᵀP x + cᵀx +//! subject to A x = b (equality, m_eq rows) +//! G x ≤ h (inequality, m_ineq rows) +//! ``` +//! +//! `x` is free; variable bounds are expressed as rows of `G`. `P` must +//! be symmetric positive semidefinite (convexity); it is supplied as its +//! **lower triangle** in triplet form. `A` and `G` are general sparse +//! triplets. This is the form the IPM in [`crate::ipm`] consumes, and +//! the form the `.nl` → QP extraction (Phase 2 dispatch) will target. + +/// A sparse matrix entry `(row, col, val)`, 0-based. +#[derive(Debug, Clone, Copy)] +pub struct Triplet { + pub row: usize, + pub col: usize, + pub val: f64, +} + +impl Triplet { + pub fn new(row: usize, col: usize, val: f64) -> Self { + Triplet { row, col, val } + } +} + +/// Convex QP in the standard form documented at the module level. +#[derive(Debug, Clone)] +pub struct QpProblem { + /// Number of decision variables. + pub n: usize, + /// Lower triangle (row ≥ col) of the symmetric PSD Hessian `P`. + pub p_lower: Vec, + /// Linear objective coefficient `c` (length `n`). + pub c: Vec, + /// Equality matrix `A` (m_eq × n), full triplets. + pub a: Vec, + /// Equality right-hand side `b` (length m_eq). + pub b: Vec, + /// Inequality matrix `G` (m_ineq × n), full triplets. + pub g: Vec, + /// Inequality right-hand side `h` (length m_ineq). + pub h: Vec, + /// Per-variable lower bounds `lb ≤ x`. Either empty (all `-∞`) or + /// length `n`. Use [`NEG_INF`] for an unbounded entry. Bounds are a + /// first-class part of the problem (not encoded as `G` rows), so + /// presolve can reason about variable boxes; the solver expands the + /// finite ones into internal inequality rows. + pub lb: Vec, + /// Per-variable upper bounds `x ≤ ub`. Either empty (all `+∞`) or + /// length `n`. Use [`POS_INF`] for an unbounded entry. + pub ub: Vec, +} + +/// Sentinel for an absent lower bound (`-∞`). Anything `≤ -BOUND_INF` is +/// treated as no bound. +pub const NEG_INF: f64 = f64::NEG_INFINITY; +/// Sentinel for an absent upper bound (`+∞`). Anything `≥ BOUND_INF` is +/// treated as no bound. +pub const POS_INF: f64 = f64::INFINITY; +/// Magnitude past which a bound is considered infinite. +pub(crate) const BOUND_INF: f64 = 1e20; + +impl QpProblem { + pub fn m_eq(&self) -> usize { + self.b.len() + } + + pub fn m_ineq(&self) -> usize { + self.h.len() + } + + /// Lower bound of variable `i` (`-∞` when `lb` is empty). + pub fn lb_of(&self, i: usize) -> f64 { + self.lb.get(i).copied().unwrap_or(NEG_INF) + } + + /// Upper bound of variable `i` (`+∞` when `ub` is empty). + pub fn ub_of(&self, i: usize) -> f64 { + self.ub.get(i).copied().unwrap_or(POS_INF) + } + + /// Whether the problem carries any finite variable bound. + pub fn has_bounds(&self) -> bool { + self.lb.iter().any(|&v| v > -BOUND_INF) || self.ub.iter().any(|&v| v < BOUND_INF) + } + + /// Public `y += P x` (full symmetric product from the stored lower + /// triangle). Exposed so external callers — e.g. a TNLP adapter + /// reusing the same problem data — can evaluate the objective + /// gradient consistently with the solver. + pub fn p_mul_add_pub(&self, x: &[f64], y: &mut [f64]) { + self.p_mul_add(x, y); + } + + /// Public `y += A x`. + pub fn a_mul_add_pub(&self, x: &[f64], y: &mut [f64]) { + self.a_mul_add(x, y); + } + + /// `y += P x` using the stored lower triangle (mirrors the implicit + /// upper triangle for off-diagonal entries). + pub(crate) fn p_mul_add(&self, x: &[f64], y: &mut [f64]) { + for t in &self.p_lower { + y[t.row] += t.val * x[t.col]; + if t.row != t.col { + y[t.col] += t.val * x[t.row]; + } + } + } + + /// `y += A x`. + pub(crate) fn a_mul_add(&self, x: &[f64], y: &mut [f64]) { + for t in &self.a { + y[t.row] += t.val * x[t.col]; + } + } + + /// `y += Aᵀ v`. + pub(crate) fn at_mul_add(&self, v: &[f64], y: &mut [f64]) { + for t in &self.a { + y[t.col] += t.val * v[t.row]; + } + } + + /// `y += G x`. + pub(crate) fn g_mul_add(&self, x: &[f64], y: &mut [f64]) { + for t in &self.g { + y[t.row] += t.val * x[t.col]; + } + } + + /// `y += Gᵀ v`. + pub(crate) fn gt_mul_add(&self, v: &[f64], y: &mut [f64]) { + for t in &self.g { + y[t.col] += t.val * v[t.row]; + } + } + + /// Public `y += A x` (alias of [`Self::a_mul_add`]). + pub fn a_mul(&self, x: &[f64], y: &mut [f64]) { + self.a_mul_add(x, y); + } + + /// Public `y += G x` (alias of [`Self::g_mul_add`]). + pub fn g_mul(&self, x: &[f64], y: &mut [f64]) { + self.g_mul_add(x, y); + } + + /// Public `y += Aᵀ v` (alias of [`Self::at_mul_add`]). + pub fn at_mul(&self, v: &[f64], y: &mut [f64]) { + self.at_mul_add(v, y); + } + + /// Public `y += Gᵀ v` (alias of [`Self::gt_mul_add`]). + pub fn gt_mul(&self, v: &[f64], y: &mut [f64]) { + self.gt_mul_add(v, y); + } + + /// Public `y += P x` (alias of [`Self::p_mul_add`]). + pub fn p_mul(&self, x: &[f64], y: &mut [f64]) { + self.p_mul_add(x, y); + } +} + +/// Termination status of an IPM solve. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum QpStatus { + /// Converged: KKT residuals and duality gap below tolerance. + Optimal, + /// Primal infeasible: no `x` satisfies `Ax = b, Gx ≤ h`. A Farkas + /// certificate `(y, z ≥ 0)` with `Aᵀy + Gᵀz ≈ 0` and `bᵀy + hᵀz < 0` + /// was detected and verified. + PrimalInfeasible, + /// Dual infeasible / unbounded below: a recession direction `d` with + /// `Pd ≈ 0, Ad = 0, Gd ≤ 0, cᵀd < 0` was detected and verified. + DualInfeasible, + /// Iteration limit reached before convergence. + IterationLimit, + /// The KKT factorization failed (e.g. structurally singular system). + NumericalFailure, +} + +/// Result of an IPM solve: the primal/dual solution and status. +#[derive(Debug, Clone)] +pub struct QpSolution { + pub status: QpStatus, + /// Primal solution `x` (length `n`). + pub x: Vec, + /// Equality multipliers `y` (length m_eq). + pub y: Vec, + /// Inequality multipliers `z ≥ 0` (length m_ineq). + pub z: Vec, + /// Lower-bound multipliers `z_lb ≥ 0` for `lb ≤ x` (length `n`; zero + /// where there is no finite lower bound or it is inactive). + pub z_lb: Vec, + /// Upper-bound multipliers `z_ub ≥ 0` for `x ≤ ub` (length `n`). + pub z_ub: Vec, + /// Objective value `½ xᵀP x + cᵀx`. + pub obj: f64, + /// Iterations taken. + pub iters: usize, + /// Per-iteration convergence trace, populated only when + /// [`crate::QpOptions::collect_iterates`] was set (otherwise empty, with + /// no per-solve overhead). Each entry is one interior-point iteration. + pub iterates: Vec, +} + +/// One interior-point iteration's convergence record — the per-iteration data +/// a solve report or benchmark harness wants (residuals, the duality measure, +/// and the step lengths). Collected by the convex IPM when +/// [`crate::QpOptions::collect_iterates`] is set. +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct QpIterate { + /// Iteration index (0-based). + pub iter: usize, + /// Objective `½ xᵀP x + cᵀx` at the start of this iteration. + pub objective: f64, + /// Primal infeasibility `max(‖Ax − b‖∞, ‖(Gx + s − h)‖∞)`. + pub primal_infeasibility: f64, + /// Dual infeasibility `‖Px + c + Aᵀy + Gᵀz‖∞`. + pub dual_infeasibility: f64, + /// Duality measure `μ = ⟨s, z⟩ / degree`. + pub mu: f64, + /// Primal step length taken this iteration. + pub alpha_primal: f64, + /// Dual step length taken this iteration. + pub alpha_dual: f64, +} + +/// Final KKT residuals of a [`QpSolution`] with respect to its [`QpProblem`] +/// — the convergence quantities a caller (e.g. a solve report or benchmark +/// harness) needs but that aren't otherwise carried on the solution. +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct QpResiduals { + /// Primal infeasibility: `max(|Ax − b|, max(0, Gx − h), bound violations)`. + pub primal_infeasibility: f64, + /// Dual infeasibility (stationarity): + /// `‖Px + c + Aᵀy + Gᵀz − z_lb + z_ub‖∞`. + pub dual_infeasibility: f64, + /// Complementarity: `max |zᵢ · slackᵢ|` over inequalities and finite bounds. + pub complementarity: f64, +} + +impl QpResiduals { + /// Overall KKT error — the max of the three components. + pub fn kkt_error(&self) -> f64 { + self.primal_infeasibility + .max(self.dual_infeasibility) + .max(self.complementarity) + } +} + +impl QpSolution { + /// Recompute the final KKT residuals of this solution against `prob`. + /// + /// Uses the convex solver's standard-form conventions — + /// `min ½xᵀPx + cᵀx s.t. Ax = b, Gx ≤ h, lb ≤ x ≤ ub`, with equality dual + /// `y`, inequality dual `z ≥ 0`, and bound duals `z_lb, z_ub ≥ 0`. The + /// stationarity residual is `∇ₓL = Px + c + Aᵀy + Gᵀz − z_lb + z_ub`, the + /// `−z_lb + z_ub` matching how variable bounds expand into `G`-rows and + /// split back into the bound multipliers. + pub fn kkt_residuals(&self, prob: &QpProblem) -> QpResiduals { + let n = prob.n; + + // Dual infeasibility (stationarity). + let mut r = vec![0.0; n]; + prob.p_mul(&self.x, &mut r); + for (((ri, &ci), &lb), &ub) in r.iter_mut().zip(&prob.c).zip(&self.z_lb).zip(&self.z_ub) { + *ri += ci - lb + ub; + } + prob.at_mul(&self.y, &mut r); + prob.gt_mul(&self.z, &mut r); + let dual_infeasibility = r.iter().fold(0.0_f64, |m, v| m.max(v.abs())); + + // Primal infeasibility. + let mut primal_infeasibility = 0.0_f64; + let mut ax = vec![0.0; prob.m_eq()]; + prob.a_mul(&self.x, &mut ax); + for (&axi, &bi) in ax.iter().zip(&prob.b) { + primal_infeasibility = primal_infeasibility.max((axi - bi).abs()); + } + let mut gx = vec![0.0; prob.m_ineq()]; + prob.g_mul(&self.x, &mut gx); + for (&gxi, &hi) in gx.iter().zip(&prob.h) { + primal_infeasibility = primal_infeasibility.max((gxi - hi).max(0.0)); + } + for i in 0..n { + primal_infeasibility = primal_infeasibility.max((prob.lb_of(i) - self.x[i]).max(0.0)); + primal_infeasibility = primal_infeasibility.max((self.x[i] - prob.ub_of(i)).max(0.0)); + } + + // Complementarity. + let mut complementarity = 0.0_f64; + for ((&zi, &hi), &gxi) in self.z.iter().zip(&prob.h).zip(&gx) { + complementarity = complementarity.max((zi * (hi - gxi)).abs()); + } + for i in 0..n { + let (lb, ub) = (prob.lb_of(i), prob.ub_of(i)); + if lb > -1e19 { + complementarity = complementarity.max((self.z_lb[i] * (self.x[i] - lb)).abs()); + } + if ub < 1e19 { + complementarity = complementarity.max((self.z_ub[i] * (ub - self.x[i])).abs()); + } + } + + QpResiduals { + primal_infeasibility, + dual_infeasibility, + complementarity, + } + } +} + +#[cfg(test)] +mod residual_tests { + use super::*; + use crate::ipm::{solve_qp_ipm, QpOptions}; + use pounce_feral::FeralSolverInterface; + use pounce_linsol::SparseSymLinearSolverInterface; + + fn backend() -> Box { + Box::new(FeralSolverInterface::new()) + } + + /// KKT residuals vanish at the optimum even when **variable bounds are + /// active** — the sharp check of the `−z_lb + z_ub` stationarity sign. + /// `min x0²+x1² −3x0 −4x1 s.t. 0 ≤ x ≤ 0.5` clamps to the upper bounds + /// `(0.5, 0.5)` (unconstrained optimum is `(1.5, 2)`), so `z_ub > 0` and + /// the stationarity term must carry it with the right sign. + #[test] + fn kkt_residuals_vanish_with_active_bounds() { + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![-3.0, -4.0], + a: vec![], + b: vec![], + g: vec![], + h: vec![], + lb: vec![0.0, 0.0], + ub: vec![0.5, 0.5], + }; + let sol = solve_qp_ipm(&prob, &QpOptions::default(), backend); + assert_eq!(sol.status, QpStatus::Optimal); + assert!((sol.x[0] - 0.5).abs() < 1e-5 && (sol.x[1] - 0.5).abs() < 1e-5); + let res = sol.kkt_residuals(&prob); + assert!( + res.kkt_error() < 1e-6, + "active-bound residuals not small: {res:?}" + ); + } + + /// The opt-in iterate trace is populated only when requested, records one + /// entry per interior-point iteration *plus* a terminal record at the + /// converged iterate (the NLP path's N+1 convention), and reflects + /// convergence (μ and the residuals shrink toward the optimum). + #[test] + fn iterate_trace_is_opt_in_and_records_convergence() { + // A bounded QP (inequalities ⇒ a non-trivial central path, μ > 0). + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![-3.0, -4.0], + a: vec![], + b: vec![], + g: vec![Triplet::new(0, 0, 1.0), Triplet::new(0, 1, 1.0)], + h: vec![1.0], + lb: vec![], + ub: vec![], + }; + // Off by default: no trace, no overhead. + let sol = solve_qp_ipm(&prob, &QpOptions::default(), backend); + assert!( + sol.iterates.is_empty(), + "default solve must not collect a trace" + ); + + // On: one record per iteration, μ and residuals decreasing to the end. + let opts = QpOptions { + collect_iterates: true, + ..QpOptions::default() + }; + let sol = solve_qp_ipm(&prob, &opts, backend); + assert_eq!(sol.status, QpStatus::Optimal); + assert!(!sol.iterates.is_empty(), "trace should be populated"); + let first = &sol.iterates[0]; + let last = sol.iterates.last().unwrap(); + assert!(first.iter == 0); + assert!(first.mu > 0.0, "early μ should be positive"); + assert!( + last.mu < first.mu, + "μ should decrease: {} -> {}", + first.mu, + last.mu + ); + // The trace ends at a (near-)converged iterate (this problem starts + // primal-feasible, so μ — not primal infeasibility — is the signal). + assert!(last.mu < 1e-6, "final traced μ {} should be tiny", last.mu); + assert!( + last.dual_infeasibility < 1e-5, + "final traced dual infeasibility {} should be small", + last.dual_infeasibility + ); + // Every stepping iterate has positive fraction-to-boundary lengths; + // the terminal converged record takes no step, so its α's are zero. + let (term, stepping) = sol.iterates.split_last().unwrap(); + for r in stepping { + assert!(r.alpha_primal > 0.0 && r.alpha_primal <= 1.0); + assert!(r.alpha_dual > 0.0 && r.alpha_dual <= 1.0); + } + assert_eq!(term.alpha_primal, 0.0, "converged record takes no step"); + assert_eq!(term.alpha_dual, 0.0, "converged record takes no step"); + } + + /// Inequality complementarity: a binding general inequality must show + /// `z·slack ≈ 0`, and stationarity must vanish with the `Gᵀz` term. + /// `min x0²+x1² −3x0 −4x1 s.t. x0+x1 ≤ 1` → optimum on the face (0.25, 0.75). + #[test] + fn kkt_residuals_vanish_with_binding_inequality() { + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![-3.0, -4.0], + a: vec![], + b: vec![], + g: vec![Triplet::new(0, 0, 1.0), Triplet::new(0, 1, 1.0)], + h: vec![1.0], + lb: vec![], + ub: vec![], + }; + let sol = solve_qp_ipm(&prob, &QpOptions::default(), backend); + assert_eq!(sol.status, QpStatus::Optimal); + let res = sol.kkt_residuals(&prob); + assert!( + res.kkt_error() < 1e-6, + "binding-inequality residuals not small: {res:?}" + ); + } +} diff --git a/crates/pounce-convex/src/sensitivity.rs b/crates/pounce-convex/src/sensitivity.rs new file mode 100644 index 00000000..9820ec0a --- /dev/null +++ b/crates/pounce-convex/src/sensitivity.rs @@ -0,0 +1,578 @@ +//! Post-optimal sensitivity for the convex QP — the sIPOPT analog. +//! +//! Given a converged [`QpSolution`] to +//! +//! ```text +//! min ½xᵀPx + cᵀx s.t. Ax = b, Gx ≤ h, lb ≤ x ≤ ub, +//! ``` +//! +//! the first-order change of the primal–dual solution under a small +//! perturbation of the problem data — *holding the active set fixed* — is +//! the solution of the **active-set KKT system** +//! +//! ```text +//! ⎡ P Aᵀ B_aᵀ ⎤ ⎡ dx ⎤ ⎡ −dc ⎤ +//! ⎢ A 0 0 ⎥ ⎢ dy ⎥ = ⎢ db ⎥ +//! ⎣ B_a 0 0 ⎦ ⎣ dz_a⎦ ⎣ dr_a ⎦ +//! ``` +//! +//! where `B_a` stacks the **active** inequality rows of `G` and the active +//! variable-bound rows (`eⱼᵀ`), and the right-hand side is the parameter +//! derivative of the KKT residual. This is exactly the predictor used by +//! Ipopt's sIPOPT (Pirnay, López-Negrete & Biegler 2012) specialized to a +//! quadratic program, where the Lagrangian Hessian is the constant `P`. +//! +//! [`QpSensitivity`] assembles and factors this symmetric, indefinite +//! system **once** at the optimum; each [`QpSensitivity::parametric_step`] +//! is then a single back-substitution, so a parametric sweep costs one +//! solve per query (the build-once / solve-many idiom of the NLP +//! `Solver`). A tiny static regularization `δ` (the QP solver's own `reg`, +//! default `1e-8`) is placed on the diagonal so the indefinite factor is +//! stable; the induced error in the step is `O(δ)`. + +use crate::ipm::QpOptions; +use crate::qp::{QpProblem, QpSolution, QpStatus}; +use pounce_common::types::{Index, Number}; +use pounce_linalg::symmetric_eigen; +use pounce_linsol::{Factorization, SparseSymLinearSolverInterface}; +use std::collections::BTreeMap; + +/// A reason a [`QpSensitivity`] could not be built. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum SensError { + /// The solution was not optimal, so the active set is undefined. + NotOptimal, + /// The active-set KKT factorization failed (e.g. the active constraint + /// gradients are rank-deficient, so the parametric step is not unique). + FactorizationFailed, +} + +/// Post-optimal sensitivity for a solved convex QP. +/// +/// Holds the factored active-set KKT system at the optimum. Build it once +/// from a [`QpProblem`] and its [`QpSolution`], then call +/// [`parametric_step`](Self::parametric_step) for each parameter +/// perturbation — the factorization is reused across queries. +pub struct QpSensitivity { + n: usize, + m_eq: usize, + /// KKT dimension `n + m_eq + n_active`. + dim: usize, + fact: Factorization, + /// Problem data, retained for the reduced-Hessian projection. + prob: QpProblem, + /// Active inequality rows (indices into `G`). + active_ineq: Vec, + /// Variables whose bound is active (one `eⱼᵀ` row each). + active_bound_vars: Vec, +} + +impl QpSensitivity { + /// Build the active-set sensitivity for `sol` (a solution of `prob`). + /// + /// The active set is read from the dual certificate: an inequality row + /// `i` is active when `zᵢ > active_tol`, a lower bound on `xⱼ` when + /// `z_lbⱼ > active_tol`, an upper bound when `z_ubⱼ > active_tol`. A + /// good default for `active_tol` is `1e-7` (see + /// [`build_default`](Self::build_default)). + /// + /// Returns [`SensError::NotOptimal`] if `sol` is not optimal, or + /// [`SensError::FactorizationFailed`] if the active-set KKT is singular. + pub fn build( + prob: &QpProblem, + sol: &QpSolution, + opts: &QpOptions, + active_tol: f64, + mut make_backend: F, + ) -> Result + where + F: FnMut() -> Box, + { + if sol.status != QpStatus::Optimal { + return Err(SensError::NotOptimal); + } + let n = prob.n; + let m_eq = prob.m_eq(); + let reg = opts.reg; + + // Active set: which inequality rows and which variable bounds bind. + let active_ineq: Vec = (0..prob.m_ineq()) + .filter(|&i| sol.z[i] > active_tol) + .collect(); + // A bound contributes one row `eⱼᵀ` (the gradient of `xⱼ = const` is + // `eⱼ` whether the lower or upper bound is the active one). + let active_bound_vars: Vec = (0..n) + .filter(|&j| sol.z_lb[j] > active_tol || sol.z_ub[j] > active_tol) + .collect(); + let n_active = active_ineq.len() + active_bound_vars.len(); + let dim = n + m_eq + n_active; + + // Assemble the lower triangle of the symmetric KKT matrix. + let mut entries: BTreeMap<(usize, usize), f64> = BTreeMap::new(); + let mut add = |r: usize, c: usize, v: f64| { + let (r, c) = if r >= c { (r, c) } else { (c, r) }; + *entries.entry((r, c)).or_insert(0.0) += v; + }; + + // (x,x): P + δI. + for t in &prob.p_lower { + add(t.row, t.col, t.val); + } + for i in 0..n { + add(i, i, reg); + } + // (y,x): A; (y,y): −δI. + for t in &prob.a { + add(n + t.row, t.col, t.val); + } + for i in 0..m_eq { + add(n + i, n + i, -reg); + } + // Active-row block `B_a` after the equality rows, in order: + // active inequality rows, then active bound rows. (·,·): −δI diagonal. + let abase = n + m_eq; + for (k, &i) in active_ineq.iter().enumerate() { + // The k-th active row holds G's row i. + for t in prob.g.iter().filter(|t| t.row == i) { + add(abase + k, t.col, t.val); + } + } + for (k, &j) in active_bound_vars.iter().enumerate() { + add(abase + active_ineq.len() + k, j, 1.0); + } + for k in 0..n_active { + add(abase + k, abase + k, -reg); + } + + // Triplets → 1-based lower-triangle arrays for the factorization. + let nnz = entries.len(); + let mut airn = Vec::with_capacity(nnz); + let mut ajcn = Vec::with_capacity(nnz); + let mut values = Vec::with_capacity(nnz); + for ((r, c), v) in entries { + airn.push((r + 1) as Index); + ajcn.push((c + 1) as Index); + values.push(v); + } + + let fact = Factorization::new(dim as Index, airn, ajcn, values, make_backend()) + .map_err(|_| SensError::FactorizationFailed)?; + + Ok(QpSensitivity { + n, + m_eq, + dim, + fact, + prob: prob.clone(), + active_ineq, + active_bound_vars, + }) + } + + /// [`build`](Self::build) with the QP's default options and an active-set + /// tolerance of `1e-7`. + pub fn build_default( + prob: &QpProblem, + sol: &QpSolution, + make_backend: F, + ) -> Result + where + F: FnMut() -> Box, + { + Self::build(prob, sol, &QpOptions::default(), 1e-7, make_backend) + } + + /// First-order primal step `dx ≈ x*(b + Δb) − x*(b)` for a perturbation + /// of the **equality right-hand side** `b`, the direct QP analog of + /// sIPOPT's "pin a constraint, perturb its value". Constraint + /// `pin_constraint_indices[k]` (an index into `b`) is perturbed by + /// `deltas[k]`; all others are held fixed. + /// + /// Returns the length-`n` primal sensitivity, so `x* + dx` predicts the + /// solution of the perturbed QP (exact to first order while the active + /// set is unchanged). The factorization is reused, so repeated calls + /// (e.g. a continuation sweep) cost one back-substitution each. + /// + /// # Panics + /// + /// Panics if `pin_constraint_indices` and `deltas` differ in length, or + /// if any pin index is `≥ m_eq`. + pub fn parametric_step( + &mut self, + pin_constraint_indices: &[usize], + deltas: &[f64], + ) -> Vec { + assert_eq!( + pin_constraint_indices.len(), + deltas.len(), + "pin_constraint_indices and deltas must have equal length" + ); + let mut db = vec![0.0; self.m_eq]; + for (&i, &d) in pin_constraint_indices.iter().zip(deltas) { + assert!( + i < self.m_eq, + "pin constraint index {i} out of range (m_eq = {})", + self.m_eq + ); + db[i] += d; + } + self.step_from_db(&db) + } + + /// Primal sensitivity for a full equality-RHS perturbation `db` (length + /// `m_eq`): solves the active-set KKT with right-hand side `[0; db; 0]` + /// and returns `dx = step[0..n]`. + pub fn step_from_db(&mut self, db: &[f64]) -> Vec { + assert_eq!(db.len(), self.m_eq, "db must have length m_eq"); + let mut rhs = vec![0.0 as Number; self.dim]; + rhs[self.n..self.n + self.m_eq].copy_from_slice(db); + // A singular factor would have been caught at build; a back-solve + // failure here is not recoverable, so surface a zero step. + if self.fact.solve_one(&mut rhs).is_err() { + return vec![0.0; self.n]; + } + rhs.truncate(self.n); + rhs + } + + /// The active-set KKT dimension `n + m_eq + n_active`. + pub fn kkt_dim(&self) -> usize { + self.dim + } + + /// Reduced Hessian of the QP at the optimum: the objective Hessian `P` + /// projected onto the null space of the **active constraints** + /// `B = [A; active G rows; active bound rows]`. If `Z` is an + /// orthonormal basis of `null(B)` (the feasible directions / degrees of + /// freedom), the reduced Hessian is `H_R = Zᵀ P Z`. Its eigenvalues are + /// the objective's curvatures along feasible directions: all positive + /// ⟺ a strict second-order minimizer (always so for a strictly convex + /// `P`), and their spread is the conditioning of the QP on the active + /// manifold. This mirrors the NLP `Solver.reduced_hessian` / + /// `solve_with_sens(compute_reduced_hessian=True)`. + /// + /// The basis `Z` is the null space of `B`, obtained from the + /// eigenvectors of `BᵀB` whose eigenvalue is below `rank_tol · λ_max` + /// (squared singular values; the count above the threshold is + /// `rank(B)`, so the degrees of freedom are `n − rank(B)`). The + /// computation densifies `B` and `P`, so it is `O(n³)` — intended, like + /// sIPOPT's reduced Hessian, for QPs with a modest number of variables + /// (the parametric step stays sparse and is the workhorse for large + /// problems). + pub fn reduced_hessian(&self, rank_tol: f64) -> ReducedHessian { + let n = self.n; + + // Active Jacobian B (m_act × n), dense row-major: equality rows, + // then active inequality rows, then active variable-bound rows. + let m_act = self.m_eq + self.active_ineq.len() + self.active_bound_vars.len(); + let mut b = vec![0.0; m_act * n]; + for t in &self.prob.a { + b[t.row * n + t.col] += t.val; + } + let mut row = self.m_eq; + for &i in &self.active_ineq { + for t in self.prob.g.iter().filter(|t| t.row == i) { + b[row * n + t.col] += t.val; + } + row += 1; + } + for &j in &self.active_bound_vars { + b[row * n + j] += 1.0; + row += 1; + } + + // Null space of B from the eigenvectors of BᵀB (symmetric, n×n, + // column-major for `symmetric_eigen`). BᵀB[a,c] = Σ_r B[r,a]·B[r,c]. + let mut btb = vec![0.0; n * n]; + for r in 0..m_act { + for a in 0..n { + let bra = b[r * n + a]; + if bra == 0.0 { + continue; + } + for c in 0..n { + btb[a * n + c] += bra * b[r * n + c]; + } + } + } + let mut sv = vec![0.0; n]; + let mut vecs = vec![0.0; n * n]; + symmetric_eigen(&btb, n, &mut sv, &mut vecs); // ascending eigenvalues + + // rank(B) = # squared-singular-values above the relative threshold; + // the null space is spanned by the eigenvectors of the rest (the + // smallest, ≈ 0). With ascending order those are the first columns. + let max_sv = sv.last().copied().unwrap_or(0.0).max(0.0); + let thresh = rank_tol * max_sv; + let rank = sv.iter().filter(|&&l| l > thresh).count(); + let n_dof = n - rank; + + // Dense symmetric P (n×n) from its lower triangle. + let mut p = vec![0.0; n * n]; + for t in &self.prob.p_lower { + p[t.row * n + t.col] += t.val; + if t.row != t.col { + p[t.col * n + t.row] += t.val; + } + } + + // H_R = Zᵀ P Z, with Z = first `n_dof` columns of `vecs` (the null + // space). Column-major throughout: column j of Z is vecs[j*n + ·]. + let z = |j: usize, r: usize| vecs[j * n + r]; + // PZ (n × n_dof), column-major. + let mut pz = vec![0.0; n * n_dof]; + for j in 0..n_dof { + for (r, pzr) in pz[j * n..(j + 1) * n].iter_mut().enumerate() { + let mut acc = 0.0; + for c in 0..n { + acc += p[r * n + c] * z(j, c); + } + *pzr = acc; + } + } + // H_R (n_dof × n_dof), column-major: H_R[i,j] = z_iᵀ (P z_j). + let mut hr = vec![0.0; n_dof * n_dof]; + for j in 0..n_dof { + for i in 0..n_dof { + let mut acc = 0.0; + for r in 0..n { + acc += z(i, r) * pz[j * n + r]; + } + hr[j * n_dof + i] = acc; + } + } + + // Eigendecompose the (small) reduced Hessian. + let mut eigenvalues = vec![0.0; n_dof]; + let mut eigenvectors = vec![0.0; n_dof * n_dof]; + symmetric_eigen(&hr, n_dof, &mut eigenvalues, &mut eigenvectors); + + ReducedHessian { + n_dof, + matrix: hr, + eigenvalues, + eigenvectors, + } + } + + /// [`reduced_hessian`](Self::reduced_hessian) with a relative rank + /// tolerance of `1e-9`. + pub fn reduced_hessian_default(&self) -> ReducedHessian { + self.reduced_hessian(1e-9) + } +} + +/// The reduced Hessian `H_R = Zᵀ P Z` of a QP on its active manifold, with +/// its eigendecomposition. All matrices are column-major and `n_dof × n_dof` +/// (`n_dof` = degrees of freedom = `n − rank` of the active Jacobian). +#[derive(Debug, Clone, PartialEq)] +pub struct ReducedHessian { + /// Degrees of freedom: the dimension of every field here. + pub n_dof: usize, + /// The reduced Hessian `H_R`, column-major `n_dof × n_dof` (symmetric). + pub matrix: Vec, + /// Eigenvalues of `H_R`, ascending (length `n_dof`). + pub eigenvalues: Vec, + /// Eigenvectors, column-major `n_dof × n_dof`; column `j` pairs with + /// `eigenvalues[j]`. + pub eigenvectors: Vec, +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ipm::solve_qp_ipm; + use crate::qp::Triplet; + use pounce_feral::FeralSolverInterface; + + fn backend() -> Box { + Box::new(FeralSolverInterface::new()) + } + + /// `min ½‖x‖² s.t. x₀ + x₁ = b` (b = 2). The optimum is the projection + /// of the origin onto the line: `x = (b/2, b/2)`, so `dx/db = (½, ½)` + /// exactly. The parametric step for `Δb` must reproduce that. + #[test] + fn parametric_step_matches_closed_form_equality() { + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 1.0), Triplet::new(1, 1, 1.0)], + c: vec![0.0, 0.0], + a: vec![Triplet::new(0, 0, 1.0), Triplet::new(0, 1, 1.0)], + b: vec![2.0], + g: vec![], + h: vec![], + lb: vec![], + ub: vec![], + }; + let sol = solve_qp_ipm(&prob, &QpOptions::default(), backend); + assert_eq!(sol.status, QpStatus::Optimal); + assert!((sol.x[0] - 1.0).abs() < 1e-7 && (sol.x[1] - 1.0).abs() < 1e-7); + + let mut sens = QpSensitivity::build_default(&prob, &sol, backend).unwrap(); + let dx = sens.parametric_step(&[0], &[1.0]); // Δb = +1 + assert!((dx[0] - 0.5).abs() < 1e-6, "dx0 = {}", dx[0]); + assert!((dx[1] - 0.5).abs() < 1e-6, "dx1 = {}", dx[1]); + + // Predictor lands on the exact re-solve for the perturbed b. + let mut prob2 = prob.clone(); + prob2.b = vec![3.0]; + let sol2 = solve_qp_ipm(&prob2, &QpOptions::default(), backend); + assert!((sol.x[0] + dx[0] - sol2.x[0]).abs() < 1e-6); + assert!((sol.x[1] + dx[1] - sol2.x[1]).abs() < 1e-6); + } + + /// With an **active inequality** in the active set, the predictor must + /// still match the re-solve. `min ½‖x‖² s.t. x₀+x₁ = b, x₀ ≥ 1`. At + /// b = 1 the unconstrained projection would be (0.5, 0.5) but `x₀ ≥ 1` + /// binds, giving `x = (1, 0)`. Perturbing b shifts along the active + /// face: `x = (1, b−1)`, so `dx/db = (0, 1)`. + #[test] + fn parametric_step_with_active_inequality() { + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 1.0), Triplet::new(1, 1, 1.0)], + c: vec![0.0, 0.0], + a: vec![Triplet::new(0, 0, 1.0), Triplet::new(0, 1, 1.0)], + b: vec![1.0], + g: vec![Triplet::new(0, 0, -1.0)], // −x₀ ≤ −1 ⇔ x₀ ≥ 1 + h: vec![-1.0], + lb: vec![], + ub: vec![], + }; + let sol = solve_qp_ipm(&prob, &QpOptions::default(), backend); + assert_eq!(sol.status, QpStatus::Optimal); + assert!((sol.x[0] - 1.0).abs() < 1e-6 && sol.x[1].abs() < 1e-6); + assert!(sol.z[0] > 1e-6, "inequality should be active"); + + let mut sens = QpSensitivity::build_default(&prob, &sol, backend).unwrap(); + let dx = sens.parametric_step(&[0], &[0.5]); + assert!(dx[0].abs() < 1e-6, "dx0 = {} (should stay on x₀=1)", dx[0]); + assert!((dx[1] - 0.5).abs() < 1e-6, "dx1 = {}", dx[1]); + } + + /// A non-optimal solution has no well-defined active set. + #[test] + fn build_rejects_non_optimal() { + let prob = QpProblem { + n: 1, + p_lower: vec![], + c: vec![-1.0], + a: vec![], + b: vec![], + g: vec![Triplet::new(0, 0, -1.0)], + h: vec![0.0], // x ≥ 0, min −x ⇒ unbounded + lb: vec![], + ub: vec![], + }; + let sol = solve_qp_ipm(&prob, &QpOptions::default(), backend); + assert_ne!(sol.status, QpStatus::Optimal); + assert!(matches!( + QpSensitivity::build_default(&prob, &sol, backend), + Err(SensError::NotOptimal) + )); + } + + /// Unconstrained-direction reduced Hessian equals `P` itself: with no + /// active constraints the null space is all of ℝⁿ, so `H_R = ZᵀPZ = P` + /// (up to an orthonormal rotation, hence the eigenvalues match `P`). + /// `min ½(2x₀² + 3x₁²)` has no binding constraints; eigenvalues = {2, 3}. + #[test] + fn reduced_hessian_unconstrained_is_p() { + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 3.0)], + c: vec![0.0, 0.0], + a: vec![], + b: vec![], + g: vec![], + h: vec![], + lb: vec![], + ub: vec![], + }; + let sol = solve_qp_ipm(&prob, &QpOptions::default(), backend); + assert_eq!(sol.status, QpStatus::Optimal); + let sens = QpSensitivity::build_default(&prob, &sol, backend).unwrap(); + let rh = sens.reduced_hessian_default(); + assert_eq!(rh.n_dof, 2); + assert!( + (rh.eigenvalues[0] - 2.0).abs() < 1e-9, + "{:?}", + rh.eigenvalues + ); + assert!( + (rh.eigenvalues[1] - 3.0).abs() < 1e-9, + "{:?}", + rh.eigenvalues + ); + } + + /// One equality constraint removes one degree of freedom. `min ½‖x‖²` + /// (P = I) on the 3-D space with `x₀ + x₁ + x₂ = b` leaves a 2-D null + /// space; the reduced Hessian is the 2×2 identity (both curvatures = 1). + #[test] + fn reduced_hessian_drops_one_dof_per_active_constraint() { + let prob = QpProblem { + n: 3, + p_lower: vec![ + Triplet::new(0, 0, 1.0), + Triplet::new(1, 1, 1.0), + Triplet::new(2, 2, 1.0), + ], + c: vec![0.0, 0.0, 0.0], + a: vec![ + Triplet::new(0, 0, 1.0), + Triplet::new(0, 1, 1.0), + Triplet::new(0, 2, 1.0), + ], + b: vec![1.0], + g: vec![], + h: vec![], + lb: vec![], + ub: vec![], + }; + let sol = solve_qp_ipm(&prob, &QpOptions::default(), backend); + assert_eq!(sol.status, QpStatus::Optimal); + let sens = QpSensitivity::build_default(&prob, &sol, backend).unwrap(); + let rh = sens.reduced_hessian_default(); + assert_eq!(rh.n_dof, 2, "one equality ⇒ 2 DOF"); + for &ev in &rh.eigenvalues { + assert!((ev - 1.0).abs() < 1e-9, "eig {ev}"); + } + } + + /// A non-identity reduced Hessian: `min ½xᵀPx` with a coupled `P` and an + /// equality that pins the sum, cross-checked against the hand-computed + /// `ZᵀPZ` for the unit null-space direction `z = (1,−1)/√2`. + #[test] + fn reduced_hessian_value_matches_hand_projection() { + // P = [[3, 1], [1, 2]]; constraint x₀ + x₁ = 0 ⇒ Z = (1,−1)/√2. + // zᵀPz = (3 − 1 − 1 + 2)/2 = 3/2. + let prob = QpProblem { + n: 2, + p_lower: vec![ + Triplet::new(0, 0, 3.0), + Triplet::new(1, 0, 1.0), + Triplet::new(1, 1, 2.0), + ], + c: vec![0.0, 0.0], + a: vec![Triplet::new(0, 0, 1.0), Triplet::new(0, 1, 1.0)], + b: vec![0.0], + g: vec![], + h: vec![], + lb: vec![], + ub: vec![], + }; + let sol = solve_qp_ipm(&prob, &QpOptions::default(), backend); + assert_eq!(sol.status, QpStatus::Optimal); + let sens = QpSensitivity::build_default(&prob, &sol, backend).unwrap(); + let rh = sens.reduced_hessian_default(); + assert_eq!(rh.n_dof, 1); + assert!( + (rh.eigenvalues[0] - 1.5).abs() < 1e-9, + "H_R = {:?}", + rh.eigenvalues + ); + assert!((rh.matrix[0] - 1.5).abs() < 1e-9); + } +} diff --git a/crates/pounce-convex/src/sos.rs b/crates/pounce-convex/src/sos.rs new file mode 100644 index 00000000..334015e9 --- /dev/null +++ b/crates/pounce-convex/src/sos.rs @@ -0,0 +1,955 @@ +//! Sum-of-squares (SOS) **global lower bounds** for polynomial minimization +//! — the first step of polynomial global optimization on the SDP solver. +//! +//! For a polynomial `p(x)`, the SOS relaxation of `min_x p(x)` is +//! +//! ```text +//! max γ s.t. p(x) − γ is a sum of squares, +//! ``` +//! +//! and `p(x) − γ` is SOS iff there is a PSD Gram matrix `Q ⪰ 0` with +//! `p(x) − γ = z(x)ᵀ Q z(x)`, where `z(x)` is the vector of monomials up to +//! degree `d = ⌈deg p / 2⌉`. Matching the coefficient of each monomial `xᵅ` +//! turns this into a semidefinite program: +//! +//! ```text +//! max γ s.t. Σ_{βᵢ+βⱼ = α} Q_{ij} = p_α − γ·[α = 0], Q ⪰ 0. +//! ``` +//! +//! The optimal `γ*` is a **certified global lower bound**: `γ* ≤ min_x p(x)` +//! always, with equality whenever `p − p*` is itself SOS (e.g. univariate +//! polynomials, quadratics, and many low-degree cases — by Hilbert's +//! theorem not *every* nonnegative polynomial is SOS, so in general `γ*` can +//! be a strict lower bound). This is built as a conic program (one +//! [`crate::ConeSpec::Psd`] block plus coefficient-matching equalities) and +//! solved through [`crate::solve_socp_ipm`]. + +use crate::cones::psd::svec_index; +use crate::ipm::{solve_socp_ipm, QpOptions}; +use crate::qp::{QpProblem, QpStatus, Triplet}; +use crate::ConeSpec; +use pounce_linalg::symmetric_eigen; +use pounce_linsol::SparseSymLinearSolverInterface; +use std::collections::HashMap; + +/// A sparse multivariate polynomial over `n_vars` variables: a list of +/// `(exponent vector, coefficient)` terms. The exponent vector has length +/// `n_vars`; e.g. over `(x, y)` the term `3·x²y` is `(vec![2, 1], 3.0)`. +#[derive(Debug, Clone)] +pub struct Polynomial { + pub n_vars: usize, + pub terms: Vec<(Vec, f64)>, +} + +impl Polynomial { + pub fn new(n_vars: usize, terms: Vec<(Vec, f64)>) -> Self { + Polynomial { n_vars, terms } + } + + /// Total degree (the largest term-exponent sum); `0` for a constant. + pub fn degree(&self) -> usize { + self.terms + .iter() + .map(|(e, _)| e.iter().sum::()) + .max() + .unwrap_or(0) + } + + /// Coefficients keyed by exponent vector (summing any duplicate terms). + fn coeff_map(&self) -> HashMap, f64> { + let mut m: HashMap, f64> = HashMap::new(); + for (e, c) in &self.terms { + *m.entry(e.clone()).or_insert(0.0) += c; + } + m + } +} + +/// A constrained polynomial program `min p(x) s.t. gᵢ(x) ≥ 0, hⱼ(x) = 0`. +#[derive(Debug, Clone)] +pub struct PolyProblem { + pub n_vars: usize, + pub objective: Polynomial, + /// Inequality constraints `gᵢ(x) ≥ 0`. + pub inequalities: Vec, + /// Equality constraints `hⱼ(x) = 0`. + pub equalities: Vec, +} + +impl PolyProblem { + pub fn new(objective: Polynomial) -> Self { + let n_vars = objective.n_vars; + PolyProblem { + n_vars, + objective, + inequalities: Vec::new(), + equalities: Vec::new(), + } + } + + /// Add an inequality `g(x) ≥ 0`. + pub fn ge(mut self, g: Polynomial) -> Self { + self.inequalities.push(g); + self + } + + /// Add an equality `h(x) = 0`. + pub fn eq(mut self, h: Polynomial) -> Self { + self.equalities.push(h); + self + } +} + +/// Result of the SOS relaxation. +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct SosBound { + /// The certified global lower bound `γ* ≤ min_x p(x)`. + pub lower_bound: f64, + /// Solve status of the underlying SDP. + pub status: QpStatus, +} + +/// All monomial exponent vectors over `n` variables with total degree +/// `≤ max_deg`, in a fixed (recursive) order. +fn monomials(n: usize, max_deg: usize) -> Vec> { + let mut out = Vec::new(); + let mut cur = vec![0usize; n]; + fn rec(pos: usize, remaining: usize, cur: &mut [usize], out: &mut Vec>) { + if pos == cur.len() { + out.push(cur.to_vec()); + return; + } + for e in 0..=remaining { + cur[pos] = e; + rec(pos + 1, remaining - e, cur, out); + } + cur[pos] = 0; + } + rec(0, max_deg, &mut cur, &mut out); + out +} + +/// Build and solve the unconstrained SOS lower-bound SDP for `p`, returning +/// the certified global lower bound. See the module docs for the model. +pub fn sos_lower_bound(p: &Polynomial, mut make_backend: F) -> SosBound +where + F: FnMut() -> Box, +{ + sos_lower_bound_opts(p, &sos_opts(), &mut make_backend) +} + +/// [`sos_lower_bound`] with explicit solver options. +pub fn sos_lower_bound_opts(p: &Polynomial, opts: &QpOptions, make_backend: F) -> SosBound +where + F: FnMut() -> Box, +{ + sos_constrained_lower_bound_opts(&PolyProblem::new(p.clone()), None, opts, make_backend) +} + +/// SOS / Lasserre lower bound for a **constrained** polynomial program +/// `min p s.t. gᵢ ≥ 0, hⱼ = 0` at relaxation order `order` (defaults to the +/// minimum admissible). Uses Putinar's representation +/// +/// ```text +/// p(x) − γ = σ₀(x) + Σᵢ σᵢ(x) gᵢ(x) + Σⱼ λⱼ(x) hⱼ(x), +/// ``` +/// +/// with `σ₀, σᵢ` SOS (PSD Gram blocks; the *localizing* multipliers `σᵢ` +/// use the smaller basis of degree `d − ⌈deg gᵢ/2⌉`) and `λⱼ` free +/// polynomials. The returned `γ*` is a certified lower bound on `min p` over +/// the feasible set; raising `order` tightens it (the Lasserre hierarchy). +pub fn sos_constrained_lower_bound( + prob: &PolyProblem, + order: Option, + make_backend: F, +) -> SosBound +where + F: FnMut() -> Box, +{ + sos_constrained_lower_bound_opts(prob, order, &sos_opts(), make_backend) +} + +/// Default solver options for an SOS/moment SDP. +/// +/// SOS relaxations are *degenerate by design*: an exact relaxation has a +/// rank-deficient optimal moment matrix sitting on the PSD-cone boundary, where +/// the Nesterov–Todd scaling has unbounded dynamic range. The infeasible-start +/// symmetric driver stalls or diverges there (e.g. the order-3 trace-penalty +/// refinement ran to the iteration limit and drifted to a `-6e7` "bound"); +/// the homogeneous self-dual embedding stays well-conditioned on the same +/// problems (≈10 iterations), so SOS solves default to it. +fn sos_opts() -> QpOptions { + QpOptions { + use_hsde: true, + ..QpOptions::default() + } +} + +/// The moment-side bookkeeping needed to recover the solution from the SDP +/// dual: the σ₀ monomial basis (= the moment-matrix index set) and the map +/// from a monomial `α` to the coefficient-matching equality whose dual +/// multiplier is the moment `y_α`. +struct MomentInfo { + n_vars: usize, + d: usize, + basis0: Vec>, + row_of: HashMap, usize>, +} + +/// Build the SOS / Putinar SDP for `prob` at the given (clamped) order, +/// returning the conic program, its cones, and the moment bookkeeping. +/// +/// `refine` selects the objective. `None` builds the ordinary lower-bound SDP +/// (`max γ` s.t. `p − γ` is in the Putinar cone) whose dual moments are the +/// analytic-center optimum. `Some(ε)` builds the **facial-reduction** SDP: the +/// objective polynomial is perturbed to `p + ε·θ` with the trace polynomial +/// `θ = Σ_{|β|≤d} x^{2β}`. Its dual moments then minimize `L(p) + ε·L(θ)` — +/// i.e. they pick the minimum-trace (lowest-rank) moment matrix among the +/// near-optimal ones, a standard nuclear-norm/low-rank surrogate. Because +/// `p + ε·θ` is coercive this stays as well-conditioned as the unperturbed +/// solve (unlike pinning `L(p)=γ*`, which is degenerate when `γ*≈0`), and the +/// recovered moment matrix is flat even when the optimum is non-unique. The +/// reported bound still comes from the unperturbed solve. +fn build_sos_sdp( + prob: &PolyProblem, + order: Option, + refine: Option, +) -> (QpProblem, Vec, MomentInfo) { + let n = prob.n_vars; + let r2 = std::f64::consts::SQRT_2; + + // Minimum relaxation order, then honor a user-requested (larger) order. + let mut d_min = prob.objective.degree().div_ceil(2); + for g in &prob.inequalities { + d_min = d_min.max(g.degree().div_ceil(2)); + } + for h in &prob.equalities { + d_min = d_min.max(h.degree().div_ceil(2)); + } + let d = order.map_or(d_min, |o| o.max(d_min)); + let basis0 = monomials(n, d); // σ₀ basis = moment-matrix index set + + // Column layout: x = (γ, svec(Q₀), svec(Q₁)…, free λ coefficients…). + let mut col = 1usize; + let mut cones: Vec = Vec::new(); + let mut g_rows: Vec = Vec::new(); + let mut g_h: Vec = Vec::new(); + let mut by_mono: HashMap, Vec<(usize, f64)>> = HashMap::new(); + let unit = [(vec![0usize; n], 1.0)]; // weight ≡ 1 for σ₀ + + // PSD (SOS) blocks: σ₀ (weight 1, basis degree d), then one localizing + // multiplier per inequality (weight gᵢ, basis degree d − ⌈deg gᵢ/2⌉). + let psd_specs = std::iter::once((d, &unit[..])).chain( + prob.inequalities + .iter() + .map(|g| (d - g.degree().div_ceil(2), &g.terms[..])), + ); + for (deg, weight) in psd_specs { + let basis = monomials(n, deg); + let bn = basis.len(); + let col_base = col; + for i in 0..bn { + for j in 0..=i { + let coef0 = if i == j { 1.0 } else { r2 }; + let qcol = col_base + svec_index(bn, i, j); + let base: Vec = basis[i].iter().zip(&basis[j]).map(|(a, b)| a + b).collect(); + for (delta, wc) in weight { + let alpha: Vec = base.iter().zip(delta).map(|(a, dd)| a + dd).collect(); + by_mono.entry(alpha).or_default().push((qcol, coef0 * wc)); + } + } + } + let sd = bn * (bn + 1) / 2; + for k in 0..sd { + let r = g_h.len(); + g_rows.push(Triplet::new(r, col_base + k, -1.0)); + g_h.push(0.0); + } + cones.push(ConeSpec::Psd(bn)); + col += sd; + } + + // Free multipliers λⱼ for equalities: a free coefficient per monomial of + // degree ≤ 2d − deg(hⱼ), contributing (× hⱼ's terms) with no cone. + for h in &prob.equalities { + let basis = monomials(n, 2 * d - h.degree()); + for nu in &basis { + let lcol = col; + col += 1; + for (delta, hc) in &h.terms { + let alpha: Vec = nu.iter().zip(delta).map(|(a, dd)| a + dd).collect(); + by_mono.entry(alpha).or_default().push((lcol, *hc)); + } + } + } + + let n_x = col; + + // Coefficient-matching RHS: the objective `p`, perturbed by `ε·θ` (with the + // trace polynomial `θ = Σ_b x^{2b}`) when doing the facial-reduction solve. + let pc = prob.objective.coeff_map(); + let mut rhs = pc.clone(); + if let Some(eps) = refine { + for b in &basis0 { + let dbl: Vec = b.iter().map(|e| 2 * e).collect(); + *rhs.entry(dbl).or_insert(0.0) += eps; + } + } + + // One coefficient-matching equality per distinct monomial; record the + // monomial→row map so the equality duals can be read back as moments. + let zero_exp = vec![0usize; n]; + let mut a: Vec = Vec::new(); + let mut b: Vec = Vec::new(); + let mut row_of: HashMap, usize> = HashMap::new(); + for (alpha, terms) in &by_mono { + let row = b.len(); + for &(c, coef) in terms { + a.push(Triplet::new(row, c, coef)); + } + if *alpha == zero_exp { + a.push(Triplet::new(row, 0, 1.0)); // + γ + } + b.push(rhs.get(alpha).copied().unwrap_or(0.0)); + row_of.insert(alpha.clone(), row); + } + + // Objective: maximize γ ⇔ minimize −γ. (The refinement biases the dual + // moments toward low trace purely through the perturbed RHS above.) + let mut c = vec![0.0; n_x]; + c[0] = -1.0; + + let qp = QpProblem { + n: n_x, + p_lower: Vec::new(), + c, + a, + b, + g: g_rows, + h: g_h, + lb: Vec::new(), + ub: Vec::new(), + }; + ( + qp, + cones, + MomentInfo { + n_vars: n, + d, + basis0, + row_of, + }, + ) +} + +/// [`sos_constrained_lower_bound`] with explicit solver options. +pub fn sos_constrained_lower_bound_opts( + prob: &PolyProblem, + order: Option, + opts: &QpOptions, + make_backend: F, +) -> SosBound +where + F: FnMut() -> Box, +{ + let (qp, cones, _moments) = build_sos_sdp(prob, order, None); + let sol = solve_socp_ipm(&qp, &cones, opts, make_backend); + SosBound { + lower_bound: sol.x.first().copied().unwrap_or(f64::NEG_INFINITY), + status: sol.status, + } +} + +/// The result of [`sos_minimize`]: the certified bound plus, when the moment +/// matrix is **flat** (exact relaxation), the global minimizer(s). +/// +/// `is_exact` is a *sufficient* exactness certificate: when it holds, +/// `lower_bound` is provably the global minimum and `minimizers` are the +/// global optimizers. +/// +/// An interior-point solver returns the **maximum-rank** (analytic-center) +/// optimal moment matrix, which is flat only when the optimal moment matrix is +/// unique — so a non-unique optimum would defeat flat truncation. To recover +/// these cases [`sos_minimize`] applies **facial reduction**: when the central +/// moment matrix is not flat it re-solves with a small trace penalty (a +/// low-rank surrogate) that collapses the spurious rank, so a non-unique but +/// exact optimum still certifies and all of its minimizers are extracted. +/// `is_exact` can still be `false` — e.g. when the relaxation order is too low +/// for flatness to be attainable (the moment-matrix rank exceeds the lower +/// basis dimension), or for a genuinely non-SOS-exact relaxation — but +/// `lower_bound` is a valid lower bound regardless. +#[derive(Debug, Clone, PartialEq)] +pub struct SosSolution { + /// Certified global lower bound `γ*` (= the global minimum when `is_exact`). + pub lower_bound: f64, + pub status: QpStatus, + /// `true` when the moment matrix is flat (`rank M_d = rank M_{d-1}`): the + /// relaxation is then exact, so `lower_bound` is the global minimum. + pub is_exact: bool, + /// Number of global minimizers (the flat moment-matrix rank) when exact. + pub num_minimizers: usize, + /// The extracted global minimizers (all `num_minimizers` atoms) when the + /// moment matrix is flat; recovered via the self-adjoint multiplication + /// operators in the moment inner product (symmetric eigensolver only). + pub minimizers: Vec>, +} + +/// Solve `prob` by the SOS/Lasserre relaxation **and** recover the solution +/// from the moment matrix: certify exactness via flat truncation and extract +/// the global minimizer when it is unique. See [`SosSolution`]. +pub fn sos_minimize(prob: &PolyProblem, order: Option, mut make_backend: F) -> SosSolution +where + F: FnMut() -> Box, +{ + let opts = sos_opts(); + let (qp, cones, mi) = build_sos_sdp(prob, order, None); + let sol = solve_socp_ipm(&qp, &cones, &opts, &mut make_backend); + let lower_bound = sol.x.first().copied().unwrap_or(f64::NEG_INFINITY); + if sol.status != QpStatus::Optimal { + return SosSolution { + lower_bound, + status: sol.status, + is_exact: false, + num_minimizers: 0, + minimizers: Vec::new(), + }; + } + + let mut rec = recover_from_moments(&mi, &sol.y); + + // Facial reduction. The interior-point solver lands on the analytic-center + // (maximum-rank) optimal moment matrix, which is flat only when the optimum + // is unique; a non-unique optimum (free moment directions, or spurious + // pseudo-moments invisible to a finite relaxation) inflates the rank and + // defeats flat truncation. Re-solve with a small trace penalty `ε·θ` on the + // objective (a low-rank / nuclear-norm surrogate): its moments collapse the + // spurious rank, so an exact relaxation now certifies and the minimizers + // can be extracted. The reported bound stays the unperturbed `γ*`. + if !rec.is_exact { + const TRACE_EPS: f64 = 1e-4; + let (qp2, cones2, mi2) = build_sos_sdp(prob, order, Some(TRACE_EPS)); + let sol2 = solve_socp_ipm(&qp2, &cones2, &opts, &mut make_backend); + if sol2.status == QpStatus::Optimal { + let rec2 = recover_from_moments(&mi2, &sol2.y); + if rec2.is_exact { + rec = rec2; + } + } + } + + SosSolution { + lower_bound, + status: sol.status, + is_exact: rec.is_exact, + num_minimizers: rec.num_minimizers, + minimizers: rec.minimizers, + } +} + +/// Flat-truncation test + minimizer extraction from an SDP solution's moments. +struct Recovery { + is_exact: bool, + num_minimizers: usize, + minimizers: Vec>, +} + +/// Read the moment matrix out of the equality duals `y` (`y_α = y[row_of(α)]`, +/// with `y_0 = 1` by γ-stationarity up to a global sign), test flat truncation +/// (`rank M_d = rank M_{d−1}`), and extract the global minimizers when flat. +fn recover_from_moments(mi: &MomentInfo, y: &[f64]) -> Recovery { + let moment = |alpha: &[usize]| -> f64 { y[mi.row_of[alpha]] }; + let zero = vec![0usize; mi.n_vars]; + let sign = if moment(&zero) < 0.0 { -1.0 } else { 1.0 }; + + // Moment matrix M_d[i][j] = y_{basis0ᵢ + basis0ⱼ} (row-major). + let big_n = mi.basis0.len(); + let mut m = vec![0.0; big_n * big_n]; + for i in 0..big_n { + for j in 0..big_n { + let a: Vec = mi.basis0[i] + .iter() + .zip(&mi.basis0[j]) + .map(|(p, q)| p + q) + .collect(); + m[i * big_n + j] = sign * moment(&a); + } + } + let rank_full = psd_rank(&m, big_n); + + // Flat truncation: compare with the rank on the degree-≤(d−1) sub-basis. + let is_exact = if mi.d == 0 { + true // a constant objective is trivially exact + } else { + let lower_idx: Vec = (0..big_n) + .filter(|&i| mi.basis0[i].iter().sum::() < mi.d) + .collect(); + let sub_n = lower_idx.len(); + let mut sub = vec![0.0; sub_n * sub_n]; + for (a, &ia) in lower_idx.iter().enumerate() { + for (b, &ib) in lower_idx.iter().enumerate() { + sub[a * sub_n + b] = m[ia * big_n + ib]; + } + } + psd_rank(&sub, sub_n) == rank_full + }; + + let num_minimizers = if is_exact { rank_full } else { 0 }; + let minimizers = if is_exact && rank_full >= 1 && mi.d >= 1 { + extract_atoms(mi, rank_full, |alpha| sign * y[mi.row_of[alpha]]) + } else { + Vec::new() + }; + + Recovery { + is_exact, + num_minimizers, + minimizers, + } +} + +/// Extract the `r` global minimizers (atoms of the optimal measure) from a +/// flat moment matrix, using only the symmetric eigensolver. +/// +/// Multiplication by a real variable `x_k` is **self-adjoint** in the moment +/// inner product `⟨f,g⟩ = L(fg)`, so whitening the degree-≤(d−1) moment +/// matrix `M` (`Wᵀ M W = I_r`) turns each multiplication operator into a +/// symmetric `r×r` matrix `B_k = Wᵀ M^{(k)} W`, where `M^{(k)}_{ij} = +/// y_{βᵢ+βⱼ+eₖ}` (a shifted moment matrix, available because flatness keeps +/// the degree ≤ 2d−1). The `B_k` commute, so a generic combination +/// `Σ cₖ Bₖ` is symmetric with the *common* eigenvectors `q_t`; the atoms' +/// coordinates are the Rayleigh quotients `x*_{t,k} = q_tᵀ Bₖ q_t`. +fn extract_atoms(mi: &MomentInfo, r: usize, moment: impl Fn(&[usize]) -> f64) -> Vec> { + let n = mi.n_vars; + // Quotient basis: monomials of degree ≤ d−1 (flatness ⇒ these span it). + let sub: Vec> = mi + .basis0 + .iter() + .filter(|b| b.iter().sum::() < mi.d) + .cloned() + .collect(); + let s = sub.len(); + if s < r || r == 0 { + return Vec::new(); + } + let mono = |i: usize, j: usize, shift: Option| -> Vec { + (0..n) + .map(|t| sub[i][t] + sub[j][t] + usize::from(shift == Some(t))) + .collect() + }; + + // M (s×s) and its top-r eigenpairs → whitening W (s×r), Wᵀ M W = I_r. + let mut m = vec![0.0; s * s]; + for i in 0..s { + for j in 0..s { + m[i * s + j] = moment(&mono(i, j, None)); + } + } + let mut vals = vec![0.0; s]; + let mut vecs = vec![0.0; s * s]; // column-major eigenvectors, ascending + if !symmetric_eigen(&m, s, &mut vals, &mut vecs) { + return Vec::new(); + } + // W column t ← eigenvector (s−1−t) scaled by 1/√λ. + let mut w = vec![0.0; s * r]; // row-major s×r + for t in 0..r { + let e = s - 1 - t; + let scale = 1.0 / vals[e].max(1e-12).sqrt(); + for i in 0..s { + w[i * r + t] = vecs[e * s + i] * scale; + } + } + + // Whitened multiplication matrices B_k = Wᵀ M^{(k)} W (r×r, symmetric). + let mut bk: Vec> = Vec::with_capacity(n); + for k in 0..n { + let mut mk = vec![0.0; s * s]; + for i in 0..s { + for j in 0..s { + mk[i * s + j] = moment(&mono(i, j, Some(k))); + } + } + // B = Wᵀ Mk W. + let mut mw = vec![0.0; s * r]; // Mk · W + for i in 0..s { + for t in 0..r { + let mut acc = 0.0; + for j in 0..s { + acc += mk[i * s + j] * w[j * r + t]; + } + mw[i * r + t] = acc; + } + } + let mut b = vec![0.0; r * r]; + for a in 0..r { + for c in 0..r { + let mut acc = 0.0; + for i in 0..s { + acc += w[i * r + a] * mw[i * r + c]; + } + b[a * r + c] = acc; + } + } + bk.push(b); + } + + // Generic combination Σ cₖ Bₖ; its eigenvectors are the common atoms' + // directions (cₖ = √(k+1) generically separates the combined eigenvalues). + let mut comb = vec![0.0; r * r]; + for (k, b) in bk.iter().enumerate() { + let ck = ((k + 1) as f64).sqrt(); + for idx in 0..r * r { + comb[idx] += ck * b[idx]; + } + } + let mut cvals = vec![0.0; r]; + let mut cvecs = vec![0.0; r * r]; + if !symmetric_eigen(&comb, r, &mut cvals, &mut cvecs) { + return Vec::new(); + } + + // Atom t: coordinate k = q_tᵀ B_k q_t (q_t orthonormal). + let mut atoms = Vec::with_capacity(r); + for t in 0..r { + let q: Vec = (0..r).map(|i| cvecs[t * r + i]).collect(); + let atom: Vec = bk + .iter() + .map(|b| { + let mut acc = 0.0; + for a in 0..r { + for c in 0..r { + acc += q[a] * b[a * r + c] * q[c]; + } + } + acc + }) + .collect(); + atoms.push(atom); + } + atoms +} + +/// Numerical rank of a symmetric PSD matrix (row-major `n×n`) for flat +/// truncation, by the **largest spectral gap**. +/// +/// A fixed relative threshold is fragile here: a flat moment matrix has a few +/// `O(1)` eigenvalues (one per atom) and a noise floor set by the solver's +/// dual accuracy, but where that floor lands varies with the driver — the +/// homogeneous self-dual embedding leaves an `O(1e-5)` residual while the +/// symmetric driver reaches `O(1e-7)`, straddling any single cutoff. What is +/// invariant is the *gap*: there are many orders of magnitude between the +/// smallest true eigenvalue and the largest noise eigenvalue. So we sort the +/// eigenvalues descending and cut at the largest consecutive ratio, searching +/// only within the plausible band `(1e-9, 1e-2)·λ_max` — above the band an +/// eigenvalue is certainly real, below it is certainly numerical zero. With no +/// gap in the band the matrix is effectively full rank over that band. +fn psd_rank(mat: &[f64], n: usize) -> usize { + if n == 0 { + return 0; + } + let mut vals = vec![0.0; n]; + let mut vecs = vec![0.0; n * n]; + if !symmetric_eigen(mat, n, &mut vals, &mut vecs) { + return n; + } + // Eigenvalues descending, floored at 0 (PSD; tiny negatives are noise), + // normalized by λ_max so the bands below are absolute. + let mut d: Vec = vals.iter().rev().map(|&v| v.max(0.0)).collect(); + let max = d[0]; + if max <= 1e-12 { + return 0; + } + for v in &mut d { + *v /= max; + } + const HI: f64 = 1e-2; // ≥ HI ⇒ certainly a real eigenvalue + const LO: f64 = 1e-9; // ≤ LO ⇒ certainly numerical zero + const MIN_GAP: f64 = 1e2; // a real rank cut spans ≥ this ratio + let r_certain = d.iter().filter(|&&v| v >= HI).count(); + let r_possible = d.iter().filter(|&&v| v > LO).count(); + if r_certain == r_possible { + return r_certain; // nothing in the ambiguous band + } + // Cut at the largest consecutive ratio gap within the ambiguous band; if no + // gap clears MIN_GAP, keep every eigenvalue above the numerical-zero floor. + let mut rank = r_possible; + let mut best = MIN_GAP; + for i in r_certain.max(1)..r_possible { + let ratio = d[i - 1] / d[i].max(1e-300); + if ratio > best { + best = ratio; + rank = i; + } + } + rank +} + +#[cfg(test)] +mod tests { + use super::*; + use pounce_feral::FeralSolverInterface; + + fn backend() -> Box { + Box::new(FeralSolverInterface::new()) + } + + #[test] + fn monomial_count_is_binomial() { + // #monomials over n vars of degree ≤ d is C(n+d, d). + assert_eq!(monomials(1, 2).len(), 3); // 1, x, x² + assert_eq!(monomials(2, 1).len(), 3); // 1, x, y + assert_eq!(monomials(2, 2).len(), 6); // 1,x,y,x²,xy,y² + assert_eq!(monomials(3, 2).len(), 10); + } + + #[test] + fn univariate_quartic_known_minimum() { + // p(x) = x⁴ − 2x² + 3. p' = 4x³ − 4x = 0 ⇒ x = 0, ±1; min at ±1 is + // 1 − 2 + 3 = 2. p − 2 = (x² − 1)² is SOS, so the bound is exact. + let p = Polynomial::new(1, vec![(vec![4], 1.0), (vec![2], -2.0), (vec![0], 3.0)]); + let r = sos_lower_bound(&p, backend); + assert_eq!(r.status, QpStatus::Optimal, "{:?}", r.status); + assert!( + (r.lower_bound - 2.0).abs() < 1e-5, + "bound = {}", + r.lower_bound + ); + } + + #[test] + fn shifted_paraboloid_two_vars() { + // p(x,y) = (x−1)² + y² = x² − 2x + 1 + y². Min 0 at (1, 0); SOS-exact. + let p = Polynomial::new( + 2, + vec![ + (vec![2, 0], 1.0), + (vec![1, 0], -2.0), + (vec![0, 0], 1.0), + (vec![0, 2], 1.0), + ], + ); + let r = sos_lower_bound(&p, backend); + assert_eq!(r.status, QpStatus::Optimal, "{:?}", r.status); + assert!(r.lower_bound.abs() < 1e-5, "bound = {}", r.lower_bound); + } + + #[test] + fn constant_polynomial() { + // p ≡ 7: the global minimum (and SOS bound) is 7. + let p = Polynomial::new(1, vec![(vec![0], 7.0)]); + let r = sos_lower_bound(&p, backend); + assert_eq!(r.status, QpStatus::Optimal); + assert!( + (r.lower_bound - 7.0).abs() < 1e-6, + "bound = {}", + r.lower_bound + ); + } + + #[test] + fn quadratic_lower_bound() { + // p(x) = x² − 4x + 5 = (x−2)² + 1. Min 1; basis degree d = 1. + let p = Polynomial::new(1, vec![(vec![2], 1.0), (vec![1], -4.0), (vec![0], 5.0)]); + let r = sos_lower_bound(&p, backend); + assert_eq!(r.status, QpStatus::Optimal); + assert!( + (r.lower_bound - 1.0).abs() < 1e-5, + "bound = {}", + r.lower_bound + ); + } + + #[test] + fn constrained_linear_lower_bound() { + // min x s.t. x − 1 ≥ 0 ⇒ min = 1 (the constraint binds). + let prob = PolyProblem::new(Polynomial::new(1, vec![(vec![1], 1.0)])) + .ge(Polynomial::new(1, vec![(vec![1], 1.0), (vec![0], -1.0)])); + let r = sos_constrained_lower_bound(&prob, None, backend); + assert_eq!(r.status, QpStatus::Optimal, "{:?}", r.status); + assert!( + (r.lower_bound - 1.0).abs() < 1e-5, + "bound = {}", + r.lower_bound + ); + } + + #[test] + fn constrained_nonconvex_box() { + // min −x s.t. 1 − x² ≥ 0 (x ∈ [−1,1]) ⇒ min = −1 at x = 1. + // The localizing multiplier σ₁ (a nonneg scalar) makes the bound + // exact — a nonconvex feasible-set bound from the SDP. + let prob = PolyProblem::new(Polynomial::new(1, vec![(vec![1], -1.0)])) + .ge(Polynomial::new(1, vec![(vec![0], 1.0), (vec![2], -1.0)])); + let r = sos_constrained_lower_bound(&prob, None, backend); + assert_eq!(r.status, QpStatus::Optimal, "{:?}", r.status); + assert!( + (r.lower_bound + 1.0).abs() < 1e-5, + "bound = {}", + r.lower_bound + ); + } + + #[test] + fn constrained_equality_lower_bound() { + // min x² + y² s.t. x + y − 2 = 0 ⇒ min = 2 at (1,1), via a free + // multiplier λ(x,y) for the equality. + let obj = Polynomial::new(2, vec![(vec![2, 0], 1.0), (vec![0, 2], 1.0)]); + let prob = PolyProblem::new(obj).eq(Polynomial::new( + 2, + vec![(vec![1, 0], 1.0), (vec![0, 1], 1.0), (vec![0, 0], -2.0)], + )); + let r = sos_constrained_lower_bound(&prob, None, backend); + assert_eq!(r.status, QpStatus::Optimal, "{:?}", r.status); + assert!( + (r.lower_bound - 2.0).abs() < 1e-5, + "bound = {}", + r.lower_bound + ); + } + + #[test] + fn extract_unique_minimizer_1d() { + // p(x) = x² − 4x + 5 = (x−2)² + 1. Unique min x* = 2, value 1. + let p = Polynomial::new(1, vec![(vec![2], 1.0), (vec![1], -4.0), (vec![0], 5.0)]); + let s = sos_minimize(&PolyProblem::new(p), None, backend); + assert_eq!(s.status, QpStatus::Optimal); + assert!(s.is_exact, "should be flat/exact"); + assert_eq!(s.num_minimizers, 1); + assert_eq!(s.minimizers.len(), 1); + assert!( + (s.minimizers[0][0] - 2.0).abs() < 1e-4, + "x* = {:?}", + s.minimizers[0] + ); + assert!((s.lower_bound - 1.0).abs() < 1e-5); + } + + #[test] + fn extract_unique_minimizer_2d() { + // p(x,y) = (x−1)² + (y−2)². Unique min (1, 2), value 0. + let p = Polynomial::new( + 2, + vec![ + (vec![2, 0], 1.0), + (vec![1, 0], -2.0), + (vec![0, 2], 1.0), + (vec![0, 1], -4.0), + (vec![0, 0], 5.0), + ], + ); + let s = sos_minimize(&PolyProblem::new(p), None, backend); + assert_eq!(s.status, QpStatus::Optimal); + assert!(s.is_exact); + assert_eq!(s.num_minimizers, 1); + let x = &s.minimizers[0]; + assert!( + (x[0] - 1.0).abs() < 1e-4 && (x[1] - 2.0).abs() < 1e-4, + "x* = {x:?}" + ); + } + + #[test] + fn extracts_two_global_minimizers() { + // p(x) = x⁴ − 2x² + 3 has TWO global minimizers x = ±1 (value 2). + // The relaxation is flat (moment-matrix rank 2) and the multi-atom + // extraction recovers both points. + let p = Polynomial::new(1, vec![(vec![4], 1.0), (vec![2], -2.0), (vec![0], 3.0)]); + let s = sos_minimize(&PolyProblem::new(p), None, backend); + assert_eq!(s.status, QpStatus::Optimal); + assert!(s.is_exact, "flat truncation should hold"); + assert_eq!(s.num_minimizers, 2, "two atoms at ±1"); + assert_eq!(s.minimizers.len(), 2); + let mut roots: Vec = s.minimizers.iter().map(|m| m[0]).collect(); + roots.sort_by(|a, b| a.partial_cmp(b).unwrap()); + assert!((roots[0] + 1.0).abs() < 1e-3, "min root {}", roots[0]); + assert!((roots[1] - 1.0).abs() < 1e-3, "max root {}", roots[1]); + assert!((s.lower_bound - 2.0).abs() < 1e-5); + } + + #[test] + fn facial_reduction_recovers_nonunique_minimizers() { + // p(x,y) = (x²−1)² + y², global min 0 at (±1, 0). The objective is + // SOS so the bound is exact (0), but the optimum is non-unique: the + // interior-point solver lands on the analytic-center moment matrix, + // whose rank is inflated by a spurious pseudo-moment direction + // (L(y⁴) > 0 while L(y²) = 0), so plain flat truncation fails. The + // facial-reduction (minimum-trace) re-solve collapses that rank and + // recovers both minimizers. + let p = Polynomial::new( + 2, + vec![ + (vec![4, 0], 1.0), + (vec![2, 0], -2.0), + (vec![0, 0], 1.0), + (vec![0, 2], 1.0), + ], + ); + let s = sos_minimize(&PolyProblem::new(p), None, backend); + assert_eq!(s.status, QpStatus::Optimal); + assert!(s.lower_bound.abs() < 1e-5, "bound = {}", s.lower_bound); + assert!(s.is_exact, "facial reduction should certify exactness"); + assert_eq!(s.num_minimizers, 2, "two atoms at (±1, 0)"); + let mut xs: Vec = s.minimizers.iter().map(|m| m[0]).collect(); + xs.sort_by(|a, b| a.partial_cmp(b).unwrap()); + assert!((xs[0] + 1.0).abs() < 1e-2, "x⁻ = {}", xs[0]); + assert!((xs[1] - 1.0).abs() < 1e-2, "x⁺ = {}", xs[1]); + for atom in &s.minimizers { + assert!(atom[1].abs() < 1e-2, "y = {}", atom[1]); + } + } + + #[test] + fn facial_reduction_three_minimizers_degree_six() { + // p(x) = x²(x−1)²(x+1)² = x⁶ − 2x⁴ + x², a nonnegative sextic with + // THREE global minima (value 0) at x = −1, 0, 1. The order-3 relaxation + // is degenerate (a boundary-rank optimum); the HSDE driver solves it and + // facial reduction recovers all three atoms. + let p = Polynomial::new(1, vec![(vec![6], 1.0), (vec![4], -2.0), (vec![2], 1.0)]); + let s = sos_minimize(&PolyProblem::new(p), None, backend); + assert_eq!(s.status, QpStatus::Optimal, "{:?}", s.status); + assert!(s.lower_bound.abs() < 1e-5, "bound = {}", s.lower_bound); + assert!(s.is_exact, "facial reduction should certify exactness"); + assert_eq!(s.num_minimizers, 3, "three atoms at −1, 0, 1"); + let mut roots: Vec = s.minimizers.iter().map(|m| m[0]).collect(); + roots.sort_by(|a, b| a.partial_cmp(b).unwrap()); + assert!((roots[0] + 1.0).abs() < 1e-2, "{roots:?}"); + assert!(roots[1].abs() < 1e-2, "{roots:?}"); + assert!((roots[2] - 1.0).abs() < 1e-2, "{roots:?}"); + } + + #[test] + fn facial_reduction_four_minimizers_2d_order_three() { + // p(x,y) = (x²−1)² + (y²−1)², four global minima (value 0) at (±1, ±1). + // Four atoms need moment-matrix rank 4, which cannot stabilize against + // the 3-dimensional degree-≤1 subspace until order 3 — a larger, more + // degenerate SDP that only the HSDE driver carries to optimality. + let p = Polynomial::new( + 2, + vec![ + (vec![4, 0], 1.0), + (vec![2, 0], -2.0), + (vec![0, 4], 1.0), + (vec![0, 2], -2.0), + (vec![0, 0], 2.0), + ], + ); + let s = sos_minimize(&PolyProblem::new(p), Some(3), backend); + assert_eq!(s.status, QpStatus::Optimal, "{:?}", s.status); + assert!(s.lower_bound.abs() < 1e-5, "bound = {}", s.lower_bound); + assert!(s.is_exact, "facial reduction should certify exactness"); + assert_eq!(s.num_minimizers, 4, "four atoms at (±1, ±1)"); + for atom in &s.minimizers { + assert!((atom[0].abs() - 1.0).abs() < 2e-2, "x = {}", atom[0]); + assert!((atom[1].abs() - 1.0).abs() < 2e-2, "y = {}", atom[1]); + } + // All four quadrants present. + let mut quad = [false; 4]; + for atom in &s.minimizers { + quad[usize::from(atom[0] > 0.0) + 2 * usize::from(atom[1] > 0.0)] = true; + } + assert!( + quad.iter().all(|&q| q), + "missing a quadrant: {:?}", + s.minimizers + ); + } +} diff --git a/crates/pounce-convex/tests/batch.rs b/crates/pounce-convex/tests/batch.rs new file mode 100644 index 00000000..070053bd --- /dev/null +++ b/crates/pounce-convex/tests/batch.rs @@ -0,0 +1,216 @@ +//! Batched / multiple-RHS convex-QP solving (pounce#74–#77 analogue at +//! the optimization layer). Each batched solution must match the +//! corresponding single-problem solve, in order. + +use pounce_convex::{ + solve_qp_batch, solve_qp_batch_parallel, solve_qp_ipm, solve_qp_multi_rhs, QpOptions, + QpProblem, QpStatus, Triplet, +}; +use pounce_feral::FeralSolverInterface; +use pounce_linsol::SparseSymLinearSolverInterface; + +fn backend() -> Box { + Box::new(FeralSolverInterface::new()) +} + +/// Inner-serial backend for the parallel batch path (outer-parallel / +/// inner-serial); feral's parallel and serial drivers are bit-identical, so +/// results match `backend`. +fn serial_backend() -> Box { + Box::new(FeralSolverInterface::serial()) +} + +/// A simple box-constrained QP `min ½‖x − t‖²·2 ... ` parameterized by a +/// target via the linear term. `c = −2·t` ⇒ unconstrained optimum at `t`, +/// clamped to [0, 1] by the bounds. +fn boxed_qp(c: Vec) -> QpProblem { + let n = c.len(); + QpProblem { + n, + p_lower: (0..n).map(|i| Triplet::new(i, i, 2.0)).collect(), + c, + a: vec![], + b: vec![], + g: vec![], + h: vec![], + lb: vec![0.0; n], + ub: vec![1.0; n], + } +} + +#[test] +fn batch_matches_individual_solves() { + let probs = vec![ + boxed_qp(vec![-1.0, -4.0]), // opt clamps to (0.5, 1.0) + boxed_qp(vec![-4.0, 1.0]), // opt clamps to (1.0, 0.0) + boxed_qp(vec![0.0, 0.0]), // opt at (0, 0) + ]; + let opts = QpOptions::default(); + + let batched = solve_qp_batch(&probs, &opts, backend); + assert_eq!(batched.len(), probs.len()); + + for (i, prob) in probs.iter().enumerate() { + let single = solve_qp_ipm(prob, &opts, backend); + assert_eq!(batched[i].status, QpStatus::Optimal); + assert_eq!(single.status, QpStatus::Optimal); + for j in 0..prob.n { + assert!( + (batched[i].x[j] - single.x[j]).abs() < 1e-9, + "batch[{i}].x[{j}] {} vs single {}", + batched[i].x[j], + single.x[j] + ); + } + assert!((batched[i].obj - single.obj).abs() < 1e-9); + } +} + +#[test] +fn multi_rhs_matches_individual_solves() { + // Same structure (P = 2I, 0 ≤ x ≤ 1), many objectives. + let base = boxed_qp(vec![0.0, 0.0]); + let cs = vec![ + vec![-1.0, -4.0], + vec![-4.0, 1.0], + vec![3.0, -2.0], + vec![0.0, 0.0], + ]; + let opts = QpOptions::default(); + + let many = solve_qp_multi_rhs(&base, &cs, &opts, backend); + assert_eq!(many.len(), cs.len()); + + for (i, c) in cs.iter().enumerate() { + let single = solve_qp_ipm(&boxed_qp(c.clone()), &opts, backend); + assert_eq!(many[i].status, QpStatus::Optimal); + for j in 0..base.n { + assert!( + (many[i].x[j] - single.x[j]).abs() < 1e-9, + "multi[{i}].x[{j}] {} vs single {}", + many[i].x[j], + single.x[j] + ); + } + } + + // Spot-check known clamped optima (IPM tolerance ~1e-4): + // c=(-1,-4) → unconstrained (0.5, 2.0) clamps to (0.5, 1.0). + assert!((many[0].x[0] - 0.5).abs() < 1e-4, "x0={}", many[0].x[0]); + assert!((many[0].x[1] - 1.0).abs() < 1e-4, "x1={}", many[0].x[1]); + // c=(3,-2) → unconstrained (−1.5, 1.0) clamps to (0.0, 1.0). + assert!(many[2].x[0].abs() < 1e-4, "x0={}", many[2].x[0]); + assert!((many[2].x[1] - 1.0).abs() < 1e-4, "x1={}", many[2].x[1]); +} + +#[test] +fn batch_preserves_per_instance_status() { + // Mix a feasible QP with an unbounded one; statuses must line up + // with the inputs by index. + let feasible = boxed_qp(vec![-1.0, -1.0]); + let unbounded = QpProblem { + n: 1, + p_lower: vec![], // LP + c: vec![-1.0], // min −x0 with x0 ≥ 0, no upper bound + a: vec![], + b: vec![], + g: vec![Triplet::new(0, 0, -1.0)], + h: vec![0.0], + lb: vec![], + ub: vec![], + }; + let probs = vec![feasible, unbounded]; + let res = solve_qp_batch(&probs, &QpOptions::default(), backend); + assert_eq!(res[0].status, QpStatus::Optimal); + assert_eq!(res[1].status, QpStatus::DualInfeasible); +} + +#[test] +fn large_batch_parallel_path() { + // A batch big enough to exercise the dedicated parallel pool (and the + // worker-stack / feral-serial handling that prevents the nested-pool + // stack overflow). Results must match index-wise. + let opts = QpOptions::default(); + let probs: Vec = (0..1500) + .map(|k| { + let t = (k as f64) / 500.0; // sweeps across the box and beyond + boxed_qp(vec![-2.0 * t, -2.0 * (1.0 - t)]) + }) + .collect(); + let batched = solve_qp_batch_parallel(&probs, &opts, serial_backend); + assert_eq!(batched.len(), probs.len()); + // Compare a sample against single solves (full sweep would be slow). + for k in (0..probs.len()).step_by(97) { + assert_eq!(batched[k].status, QpStatus::Optimal, "k={k}"); + let single = solve_qp_ipm(&probs[k], &opts, backend); + for j in 0..probs[k].n { + assert!((batched[k].x[j] - single.x[j]).abs() < 1e-9, "k={k} j={j}"); + } + } +} + +// --- QpFactorization: build-once / solve-many across instances --- + +use pounce_convex::QpFactorization; + +#[test] +fn factorization_handle_matches_one_shot() { + // Fixed structure (P = 2I, 0 ≤ x ≤ 1), many objectives; the handle's + // reused symbolic factor must give the same answers as one-shot solves. + // + // This test is about the *factorization-reuse* mechanism, so it compares + // against the identical algorithm: the build-once handle path runs the + // direct (non-HSDE) IPM on a captured factorization and does not + // Ruiz-equilibrate (it preserves the captured structure across instances), + // so both `use_hsde` and `equilibrate` are disabled on the one-shot too — + // otherwise the two would be different solves and only agree to solver + // tolerance, not the bit-tight match the reuse correctness check wants. + let base = boxed_qp(vec![0.0, 0.0]); + let opts = QpOptions { + use_hsde: false, + equilibrate: false, + ..QpOptions::default() + }; + let mut handle = QpFactorization::build(&base, &opts, backend).expect("build"); + + for c in [ + vec![-1.0, -4.0], + vec![-4.0, 1.0], + vec![3.0, -2.0], + vec![0.0, 0.0], + vec![-2.0, -2.0], + ] { + let prob = boxed_qp(c.clone()); + let reused = handle.solve(&prob); + let one_shot = solve_qp_ipm(&prob, &opts, backend); + assert_eq!(reused.status, QpStatus::Optimal, "c={c:?}"); + for j in 0..base.n { + assert!( + (reused.x[j] - one_shot.x[j]).abs() < 1e-9, + "c={c:?} x[{j}] reused {} vs one-shot {}", + reused.x[j], + one_shot.x[j] + ); + // Bound duals must match too. + assert!((reused.z_lb[j] - one_shot.z_lb[j]).abs() < 1e-6); + assert!((reused.z_ub[j] - one_shot.z_ub[j]).abs() < 1e-6); + } + assert!((reused.obj - one_shot.obj).abs() < 1e-9); + } +} + +#[test] +fn factorization_handle_rejects_pattern_mismatch() { + // Built on a 2-var box QP; solving a 3-var problem must not silently + // reuse the wrong factor — it returns NumericalFailure. + let base = boxed_qp(vec![0.0, 0.0]); + let mut handle = QpFactorization::build(&base, &QpOptions::default(), backend).expect("build"); + + let mismatched = boxed_qp(vec![0.0, 0.0, 0.0]); // n = 3 + let sol = handle.solve(&mismatched); + assert_eq!(sol.status, QpStatus::NumericalFailure); + + // A matching-structure problem still solves fine afterward. + let ok = handle.solve(&boxed_qp(vec![-1.0, -1.0])); + assert_eq!(ok.status, QpStatus::Optimal); +} diff --git a/crates/pounce-convex/tests/bounded_form.rs b/crates/pounce-convex/tests/bounded_form.rs new file mode 100644 index 00000000..ba728aea --- /dev/null +++ b/crates/pounce-convex/tests/bounded_form.rs @@ -0,0 +1,207 @@ +//! Tests for the explicit variable-bound form: `lb ≤ x ≤ ub` as +//! first-class fields on `QpProblem`, solved by bound expansion in the +//! IPM with the bound multipliers reported in `z_lb` / `z_ub`. +//! +//! Each test cross-checks the bounded form against the equivalent +//! G-row encoding so the two representations agree, and checks the +//! KKT stationarity that includes the bound duals. + +use pounce_convex::presolve::solve_with_presolve; +use pounce_convex::{solve_qp_ipm, QpOptions, QpProblem, QpStatus, Triplet, NEG_INF, POS_INF}; +use pounce_feral::FeralSolverInterface; +use pounce_linsol::SparseSymLinearSolverInterface; + +fn backend() -> Box { + Box::new(FeralSolverInterface::new()) +} + +fn solve(prob: &QpProblem) -> pounce_convex::QpSolution { + solve_qp_ipm(prob, &QpOptions::default(), backend) +} + +/// Stationarity with bound duals: Px + c + Aᵀy + Gᵀz − z_lb + z_ub = 0. +fn assert_stationarity(prob: &QpProblem, sol: &pounce_convex::QpSolution, tol: f64) { + let mut g = prob.c.clone(); + prob.p_mul(&sol.x, &mut g); + prob.at_mul(&sol.y, &mut g); + prob.gt_mul(&sol.z, &mut g); + for i in 0..prob.n { + g[i] -= sol.z_lb[i]; + g[i] += sol.z_ub[i]; + } + for (i, gi) in g.iter().enumerate() { + assert!(gi.abs() < tol, "stationarity[{i}] = {gi}"); + } +} + +/// Upper bound binds: min ½(x0−3)²+(x1−4)² with x ≤ (1, +∞). +/// Optimum x0 = 1 (bound active), x1 = 4 (interior). f* = −10.5. +#[test] +fn upper_bound_binds() { + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 1.0), Triplet::new(1, 1, 1.0)], + c: vec![-3.0, -4.0], + a: vec![], + b: vec![], + g: vec![], + h: vec![], + lb: vec![NEG_INF, NEG_INF], + ub: vec![1.0, POS_INF], + }; + let sol = solve(&prob); + assert_eq!(sol.status, QpStatus::Optimal); + assert!((sol.x[0] - 1.0).abs() < 1e-6, "x0={}", sol.x[0]); + assert!((sol.x[1] - 4.0).abs() < 1e-6, "x1={}", sol.x[1]); + assert!((sol.obj - (-10.5)).abs() < 1e-6, "obj={}", sol.obj); + // Upper bound on x0 is active with a positive multiplier (= 2). + assert!(sol.z_ub[0] > 1.0, "z_ub[0]={}", sol.z_ub[0]); + assert!(sol.z_lb[0].abs() < 1e-5, "z_lb[0]={}", sol.z_lb[0]); + assert_stationarity(&prob, &sol, 1e-5); +} + +/// Lower bound binds: min ½(x0+3)² with x0 ≥ 0. Optimum x0 = 0. +#[test] +fn lower_bound_binds() { + let prob = QpProblem { + n: 1, + p_lower: vec![Triplet::new(0, 0, 1.0)], + c: vec![3.0], // unconstrained optimum at −3 + a: vec![], + b: vec![], + g: vec![], + h: vec![], + lb: vec![0.0], + ub: vec![POS_INF], + }; + let sol = solve(&prob); + assert_eq!(sol.status, QpStatus::Optimal); + assert!(sol.x[0].abs() < 1e-6, "x0={}", sol.x[0]); + assert!(sol.z_lb[0] > 1.0, "z_lb[0]={}", sol.z_lb[0]); + assert_stationarity(&prob, &sol, 1e-5); +} + +/// Box-constrained LP: min −x0 − x1 with 0 ≤ x ≤ 1. Optimum (1, 1). +#[test] +fn box_constrained_lp() { + let prob = QpProblem { + n: 2, + p_lower: vec![], + c: vec![-1.0, -1.0], + a: vec![], + b: vec![], + g: vec![], + h: vec![], + lb: vec![0.0, 0.0], + ub: vec![1.0, 1.0], + }; + let sol = solve(&prob); + assert_eq!(sol.status, QpStatus::Optimal); + assert!((sol.x[0] - 1.0).abs() < 1e-6, "x0={}", sol.x[0]); + assert!((sol.x[1] - 1.0).abs() < 1e-6, "x1={}", sol.x[1]); + assert!((sol.obj - (-2.0)).abs() < 1e-6, "obj={}", sol.obj); + assert_stationarity(&prob, &sol, 1e-5); +} + +/// The bounded form must agree with the equivalent G-row encoding. +#[test] +fn bounded_form_matches_g_row_encoding() { + // min ½‖x‖² + cᵀx, 0 ≤ x ≤ 2. + let bounded = QpProblem { + n: 3, + p_lower: vec![ + Triplet::new(0, 0, 2.0), + Triplet::new(1, 1, 2.0), + Triplet::new(2, 2, 2.0), + ], + c: vec![-5.0, 1.0, -0.5], + a: vec![], + b: vec![], + g: vec![], + h: vec![], + lb: vec![0.0, 0.0, 0.0], + ub: vec![2.0, 2.0, 2.0], + }; + // Same problem with bounds written as 2n G rows. + let mut g = Vec::new(); + let mut h = Vec::new(); + for i in 0..3 { + g.push(Triplet::new(2 * i, i, 1.0)); // x_i ≤ 2 + h.push(2.0); + g.push(Triplet::new(2 * i + 1, i, -1.0)); // −x_i ≤ 0 + h.push(0.0); + } + let g_form = QpProblem { + n: 3, + p_lower: bounded.p_lower.clone(), + c: bounded.c.clone(), + a: vec![], + b: vec![], + g, + h, + lb: vec![], + ub: vec![], + }; + + let sb = solve(&bounded); + let sg = solve(&g_form); + assert_eq!(sb.status, QpStatus::Optimal); + assert_eq!(sg.status, QpStatus::Optimal); + for i in 0..3 { + assert!( + (sb.x[i] - sg.x[i]).abs() < 1e-5, + "x[{i}]: bounded {} vs G-row {}", + sb.x[i], + sg.x[i] + ); + } + assert!( + (sb.obj - sg.obj).abs() < 1e-5, + "obj {} vs {}", + sb.obj, + sg.obj + ); +} + +/// Presolve respects bounds: a singleton equality that fixes a variable +/// outside its box is infeasible. +#[test] +fn presolve_singleton_fix_violates_bound() { + // x0 = 5 but x0 ≤ 1 → infeasible. + let prob = QpProblem { + n: 1, + p_lower: vec![Triplet::new(0, 0, 2.0)], + c: vec![0.0], + a: vec![Triplet::new(0, 0, 1.0)], + b: vec![5.0], + g: vec![], + h: vec![], + lb: vec![NEG_INF], + ub: vec![1.0], + }; + let sol = solve_with_presolve(&prob, |r| solve_qp_ipm(r, &QpOptions::default(), backend)); + assert_eq!(sol.status, QpStatus::PrimalInfeasible); +} + +/// Presolve free-column at a bound: a linear-only variable with positive +/// cost is pushed to its lower bound, and the rest solves normally. +#[test] +fn presolve_free_column_to_lower_bound() { + // min x0² + x1 (x1 linear-only, c=+1 → pushed to lb) s.t. x0 = 2, + // with x1 ∈ [3, 10]. Expect x1 = 3. + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0)], + c: vec![0.0, 1.0], + a: vec![Triplet::new(0, 0, 1.0)], // x0 = 2 + b: vec![2.0], + g: vec![], + h: vec![], + lb: vec![NEG_INF, 3.0], + ub: vec![POS_INF, 10.0], + }; + let sol = solve_with_presolve(&prob, |r| solve_qp_ipm(r, &QpOptions::default(), backend)); + assert_eq!(sol.status, QpStatus::Optimal); + assert!((sol.x[0] - 2.0).abs() < 1e-6, "x0={}", sol.x[0]); + assert!((sol.x[1] - 3.0).abs() < 1e-6, "x1={}", sol.x[1]); +} diff --git a/crates/pounce-convex/tests/debug.rs b/crates/pounce-convex/tests/debug.rs new file mode 100644 index 00000000..7580a927 --- /dev/null +++ b/crates/pounce-convex/tests/debug.rs @@ -0,0 +1,335 @@ +//! The convex IPM honors an attached `DebugHook`: it fires the shared +//! checkpoints, exposes the iterate through the `DebugState` surface, and +//! the attached hook does not change the solve result. + +use pounce_common::debug::{Checkpoint, DebugAction, DebugHook, DebugState}; +use pounce_convex::{solve_qp_ipm, solve_qp_ipm_debug, QpOptions, QpProblem, QpStatus, Triplet}; +use pounce_feral::FeralSolverInterface; +use pounce_linsol::SparseSymLinearSolverInterface; + +fn backend() -> Box { + Box::new(FeralSolverInterface::new()) +} + +/// min ½(x0² + x1²) s.t. x0 + x1 ≥ 2 (i.e. −x0 − x1 ≤ −2). Optimum (1, 1), +/// f* = 1, the inequality active with z ≈ 1 — a nonempty cone, so the IPM +/// takes several predictor-corrector iterations. +fn active_ineq_qp() -> QpProblem { + QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 1.0), Triplet::new(1, 1, 1.0)], + c: vec![0.0, 0.0], + a: vec![], + b: vec![], + g: vec![Triplet::new(0, 0, -1.0), Triplet::new(0, 1, -1.0)], + h: vec![-2.0], + lb: vec![], + ub: vec![], + } +} + +/// Records what the debugger sees at each checkpoint, and resumes. +#[derive(Default)] +struct Recorder { + checkpoints: Vec, + max_mu: f64, + saw_nonempty_z: bool, + saw_tau: bool, + x_dim_at_iter_start: Option, + terminal_status: Option, +} + +impl DebugHook for Recorder { + fn at_checkpoint(&mut self, st: &mut dyn DebugState) -> DebugAction { + self.checkpoints.push(st.checkpoint()); + self.max_mu = self.max_mu.max(st.mu()); + if let Some(z) = st.block("z") { + if !z.is_empty() { + self.saw_nonempty_z = true; + } + } + if st.block("tau").is_some() { + self.saw_tau = true; + } + if st.checkpoint() == Checkpoint::IterStart { + self.x_dim_at_iter_start = st.block("x").map(|v| v.len()); + } + if st.checkpoint() == Checkpoint::Terminated { + self.terminal_status = st.status().map(str::to_owned); + } + DebugAction::Resume + } +} + +#[test] +fn convex_ipm_fires_checkpoints_and_exposes_state() { + let prob = active_ineq_qp(); + let opts = QpOptions::default(); + let mut rec = Recorder::default(); + let sol = solve_qp_ipm_debug(&prob, &opts, &mut rec, backend); + + // The solve still reaches the known optimum. + assert_eq!(sol.status, QpStatus::Optimal, "iters={}", sol.iters); + assert!((sol.x[0] - 1.0).abs() < 1e-6, "x0={}", sol.x[0]); + assert!((sol.x[1] - 1.0).abs() < 1e-6, "x1={}", sol.x[1]); + + // Every checkpoint kind fired at least once. + let fired = |c| rec.checkpoints.contains(&c); + assert!(fired(Checkpoint::IterStart), "no IterStart"); + assert!( + fired(Checkpoint::AfterSearchDirection), + "no AfterSearchDirection" + ); + assert!(fired(Checkpoint::AfterStep), "no AfterStep"); + assert!(fired(Checkpoint::Terminated), "no Terminated"); + + // State surfaced correctly: nonempty cone, μ moved, x has the right + // dimension, and the terminal checkpoint carried the status. + assert!( + rec.saw_nonempty_z, + "z block should be nonempty (one cone row)" + ); + assert!(rec.max_mu > 0.0, "mu should be positive on a coned solve"); + assert_eq!(rec.x_dim_at_iter_start, Some(2), "x dim"); + assert_eq!(rec.terminal_status.as_deref(), Some("Optimal")); +} + +#[test] +fn attaching_a_hook_does_not_change_the_result() { + let prob = active_ineq_qp(); + let opts = QpOptions::default(); + + let plain = solve_qp_ipm(&prob, &opts, backend); + let mut rec = Recorder::default(); + let debugged = solve_qp_ipm_debug(&prob, &opts, &mut rec, backend); + + assert_eq!(plain.status, debugged.status); + assert_eq!(plain.iters, debugged.iters, "iteration count must match"); + for (a, b) in plain.x.iter().zip(&debugged.x) { + assert!((a - b).abs() < 1e-12, "x differs: {a} vs {b}"); + } + assert!((plain.obj - debugged.obj).abs() < 1e-12, "obj differs"); +} + +/// The HSDE driver (`use_hsde`) is debuggable through the same entry: it +/// fires the checkpoints, exposes the homogenizing τ/κ as blocks, and the +/// hook does not change the recovered solution. +#[test] +fn hsde_driver_is_debuggable_and_exposes_tau_kappa() { + let prob = active_ineq_qp(); + let opts = QpOptions { + use_hsde: true, + ..QpOptions::default() + }; + + let mut rec = Recorder::default(); + let sol = solve_qp_ipm_debug(&prob, &opts, &mut rec, backend); + + assert_eq!(sol.status, QpStatus::Optimal, "iters={}", sol.iters); + assert!((sol.x[0] - 1.0).abs() < 1e-5, "x0={}", sol.x[0]); + assert!((sol.x[1] - 1.0).abs() < 1e-5, "x1={}", sol.x[1]); + + assert!( + rec.checkpoints.contains(&Checkpoint::IterStart), + "IterStart" + ); + assert!( + rec.checkpoints.contains(&Checkpoint::AfterStep), + "AfterStep" + ); + assert!( + rec.checkpoints.contains(&Checkpoint::Terminated), + "Terminated" + ); + assert!(rec.saw_tau, "HSDE must expose the `tau` block"); + assert_eq!(rec.terminal_status.as_deref(), Some("Optimal")); + + // The attached hook leaves the HSDE result untouched. + let plain = { + let o = QpOptions { + use_hsde: true, + ..QpOptions::default() + }; + solve_qp_ipm(&prob, &o, backend) + }; + assert_eq!(plain.status, sol.status); + for (a, b) in plain.x.iter().zip(&sol.x) { + assert!((a - b).abs() < 1e-10, "x differs: {a} vs {b}"); + } +} + +/// The non-symmetric (exponential/power) HSDE driver is debuggable too, +/// through `solve_conic_hsde_nonsym_debug`. Uses the exp-cone epigraph +/// `min z s.t. x=1, y=1, (x,y,z) ∈ K_exp` (optimum z = e). +#[test] +fn nonsym_exp_cone_driver_is_debuggable() { + use pounce_convex::hsde_nonsym::{ + solve_conic_hsde_nonsym, solve_conic_hsde_nonsym_debug, NsBlock, + }; + + let e = std::f64::consts::E; + let prob = QpProblem { + n: 3, + p_lower: vec![], + c: vec![0.0, 0.0, 1.0], + a: vec![Triplet::new(0, 0, 1.0), Triplet::new(1, 1, 1.0)], + b: vec![1.0, 1.0], + g: vec![ + Triplet::new(0, 0, -1.0), + Triplet::new(1, 1, -1.0), + Triplet::new(2, 2, -1.0), + ], + h: vec![0.0, 0.0, 0.0], + lb: vec![], + ub: vec![], + }; + let specs = [NsBlock::exp()]; + let opts = QpOptions::default(); + + let mut rec = Recorder::default(); + let sol = solve_conic_hsde_nonsym_debug(&prob, &specs, &opts, &mut rec, backend); + + assert_eq!(sol.status, QpStatus::Optimal, "iters={}", sol.iters); + assert!((sol.x[2] - e).abs() < 1e-5, "z={} vs e", sol.x[2]); + + assert!( + rec.checkpoints.contains(&Checkpoint::IterStart), + "IterStart" + ); + assert!( + rec.checkpoints.contains(&Checkpoint::AfterStep), + "AfterStep" + ); + assert!( + rec.checkpoints.contains(&Checkpoint::Terminated), + "Terminated" + ); + assert!(rec.saw_tau, "nonsym HSDE must expose the `tau` block"); + assert_eq!(rec.terminal_status.as_deref(), Some("Optimal")); + + // The hook leaves the recovered solution untouched. + let plain = solve_conic_hsde_nonsym(&prob, &specs, &opts, backend); + assert_eq!(plain.status, sol.status); + for (a, b) in plain.x.iter().zip(&sol.x) { + assert!((a - b).abs() < 1e-9, "x differs: {a} vs {b}"); + } +} + +/// The debugger can edit the iterate in place (`set`) and snapshot/restore +/// it (`goto`). `set mu` is rejected (μ is derived). +#[test] +fn convex_debugger_supports_set_and_rewind() { + use std::cell::RefCell; + + // A hook that, at the first IterStart, snapshots the iterate, perturbs + // `x`, confirms the edit took, then restores — all via the trait. + #[derive(Default)] + struct Mutator { + snap: RefCell>>, + edited_x0: RefCell>, + restored_x0: RefCell>, + set_mu_err: RefCell, + done: bool, + } + impl DebugHook for Mutator { + fn at_checkpoint(&mut self, st: &mut dyn DebugState) -> DebugAction { + if self.done || st.checkpoint() != Checkpoint::IterStart { + return DebugAction::Resume; + } + self.done = true; + // Snapshot, then edit x[0]. + *self.snap.borrow_mut() = st.snapshot(); + let mut x = st.block("x").unwrap(); + x[0] += 1.25; + st.set_block("x", &x).expect("set_block x"); + *self.edited_x0.borrow_mut() = st.block("x").map(|v| v[0]); + // μ is derived — editing it must be refused. + *self.set_mu_err.borrow_mut() = st.set_mu(0.5).is_err(); + // Restore the snapshot and read x[0] back. + let snap = self.snap.borrow_mut().take().unwrap(); + assert!(st.restore(snap.as_ref()), "restore should succeed"); + *self.restored_x0.borrow_mut() = st.block("x").map(|v| v[0]); + DebugAction::Resume + } + } + + let prob = active_ineq_qp(); + let opts = QpOptions::default(); + let mut hook = Mutator::default(); + let sol = solve_qp_ipm_debug(&prob, &opts, &mut hook, backend); + + // The edit was observed, set_mu refused, and the restore undid the edit. + assert_eq!(hook.edited_x0.into_inner(), Some(1.25), "edit visible"); + assert!(hook.set_mu_err.into_inner(), "set mu must be rejected"); + assert_eq!( + hook.restored_x0.into_inner(), + Some(0.0), + "restore should bring x[0] back to the cold-start 0" + ); + // The solve still converges (the edit+restore was a no-op net change). + assert_eq!(sol.status, QpStatus::Optimal, "iters={}", sol.iters); + assert!((sol.x[0] - 1.0).abs() < 1e-6 && (sol.x[1] - 1.0).abs() < 1e-6); +} + +/// `solve_socp_ipm_debug` is the umbrella conic debug entry used by the +/// `pounce_cblib --debug` CLI path: exp/power cones route to the +/// non-symmetric driver, all others to the direct symmetric IPM. Here an +/// exp-cone epigraph (optimum z = e) exercises the routing. +#[test] +fn solve_socp_ipm_debug_routes_and_fires() { + use pounce_convex::{solve_socp_ipm, solve_socp_ipm_debug, ConeSpec}; + + let e = std::f64::consts::E; + let prob = QpProblem { + n: 3, + p_lower: vec![], + c: vec![0.0, 0.0, 1.0], + a: vec![Triplet::new(0, 0, 1.0), Triplet::new(1, 1, 1.0)], + b: vec![1.0, 1.0], + g: vec![ + Triplet::new(0, 0, -1.0), + Triplet::new(1, 1, -1.0), + Triplet::new(2, 2, -1.0), + ], + h: vec![0.0, 0.0, 0.0], + lb: vec![], + ub: vec![], + }; + let cones = [ConeSpec::Exponential]; + let opts = QpOptions::default(); + + let mut rec = Recorder::default(); + let sol = solve_socp_ipm_debug(&prob, &cones, &opts, &mut rec, backend); + + assert_eq!(sol.status, QpStatus::Optimal, "iters={}", sol.iters); + assert!((sol.x[2] - e).abs() < 1e-5, "z={} vs e", sol.x[2]); + assert!( + rec.checkpoints.contains(&Checkpoint::IterStart), + "IterStart" + ); + assert!(rec.saw_tau, "exp cone routes to HSDE → tau exposed"); + + let plain = solve_socp_ipm(&prob, &cones, &opts, backend); + assert_eq!(plain.status, sol.status); + for (a, b) in plain.x.iter().zip(&sol.x) { + assert!((a - b).abs() < 1e-9, "x differs: {a} vs {b}"); + } +} + +/// A hook that requests `Stop` at the first checkpoint halts the solve +/// short of convergence (the debugger `quit` path). +#[test] +fn stop_action_halts_the_solve() { + struct StopNow; + impl DebugHook for StopNow { + fn at_checkpoint(&mut self, _st: &mut dyn DebugState) -> DebugAction { + DebugAction::Stop + } + } + let prob = active_ineq_qp(); + let opts = QpOptions::default(); + let mut hook = StopNow; + let sol = solve_qp_ipm_debug(&prob, &opts, &mut hook, backend); + // Stopped at iteration 0 before convergence — not Optimal. + assert_ne!(sol.status, QpStatus::Optimal); +} diff --git a/crates/pounce-convex/tests/infeasibility.rs b/crates/pounce-convex/tests/infeasibility.rs new file mode 100644 index 00000000..470931b1 --- /dev/null +++ b/crates/pounce-convex/tests/infeasibility.rs @@ -0,0 +1,246 @@ +//! Verified infeasibility / unboundedness detection (the HSDE benefit: +//! clean status instead of exhausting the iteration budget). +//! +//! Each declared status is backed by a checked certificate, so these +//! tests also implicitly confirm there are no false positives — the +//! feasible/optimal problems in the rest of the suite must still report +//! `Optimal`, and a couple of those are re-checked here for contrast. + +use pounce_convex::{solve_qp_ipm, QpOptions, QpProblem, QpStatus, Triplet}; +use pounce_feral::FeralSolverInterface; +use pounce_linsol::SparseSymLinearSolverInterface; + +fn backend() -> Box { + Box::new(FeralSolverInterface::new()) +} + +fn solve(prob: &QpProblem) -> pounce_convex::QpSolution { + solve_qp_ipm(prob, &QpOptions::default(), backend) +} + +/// Primal-infeasible: contradictory equalities x0 = 1 and x0 = 2. +/// (min x0² subject to both.) No x satisfies the constraints. +#[test] +fn primal_infeasible_contradictory_equalities() { + let prob = QpProblem { + n: 1, + p_lower: vec![Triplet::new(0, 0, 2.0)], + c: vec![0.0], + a: vec![Triplet::new(0, 0, 1.0), Triplet::new(1, 0, 1.0)], + b: vec![1.0, 2.0], + g: vec![], + h: vec![], + lb: vec![], + ub: vec![], + }; + let sol = solve(&prob); + assert_eq!( + sol.status, + QpStatus::PrimalInfeasible, + "expected primal infeasible, got {:?} after {} iters", + sol.status, + sol.iters + ); +} + +/// Primal-infeasible via inequalities: x0 ≤ 0 and x0 ≥ 1 (written +/// −x0 ≤ −1). Empty feasible set. +#[test] +fn primal_infeasible_contradictory_inequalities() { + let prob = QpProblem { + n: 1, + p_lower: vec![Triplet::new(0, 0, 2.0)], + c: vec![0.0], + a: vec![], + b: vec![], + g: vec![ + Triplet::new(0, 0, 1.0), // x0 ≤ 0 + Triplet::new(1, 0, -1.0), // −x0 ≤ −1 (x0 ≥ 1) + ], + h: vec![0.0, -1.0], + lb: vec![], + ub: vec![], + }; + let sol = solve(&prob); + assert_eq!( + sol.status, + QpStatus::PrimalInfeasible, + "got {:?} after {} iters", + sol.status, + sol.iters + ); +} + +/// Unbounded LP: min −x0 with x0 ≥ 0 (no upper bound). Objective → −∞ +/// along the recession direction d = (1). +#[test] +fn dual_infeasible_unbounded_lp() { + let prob = QpProblem { + n: 1, + p_lower: vec![], // LP (P = 0) + c: vec![-1.0], + a: vec![], + b: vec![], + g: vec![Triplet::new(0, 0, -1.0)], // −x0 ≤ 0 (x0 ≥ 0) + h: vec![0.0], + lb: vec![], + ub: vec![], + }; + let sol = solve(&prob); + assert_eq!( + sol.status, + QpStatus::DualInfeasible, + "expected unbounded (dual infeasible), got {:?} after {} iters", + sol.status, + sol.iters + ); +} + +/// Unbounded QP: a singular Hessian with a recession direction. min x1² +/// − x0 with x0 free, x1 free. The x0 direction has Pd = 0 and cᵀd < 0, +/// so the objective is unbounded below. +#[test] +fn dual_infeasible_unbounded_qp_singular_hessian() { + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(1, 1, 2.0)], // only x1 is in P + c: vec![-1.0, 0.0], // −x0 + a: vec![], + b: vec![], + g: vec![], + h: vec![], + lb: vec![], + ub: vec![], + }; + let sol = solve(&prob); + assert_eq!( + sol.status, + QpStatus::DualInfeasible, + "got {:?} after {} iters", + sol.status, + sol.iters + ); +} + +/// Contrast: a feasible, bounded QP must still report Optimal — the +/// detector must not false-positive. min (x0−1)² + (x1−1)², 0 ≤ x ≤ 5. +#[test] +fn feasible_bounded_still_optimal() { + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![-2.0, -2.0], + a: vec![], + b: vec![], + g: vec![ + Triplet::new(0, 0, 1.0), + Triplet::new(1, 1, 1.0), + Triplet::new(2, 0, -1.0), + Triplet::new(3, 1, -1.0), + ], + h: vec![5.0, 5.0, 0.0, 0.0], + lb: vec![], + ub: vec![], + }; + let sol = solve(&prob); + assert_eq!(sol.status, QpStatus::Optimal, "iters={}", sol.iters); + assert!((sol.x[0] - 1.0).abs() < 1e-6); + assert!((sol.x[1] - 1.0).abs() < 1e-6); +} + +// --- Status / edge-case honesty (PR70 item C) ----------------------------- +// +// A solver that stops early for *any* reason must say so. The danger these +// guard against is a confident `Optimal` (or a spurious infeasible/unbounded) +// on a problem the solver did not actually finish or that is degenerate. + +/// Iteration-limit honesty: a real, feasible, bounded QP that needs several +/// IPM iterations must report `IterationLimit` — never a premature `Optimal`, +/// and never a false infeasible/unbounded — when starved of iterations. +#[test] +fn iteration_limit_reported_not_optimal() { + // The same well-posed box QP as `feasible_bounded_still_optimal`, which + // converges in several iterations at the default cap. With max_iter = 1 it + // cannot have converged, so the only honest status is IterationLimit. + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![-2.0, -2.0], + a: vec![], + b: vec![], + g: vec![ + Triplet::new(0, 0, 1.0), + Triplet::new(1, 1, 1.0), + Triplet::new(2, 0, -1.0), + Triplet::new(3, 1, -1.0), + ], + h: vec![5.0, 5.0, 0.0, 0.0], + lb: vec![], + ub: vec![], + }; + let opts = QpOptions { + max_iter: 1, + ..QpOptions::default() + }; + let sol = solve_qp_ipm(&prob, &opts, backend); + assert_eq!( + sol.status, + QpStatus::IterationLimit, + "1-iteration solve must report IterationLimit, got {:?}", + sol.status + ); + assert_ne!( + sol.status, + QpStatus::Optimal, + "must not claim Optimal after a single iteration" + ); +} + +/// Degenerate input — a variable fixed by equal bounds (lb == ub) — must +/// solve honestly to `Optimal` at the fixed value, not trip a spurious +/// infeasible/unbounded or numerical failure. +#[test] +fn fixed_variable_equal_bounds_optimal() { + // min x0² + x1² − 6x0 − 6x1, x0 fixed to 1 (lb==ub==1), x1 ∈ [0, 10]. + // Unconstrained min is (3, 3); with x0 pinned the optimum is (1, 3). + // obj = 1 + 9 − 6 − 18 = −14. + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![-6.0, -6.0], + a: vec![], + b: vec![], + g: vec![], + h: vec![], + lb: vec![1.0, 0.0], + ub: vec![1.0, 10.0], + }; + let sol = solve(&prob); + assert_eq!(sol.status, QpStatus::Optimal, "iters={}", sol.iters); + assert!((sol.x[0] - 1.0).abs() < 1e-6, "x0={}", sol.x[0]); + assert!((sol.x[1] - 3.0).abs() < 1e-6, "x1={}", sol.x[1]); + assert!((sol.obj - (-14.0)).abs() < 1e-6, "obj={}", sol.obj); +} + +/// Edge input — a fully unconstrained QP (no equalities, no inequalities, no +/// bounds) — must still solve to its stationary point and report `Optimal`. +#[test] +fn unconstrained_qp_optimal() { + // min x0² + x1² − 6x0 + 4x1 -> min at (3, −2), obj = 9 + 4 − 18 − 8 = −13. + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![-6.0, 4.0], + a: vec![], + b: vec![], + g: vec![], + h: vec![], + lb: vec![], + ub: vec![], + }; + let sol = solve(&prob); + assert_eq!(sol.status, QpStatus::Optimal, "iters={}", sol.iters); + assert!((sol.x[0] - 3.0).abs() < 1e-6, "x0={}", sol.x[0]); + assert!((sol.x[1] - (-2.0)).abs() < 1e-6, "x1={}", sol.x[1]); + assert!((sol.obj - (-13.0)).abs() < 1e-6, "obj={}", sol.obj); +} diff --git a/crates/pounce-convex/tests/presolve_bound_tightening.rs b/crates/pounce-convex/tests/presolve_bound_tightening.rs new file mode 100644 index 00000000..b0e151c1 --- /dev/null +++ b/crates/pounce-convex/tests/presolve_bound_tightening.rs @@ -0,0 +1,320 @@ +//! Bound-tightening presolve: domain propagation shrinks variable boxes, +//! and an *active* tightened bound's multiplier is re-attributed to the row +//! that implied it (the multiplier on a non-real bound belongs to the +//! constraint, not the variable). Because that dual recovery is the subtle +//! part, this suite leans on **randomized KKT roundtrip** testing: many +//! random tightening-rich problems are solved with and without presolve, +//! and the postsolved `(x, y, z, z_lb, z_ub)` is checked to be a valid KKT +//! point of the *original* problem (and to match the direct primal). + +use pounce_convex::presolve::{presolve, solve_with_presolve, PresolveOutcome}; +use pounce_convex::{solve_qp_ipm, QpOptions, QpProblem, QpSolution, QpStatus, Triplet}; +use pounce_feral::FeralSolverInterface; +use pounce_linsol::SparseSymLinearSolverInterface; + +fn backend() -> Box { + Box::new(FeralSolverInterface::new()) +} + +fn direct(prob: &QpProblem) -> QpSolution { + solve_qp_ipm(prob, &QpOptions::default(), backend) +} + +fn with_presolve(prob: &QpProblem) -> QpSolution { + solve_with_presolve(prob, |r| solve_qp_ipm(r, &QpOptions::default(), backend)) +} + +/// Bound-aware KKT validity to tolerance `tol`. +fn assert_kkt(prob: &QpProblem, sol: &QpSolution, tol: f64) { + let n = prob.n; + let mut g = prob.c.clone(); + prob.p_mul(&sol.x, &mut g); + prob.at_mul(&sol.y, &mut g); + prob.gt_mul(&sol.z, &mut g); + for i in 0..n { + let stat = g[i] + sol.z_ub[i] - sol.z_lb[i]; + assert!(stat.abs() < tol, "stationarity[{i}] = {stat}"); + assert!( + sol.z_lb[i] > -tol && sol.z_ub[i] > -tol, + "bound dual sign [{i}]" + ); + assert!( + sol.x[i] >= prob.lb_of(i) - tol && sol.x[i] <= prob.ub_of(i) + tol, + "box [{i}]: {} in [{}, {}]", + sol.x[i], + prob.lb_of(i), + prob.ub_of(i) + ); + assert!( + (sol.z_lb[i] * (sol.x[i] - prob.lb_of(i))).abs() < 1e-4, + "lb comp [{i}]" + ); + assert!( + (sol.z_ub[i] * (prob.ub_of(i) - sol.x[i])).abs() < 1e-4, + "ub comp [{i}]" + ); + } + let mut gx = vec![0.0; prob.m_ineq()]; + prob.g_mul(&sol.x, &mut gx); + for i in 0..prob.m_ineq() { + let slack = prob.h[i] - gx[i]; + assert!(slack > -tol, "Gx≤h row {i}: slack {slack}"); + assert!(sol.z[i] > -tol, "z[{i}] < 0"); + assert!((sol.z[i] * slack).abs() < 1e-4, "ineq comp row {i}"); + } + let mut ax = vec![0.0; prob.m_eq()]; + prob.a_mul(&sol.x, &mut ax); + for (i, (&axi, &bi)) in ax.iter().zip(&prob.b).enumerate() { + assert!((axi - bi).abs() < tol, "Ax=b row {i}: {axi} vs {bi}"); + } +} + +/// Tiny deterministic LCG, so the randomized sweep is reproducible. +struct Rng(u64); +impl Rng { + fn next_u64(&mut self) -> u64 { + self.0 = self + .0 + .wrapping_mul(6364136223846793005) + .wrapping_add(1442695040888963407); + self.0 + } + fn unif(&mut self, lo: f64, hi: f64) -> f64 { + let u = (self.next_u64() >> 11) as f64 / (1u64 << 53) as f64; + lo + (hi - lo) * u + } +} + +/// A specific hand-checked case: a singleton inequality tightens a box and +/// the bound is active, so the multiplier must move to the row. +#[test] +fn singleton_inequality_tightens_and_reattributes() { + // min ½·2·(x0−5)² + ½·2·(x1−5)² (via c=−10) s.t. 2·x0 ≤ 3, 0 ≤ x ≤ 10. + // 2x0 ≤ 3 ⇒ x0 ≤ 1.5 (tightened); the objective pulls x0 to 5, so the + // tightened bound is active. x1 is unconstrained ⇒ x1 = 5. + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![-10.0, -10.0], + a: vec![], + b: vec![], + g: vec![Triplet::new(0, 0, 2.0)], + h: vec![3.0], + lb: vec![0.0, 0.0], + ub: vec![10.0, 10.0], + }; + match presolve(&prob) { + PresolveOutcome::Reduced(ps) => assert!(ps.stats().tightened_bounds >= 1), + other => panic!( + "expected Reduced, got {:?}", + matches!(other, PresolveOutcome::Reduced(_)) + ), + } + let sol = with_presolve(&prob); + assert_eq!(sol.status, QpStatus::Optimal); + assert!((sol.x[0] - 1.5).abs() < 1e-5, "x0={}", sol.x[0]); + assert!((sol.x[1] - 5.0).abs() < 1e-5, "x1={}", sol.x[1]); + assert_kkt(&prob, &sol, 1e-5); + // The force holding x0 is the row, not the (slack) real bound: the + // inequality multiplier is positive and the bound multiplier ~0. + assert!( + sol.z[0] > 0.1, + "row multiplier should carry the force: {}", + sol.z[0] + ); + assert!( + sol.z_ub[0].abs() < 1e-5, + "real bound slack ⇒ z_ub≈0: {}", + sol.z_ub[0] + ); + let d = direct(&prob); + assert!((sol.obj - d.obj).abs() < 1e-5); +} + +/// Two-variable forcing-via-tightening: x0 − x1 ≤ −4 with 0≤x≤5 tightens +/// x0's upper toward 1 (when x1 at its min) — the other variable sits at +/// its activity bound, exercising the re-attribution's other-column path. +#[test] +fn pair_inequality_tightening() { + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![-10.0, 6.0], // pull x0 up, push x1 down + a: vec![], + b: vec![], + g: vec![Triplet::new(0, 0, 1.0), Triplet::new(0, 1, -1.0)], + h: vec![-4.0], + lb: vec![0.0, 0.0], + ub: vec![5.0, 5.0], + }; + let sol = with_presolve(&prob); + assert_eq!(sol.status, QpStatus::Optimal); + assert_kkt(&prob, &sol, 1e-5); + let d = direct(&prob); + for i in 0..2 { + assert!( + (sol.x[i] - d.x[i]).abs() < 1e-5, + "x[{i}]: {} vs {}", + sol.x[i], + d.x[i] + ); + } +} + +/// Randomized sweep: many tightening-rich problems, each KKT-validated and +/// primal-matched against a direct solve. Constraints live on disjoint +/// variable groups (singletons and pairs) so tightening fires often. +#[test] +fn randomized_bound_tightening_roundtrip() { + let mut rng = Rng(0x1234_5678_9abc_def0); + let mut total_tightened = 0usize; + let mut checked = 0usize; + + for _ in 0..300 { + let n = 6usize; + // Strictly convex diagonal P and random linear cost. + let p_lower: Vec = (0..n) + .map(|i| Triplet::new(i, i, rng.unif(0.5, 3.0))) + .collect(); + let c: Vec = (0..n).map(|_| rng.unif(-8.0, 8.0)).collect(); + let lb = vec![0.0; n]; + let ub = vec![10.0; n]; + + // Disjoint constraint groups: a singleton on x0, x1; a pair on + // (x2,x3); a pair on (x4,x5). Coefficients/RHS random but in a + // range that often (not always) tightens. + let mut g = Vec::new(); + let mut h = Vec::new(); + // singletons + g.push(Triplet::new(0, 0, rng.unif(1.0, 3.0))); + h.push(rng.unif(1.0, 12.0)); + g.push(Triplet::new(1, 1, rng.unif(1.0, 3.0))); + h.push(rng.unif(1.0, 12.0)); + // pair (x2, x3) + let s = if rng.unif(0.0, 1.0) < 0.5 { 1.0 } else { -1.0 }; + g.push(Triplet::new(2, 2, rng.unif(1.0, 2.0))); + g.push(Triplet::new(2, 3, s * rng.unif(1.0, 2.0))); + h.push(rng.unif(-3.0, 8.0)); + // pair (x4, x5) + g.push(Triplet::new(3, 4, rng.unif(1.0, 2.0))); + g.push(Triplet::new(3, 5, rng.unif(1.0, 2.0))); + h.push(rng.unif(2.0, 14.0)); + + let prob = QpProblem { + n, + p_lower, + c, + a: vec![], + b: vec![], + g, + h, + lb, + ub, + }; + + // Skip presolve-detected infeasible instances (random RHS can make + // a group infeasible); the direct solve agrees by status. + match presolve(&prob) { + PresolveOutcome::Infeasible => { + assert_eq!(direct(&prob).status, QpStatus::PrimalInfeasible); + continue; + } + PresolveOutcome::Unbounded => continue, + PresolveOutcome::Reduced(ps) => total_tightened += ps.stats().tightened_bounds, + } + + let sol = with_presolve(&prob); + let d = direct(&prob); + if sol.status != QpStatus::Optimal || d.status != QpStatus::Optimal { + continue; + } + assert_kkt(&prob, &sol, 1e-4); + for i in 0..n { + assert!( + (sol.x[i] - d.x[i]).abs() < 1e-4, + "primal x[{i}]: presolve {} vs direct {}", + sol.x[i], + d.x[i] + ); + } + assert!( + (sol.obj - d.obj).abs() < 1e-4, + "obj {} vs {}", + sol.obj, + d.obj + ); + checked += 1; + } + + assert!(checked > 50, "too few optimal instances checked: {checked}"); + assert!(total_tightened > 0, "no bound tightening exercised"); +} + +/// Randomized sweep with **overlapping** constraints (consecutive rows +/// share a variable, forming a chain). Here tightening sources overlap, so +/// no single round can use them all — the fixpoint must resolve them across +/// rounds while keeping the re-attributed duals correct. KKT-validated. +#[test] +fn randomized_overlapping_tightening_roundtrip() { + let mut rng = Rng(0xC0FF_EE00_1234_5678); + let mut checked = 0usize; + let mut total_tightened = 0usize; + + for _ in 0..300 { + let n = 6usize; + let p_lower: Vec = (0..n) + .map(|i| Triplet::new(i, i, rng.unif(0.5, 3.0))) + .collect(); + let c: Vec = (0..n).map(|_| rng.unif(-8.0, 8.0)).collect(); + + // Chain of overlapping pair inequalities: row i couples x_i, x_{i+1}. + let mut g = Vec::new(); + let mut h = Vec::new(); + for i in 0..n - 1 { + let s = if rng.unif(0.0, 1.0) < 0.5 { 1.0 } else { -1.0 }; + g.push(Triplet::new(i, i, rng.unif(1.0, 2.0))); + g.push(Triplet::new(i, i + 1, s * rng.unif(1.0, 2.0))); + h.push(rng.unif(-2.0, 10.0)); + } + + let prob = QpProblem { + n, + p_lower, + c, + a: vec![], + b: vec![], + g, + h, + lb: vec![0.0; n], + ub: vec![10.0; n], + }; + + match presolve(&prob) { + PresolveOutcome::Infeasible => { + assert_eq!(direct(&prob).status, QpStatus::PrimalInfeasible); + continue; + } + PresolveOutcome::Unbounded => continue, + PresolveOutcome::Reduced(ps) => total_tightened += ps.stats().tightened_bounds, + } + + let sol = with_presolve(&prob); + let d = direct(&prob); + if sol.status != QpStatus::Optimal || d.status != QpStatus::Optimal { + continue; + } + assert_kkt(&prob, &sol, 1e-4); + for i in 0..n { + assert!( + (sol.x[i] - d.x[i]).abs() < 1e-4, + "primal x[{i}]: presolve {} vs direct {}", + sol.x[i], + d.x[i] + ); + } + checked += 1; + } + + assert!(checked > 50, "too few optimal instances: {checked}"); + assert!(total_tightened > 0, "no overlapping tightening exercised"); +} diff --git a/crates/pounce-convex/tests/presolve_conic.rs b/crates/pounce-convex/tests/presolve_conic.rs new file mode 100644 index 00000000..c034fb0b --- /dev/null +++ b/crates/pounce-convex/tests/presolve_conic.rs @@ -0,0 +1,128 @@ +//! Cone-aware presolve (`presolve_conic`): the orthant/equality reductions +//! apply, second-order-cone rows are preserved, and the reduced cone +//! partition is recovered — so presolve composes with the SOCP solve and +//! the postsolved point is KKT-valid for the original problem. + +use pounce_convex::presolve::{presolve_conic, PresolveOutcome}; +use pounce_convex::{solve_socp_ipm, ConeSpec, QpOptions, QpProblem, QpStatus, Triplet}; +use pounce_feral::FeralSolverInterface; +use pounce_linsol::SparseSymLinearSolverInterface; + +fn backend() -> Box { + Box::new(FeralSolverInterface::new()) +} + +fn in_soc(u: &[f64], tol: f64) -> bool { + let tail: f64 = u[1..].iter().map(|v| v * v).sum::().sqrt(); + u[0] + tol >= tail +} + +/// A mixed problem: projection onto a second-order cone for (x0,x1,x2), +/// plus an orthant bound `x3 ≤ 5` that appears **twice** (a duplicate the +/// presolve should drop) while leaving the SOC rows verbatim. +#[test] +fn conic_presolve_roundtrip_mixed() { + // min ½‖(x0,x1,x2)‖² − pᵀ(x0,x1,x2) − x3 s.t. + // (x0,x1,x2) ∈ SOC(3) [rows 0,1,2: s = −Gx = x] + // x3 ≤ 5 [row 3, nonneg] + // x3 ≤ 5 (duplicate) [row 4, nonneg] + let p = [1.0, 2.0, 0.0]; // proj onto SOC = (1.5, 1.5, 0) + let prob = QpProblem { + n: 4, + p_lower: vec![ + Triplet::new(0, 0, 1.0), + Triplet::new(1, 1, 1.0), + Triplet::new(2, 2, 1.0), + ], + c: vec![-p[0], -p[1], -p[2], -1.0], + a: vec![], + b: vec![], + g: vec![ + Triplet::new(0, 0, -1.0), + Triplet::new(1, 1, -1.0), + Triplet::new(2, 2, -1.0), + Triplet::new(3, 3, 1.0), // x3 ≤ 5 + Triplet::new(4, 3, 1.0), // x3 ≤ 5 (duplicate) + ], + h: vec![0.0, 0.0, 0.0, 5.0, 5.0], + lb: vec![], + ub: vec![], + }; + let cones = [ConeSpec::SecondOrder(3), ConeSpec::Nonneg(2)]; + let opts = QpOptions::default(); + + let ps = match presolve_conic(&prob, &cones) { + PresolveOutcome::Reduced(ps) => ps, + other => panic!( + "expected Reduced, got {:?}", + matches!(other, PresolveOutcome::Reduced(_)) + ), + }; + // The duplicate orthant row is dropped; the SOC block survives intact. + let rc = ps.reduced_cones(&cones); + assert_eq!( + rc, + vec![ConeSpec::SecondOrder(3), ConeSpec::Nonneg(1)], + "reduced cones {rc:?}" + ); + assert_eq!(ps.reduced.m_ineq(), 4, "5 → 4 inequality rows"); + + // Solve the reduced SOCP and postsolve to the original space. + let red = solve_socp_ipm(&ps.reduced, &rc, &opts, backend); + assert_eq!(red.status, QpStatus::Optimal); + let sol = ps.postsolve(&red); + + // Primal: SOC projection + x3 = 5. + assert!((sol.x[0] - 1.5).abs() < 1e-5, "x0={}", sol.x[0]); + assert!((sol.x[1] - 1.5).abs() < 1e-5, "x1={}", sol.x[1]); + assert!(sol.x[2].abs() < 1e-5, "x2={}", sol.x[2]); + assert!((sol.x[3] - 5.0).abs() < 1e-5, "x3={}", sol.x[3]); + + // KKT of the original: s = h − Gx, the SOC block ∈ K, z ∈ K, sᵀz ≈ 0, + // stationarity Px + c + Gᵀz = 0. + let mut gx = vec![0.0; prob.m_ineq()]; + prob.g_mul(&sol.x, &mut gx); + let s: Vec = (0..prob.m_ineq()).map(|i| prob.h[i] - gx[i]).collect(); + assert!(in_soc(&s[0..3], 1e-6), "SOC slack {:?}", &s[0..3]); + assert!(in_soc(&sol.z[0..3], 1e-6), "SOC dual {:?}", &sol.z[0..3]); + for i in 3..prob.m_ineq() { + assert!(s[i] > -1e-6 && sol.z[i] > -1e-6, "orthant feas row {i}"); + } + let sz: f64 = s.iter().zip(&sol.z).map(|(a, b)| a * b).sum(); + assert!(sz.abs() < 1e-5, "complementarity {sz}"); + let mut g = prob.c.clone(); + prob.p_mul(&sol.x, &mut g); + prob.gt_mul(&sol.z, &mut g); + for i in 0..prob.n { + assert!(g[i].abs() < 1e-5, "stationarity[{i}] = {}", g[i]); + } +} + +/// A pure SOCP: presolve must be a near-no-op on the cone rows (only the +/// objective/equality machinery can act), leaving the partition unchanged. +#[test] +fn conic_presolve_pure_socp_preserves_cone() { + let prob = QpProblem { + n: 3, + p_lower: (0..3).map(|i| Triplet::new(i, i, 1.0)).collect(), + c: vec![-1.0, -2.0, 0.0], + a: vec![], + b: vec![], + g: vec![ + Triplet::new(0, 0, -1.0), + Triplet::new(1, 1, -1.0), + Triplet::new(2, 2, -1.0), + ], + h: vec![0.0, 0.0, 0.0], + lb: vec![], + ub: vec![], + }; + let cones = [ConeSpec::SecondOrder(3)]; + match presolve_conic(&prob, &cones) { + PresolveOutcome::Reduced(ps) => { + assert_eq!(ps.reduced.m_ineq(), 3, "SOC rows must all survive"); + assert_eq!(ps.reduced_cones(&cones), vec![ConeSpec::SecondOrder(3)]); + } + _ => panic!("expected Reduced"), + } +} diff --git a/crates/pounce-convex/tests/presolve_forcing.rs b/crates/pounce-convex/tests/presolve_forcing.rs new file mode 100644 index 00000000..48c02ef2 --- /dev/null +++ b/crates/pounce-convex/tests/presolve_forcing.rs @@ -0,0 +1,308 @@ +//! Forcing-constraint presolve: a row whose activity range touches its +//! RHS pins every involved variable to a bound. Correctness is checked by +//! verifying the postsolved `(x, y, z, z_lb, z_ub)` is a valid KKT point +//! of the *original* problem — not by comparing duals to a direct solve, +//! because a forcing constraint's multiplier is generally **not unique** +//! (it ranges over an interval), so two valid solves can report different +//! — both correct — duals. The primal of a strictly convex QP is unique, +//! so that we do compare. + +use pounce_convex::presolve::{presolve, solve_with_presolve, PresolveOutcome}; +use pounce_convex::{solve_qp_ipm, QpOptions, QpProblem, QpSolution, QpStatus, Triplet}; +use pounce_feral::FeralSolverInterface; +use pounce_linsol::SparseSymLinearSolverInterface; + +const TOL: f64 = 1e-5; + +fn backend() -> Box { + Box::new(FeralSolverInterface::new()) +} + +fn direct(prob: &QpProblem) -> QpSolution { + solve_qp_ipm(prob, &QpOptions::default(), backend) +} + +fn with_presolve(prob: &QpProblem) -> QpSolution { + solve_with_presolve(prob, |reduced| { + solve_qp_ipm(reduced, &QpOptions::default(), backend) + }) +} + +/// Assert `sol` satisfies the KKT system of `prob` to `TOL`: +/// primal feasibility, dual feasibility (z, z_lb, z_ub ≥ 0), +/// stationarity `Px + c + Aᵀy + Gᵀz + z_ub − z_lb = 0`, and +/// complementarity on every inequality and bound. +fn assert_kkt(prob: &QpProblem, sol: &QpSolution) { + let n = prob.n; + let me = prob.m_eq(); + let mi = prob.m_ineq(); + + // Primal feasibility. + let mut ax = vec![0.0; me]; + prob.a_mul(&sol.x, &mut ax); + for i in 0..me { + assert!( + (ax[i] - prob.b[i]).abs() < TOL, + "Ax=b row {i}: {} vs {}", + ax[i], + prob.b[i] + ); + } + let mut gx = vec![0.0; mi]; + prob.g_mul(&sol.x, &mut gx); + for i in 0..mi { + assert!( + gx[i] <= prob.h[i] + TOL, + "Gx≤h row {i}: {} vs {}", + gx[i], + prob.h[i] + ); + } + for i in 0..n { + assert!( + sol.x[i] >= prob.lb_of(i) - TOL && sol.x[i] <= prob.ub_of(i) + TOL, + "box {i}: {} in [{}, {}]", + sol.x[i], + prob.lb_of(i), + prob.ub_of(i) + ); + } + + // Dual feasibility. + for (i, &zi) in sol.z.iter().enumerate() { + assert!(zi >= -TOL, "z[{i}] = {zi} < 0"); + } + for i in 0..n { + assert!(sol.z_lb[i] >= -TOL, "z_lb[{i}] = {} < 0", sol.z_lb[i]); + assert!(sol.z_ub[i] >= -TOL, "z_ub[{i}] = {} < 0", sol.z_ub[i]); + } + + // Stationarity: Px + c + Aᵀy + Gᵀz + z_ub − z_lb = 0. + let mut g = prob.c.clone(); + prob.p_mul(&sol.x, &mut g); + prob.at_mul(&sol.y, &mut g); + prob.gt_mul(&sol.z, &mut g); + for i in 0..n { + let stat = g[i] + sol.z_ub[i] - sol.z_lb[i]; + assert!(stat.abs() < TOL, "stationarity[{i}] = {stat}"); + } + + // Complementarity. + for i in 0..mi { + assert!( + (sol.z[i] * (prob.h[i] - gx[i])).abs() < TOL, + "ineq comp {i}: z={} slack={}", + sol.z[i], + prob.h[i] - gx[i] + ); + } + for i in 0..n { + assert!( + (sol.z_lb[i] * (sol.x[i] - prob.lb_of(i))).abs() < TOL, + "lb comp {i}" + ); + assert!( + (sol.z_ub[i] * (prob.ub_of(i) - sol.x[i])).abs() < TOL, + "ub comp {i}" + ); + } +} + +fn forcing_rows(prob: &QpProblem) -> usize { + match presolve(prob) { + PresolveOutcome::Reduced(ps) => ps.stats().forcing_rows, + _ => 0, + } +} + +#[test] +fn inequality_forcing_to_lower_bounds() { + // min ½‖x‖² − 2x0 − 3x1 s.t. x0 + x1 ≤ 0, 0 ≤ x ≤ 5. + // min-activity of x0+x1 over the box is 0 = h ⇒ forces x0 = x1 = 0. + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 1.0), Triplet::new(1, 1, 1.0)], + c: vec![-2.0, -3.0], + a: vec![], + b: vec![], + g: vec![Triplet::new(0, 0, 1.0), Triplet::new(0, 1, 1.0)], + h: vec![0.0], + lb: vec![0.0, 0.0], + ub: vec![5.0, 5.0], + }; + assert_eq!( + forcing_rows(&prob), + 1, + "the row should be detected as forcing" + ); + + let sol = with_presolve(&prob); + assert_eq!(sol.status, QpStatus::Optimal); + assert!( + sol.x[0].abs() < TOL && sol.x[1].abs() < TOL, + "x pinned to 0: {:?}", + sol.x + ); + assert_kkt(&prob, &sol); + // Primal matches the direct solve (unique for strictly convex P). + let d = direct(&prob); + assert!((sol.x[0] - d.x[0]).abs() < TOL && (sol.x[1] - d.x[1]).abs() < TOL); + assert!( + (sol.obj - d.obj).abs() < TOL, + "obj {} vs {}", + sol.obj, + d.obj + ); +} + +#[test] +fn inequality_forcing_with_mixed_signs() { + // x0 − x1 ≤ −5 with 0 ≤ x0 ≤ 5, 0 ≤ x1 ≤ 5: min activity of x0 − x1 is + // 0 − 5 = −5 = h ⇒ forces x0 = 0 (lower), x1 = 5 (upper). + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 1.0), Triplet::new(1, 1, 1.0)], + c: vec![1.0, -1.0], + a: vec![], + b: vec![], + g: vec![Triplet::new(0, 0, 1.0), Triplet::new(0, 1, -1.0)], + h: vec![-5.0], + lb: vec![0.0, 0.0], + ub: vec![5.0, 5.0], + }; + assert_eq!(forcing_rows(&prob), 1); + let sol = with_presolve(&prob); + assert_eq!(sol.status, QpStatus::Optimal); + assert!( + (sol.x[0]).abs() < TOL && (sol.x[1] - 5.0).abs() < TOL, + "x={:?}", + sol.x + ); + assert_kkt(&prob, &sol); +} + +#[test] +fn equality_forcing_min_vertex() { + // x0 + 2x1 = 0 with 0 ≤ x ≤ 4: min activity 0 = b ⇒ x0 = x1 = 0. + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 1.0), Triplet::new(1, 1, 1.0)], + c: vec![-1.0, -1.0], + a: vec![Triplet::new(0, 0, 1.0), Triplet::new(0, 1, 2.0)], + b: vec![0.0], + g: vec![], + h: vec![], + lb: vec![0.0, 0.0], + ub: vec![4.0, 4.0], + }; + assert_eq!(forcing_rows(&prob), 1); + let sol = with_presolve(&prob); + assert_eq!(sol.status, QpStatus::Optimal); + assert!( + sol.x[0].abs() < TOL && sol.x[1].abs() < TOL, + "x={:?}", + sol.x + ); + assert_kkt(&prob, &sol); +} + +#[test] +fn equality_forcing_max_vertex() { + // x0 + x1 = 8 with 0 ≤ x ≤ 4: max activity 4+4 = 8 = b ⇒ x0 = x1 = 4. + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 1.0), Triplet::new(1, 1, 1.0)], + c: vec![1.0, 5.0], + a: vec![Triplet::new(0, 0, 1.0), Triplet::new(0, 1, 1.0)], + b: vec![8.0], + g: vec![], + h: vec![], + lb: vec![0.0, 0.0], + ub: vec![4.0, 4.0], + }; + assert_eq!(forcing_rows(&prob), 1); + let sol = with_presolve(&prob); + assert_eq!(sol.status, QpStatus::Optimal); + assert!( + (sol.x[0] - 4.0).abs() < TOL && (sol.x[1] - 4.0).abs() < TOL, + "x={:?}", + sol.x + ); + assert_kkt(&prob, &sol); +} + +#[test] +fn overlapping_forcing_rows_resolved_by_fixpoint() { + // Two forcing rows sharing x1: x0+x1 ≤ 0 and x1+x2 ≤ 0 (box [0,5]). + // A single round can only fire one (disjoint-column rule); the fixpoint + // fires the second next round once x1 is fixed — and the composed + // postsolve recovers a valid KKT point with both rows' multipliers. + let prob = QpProblem { + n: 3, + p_lower: (0..3).map(|i| Triplet::new(i, i, 1.0)).collect(), + c: vec![-2.0, -3.0, -1.0], + a: vec![], + b: vec![], + g: vec![ + Triplet::new(0, 0, 1.0), + Triplet::new(0, 1, 1.0), // x0 + x1 ≤ 0 + Triplet::new(1, 1, 1.0), + Triplet::new(1, 2, 1.0), // x1 + x2 ≤ 0 (shares x1) + ], + h: vec![0.0, 0.0], + lb: vec![0.0; 3], + ub: vec![5.0; 3], + }; + // Both rows forcing ⇒ all three variables pinned to 0. + let sol = with_presolve(&prob); + assert_eq!(sol.status, QpStatus::Optimal); + for i in 0..3 { + assert!( + sol.x[i].abs() < 1e-6, + "x[{i}]={} (all pinned to 0)", + sol.x[i] + ); + } + assert_kkt(&prob, &sol); +} + +#[test] +fn forcing_combined_with_other_rows() { + // A forcing inequality x0 + x1 ≤ 0 (pins x0=x1=0) alongside a live + // inequality x2 + x3 ≤ 3, on a strictly convex objective. Checks that + // forcing coexists with kept rows and the recovered KKT is valid. + let prob = QpProblem { + n: 4, + p_lower: (0..4).map(|i| Triplet::new(i, i, 1.0)).collect(), + c: vec![-2.0, -3.0, -1.0, -1.0], + a: vec![], + b: vec![], + g: vec![ + Triplet::new(0, 0, 1.0), + Triplet::new(0, 1, 1.0), // forcing: x0+x1 ≤ 0 + Triplet::new(1, 2, 1.0), + Triplet::new(1, 3, 1.0), // live: x2+x3 ≤ 3 + ], + h: vec![0.0, 3.0], + lb: vec![0.0; 4], + ub: vec![5.0; 4], + }; + assert_eq!(forcing_rows(&prob), 1); + let sol = with_presolve(&prob); + assert_eq!(sol.status, QpStatus::Optimal); + assert!( + sol.x[0].abs() < TOL && sol.x[1].abs() < TOL, + "forced x={:?}", + &sol.x[..2] + ); + assert_kkt(&prob, &sol); + let d = direct(&prob); + for i in 0..4 { + assert!( + (sol.x[i] - d.x[i]).abs() < TOL, + "x[{i}]: {} vs {}", + sol.x[i], + d.x[i] + ); + } +} diff --git a/crates/pounce-convex/tests/presolve_reductions.rs b/crates/pounce-convex/tests/presolve_reductions.rs new file mode 100644 index 00000000..2f783e04 --- /dev/null +++ b/crates/pounce-convex/tests/presolve_reductions.rs @@ -0,0 +1,993 @@ +//! Tests for the LP-oriented presolve reductions (free columns, +//! duplicate rows) and their detections. +//! +//! Duplicate-row multipliers are non-unique, so where a reduction's dual +//! is not uniquely determined we verify that the postsolved point is a +//! *valid KKT point of the original problem* (stationarity, primal +//! feasibility, sign and complementarity of inequality duals) rather +//! than asserting equality with an independent solve. + +use pounce_convex::presolve::{presolve, solve_with_presolve, PresolveOutcome}; +use pounce_convex::{solve_qp_ipm, QpOptions, QpProblem, QpStatus, Triplet}; +use pounce_feral::FeralSolverInterface; +use pounce_linsol::SparseSymLinearSolverInterface; + +fn backend() -> Box { + Box::new(FeralSolverInterface::new()) +} + +fn with_presolve(prob: &QpProblem) -> pounce_convex::QpSolution { + solve_with_presolve(prob, |r| solve_qp_ipm(r, &QpOptions::default(), backend)) +} + +/// Assert the solution satisfies the original problem's KKT conditions. +fn assert_kkt(prob: &QpProblem, sol: &pounce_convex::QpSolution, tol: f64) { + // Stationarity: Px + c + Aᵀy + Gᵀz = 0. + let mut g = prob.c.clone(); + prob.p_mul(&sol.x, &mut g); + prob.at_mul(&sol.y, &mut g); + prob.gt_mul(&sol.z, &mut g); + for (i, gi) in g.iter().enumerate() { + assert!(gi.abs() < tol, "stationarity[{i}] = {gi}"); + } + // Primal equality feasibility: Ax = b. + let mut ax = vec![0.0; prob.m_eq()]; + prob.a_mul(&sol.x, &mut ax); + for (i, (&axi, &bi)) in ax.iter().zip(&prob.b).enumerate() { + assert!((axi - bi).abs() < tol, "Ax=b row {i}: {axi} vs {bi}"); + } + // Primal inequality feasibility Gx ≤ h, dual sign z ≥ 0, and + // complementarity z·(h − Gx) ≈ 0. + let mut gx = vec![0.0; prob.m_ineq()]; + prob.g_mul(&sol.x, &mut gx); + for i in 0..prob.m_ineq() { + let slack = prob.h[i] - gx[i]; + assert!(slack > -tol, "Gx≤h row {i}: slack {slack}"); + assert!(sol.z[i] > -tol, "z[{i}] = {} < 0", sol.z[i]); + assert!( + (sol.z[i] * slack).abs() < 1e-4, + "complementarity row {i}: z={} slack={slack}", + sol.z[i] + ); + } +} + +/// Bound-aware KKT check (for reductions that leave a variable at an +/// active box bound, e.g. dominated columns): stationarity carries the +/// bound multipliers, `Px + c + Aᵀy + Gᵀz + z_ub − z_lb = 0`, and both the +/// inequality and the bound complementarities must hold. +fn assert_kkt_bounds(prob: &QpProblem, sol: &pounce_convex::QpSolution, tol: f64) { + let n = prob.n; + let mut g = prob.c.clone(); + prob.p_mul(&sol.x, &mut g); + prob.at_mul(&sol.y, &mut g); + prob.gt_mul(&sol.z, &mut g); + for i in 0..n { + let stat = g[i] + sol.z_ub[i] - sol.z_lb[i]; + assert!(stat.abs() < tol, "stationarity[{i}] = {stat}"); + assert!( + sol.z_lb[i] > -tol && sol.z_ub[i] > -tol, + "bound dual sign [{i}]" + ); + assert!( + sol.x[i] >= prob.lb_of(i) - tol && sol.x[i] <= prob.ub_of(i) + tol, + "box [{i}]: {} in [{}, {}]", + sol.x[i], + prob.lb_of(i), + prob.ub_of(i) + ); + assert!( + (sol.z_lb[i] * (sol.x[i] - prob.lb_of(i))).abs() < 1e-4, + "lb comp [{i}]" + ); + assert!( + (sol.z_ub[i] * (prob.ub_of(i) - sol.x[i])).abs() < 1e-4, + "ub comp [{i}]" + ); + } + let mut ax = vec![0.0; prob.m_eq()]; + prob.a_mul(&sol.x, &mut ax); + for (i, (&axi, &bi)) in ax.iter().zip(&prob.b).enumerate() { + assert!((axi - bi).abs() < tol, "Ax=b row {i}: {axi} vs {bi}"); + } + let mut gx = vec![0.0; prob.m_ineq()]; + prob.g_mul(&sol.x, &mut gx); + for i in 0..prob.m_ineq() { + let slack = prob.h[i] - gx[i]; + assert!(slack > -tol, "Gx≤h row {i}: slack {slack}"); + assert!(sol.z[i] > -tol, "z[{i}] < 0"); + assert!((sol.z[i] * slack).abs() < 1e-4, "ineq comp row {i}"); + } +} + +// --- free / empty columns --- + +/// A variable absent from P, A, G with zero cost is irrelevant: presolve +/// pins it to 0 and the rest of the problem solves normally. +#[test] +fn free_column_zero_cost_dropped() { + // min x0² s.t. x0 = 2 ; x1 is free with c1 = 0 (irrelevant). + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0)], + c: vec![0.0, 0.0], + a: vec![Triplet::new(0, 0, 1.0)], // x0 = 2 + b: vec![2.0], + g: vec![], + h: vec![], + lb: vec![], + ub: vec![], + }; + let sol = with_presolve(&prob); + assert_eq!(sol.status, QpStatus::Optimal); + assert!((sol.x[0] - 2.0).abs() < 1e-6, "x0={}", sol.x[0]); + assert!( + sol.x[1].abs() < 1e-9, + "free x1 should be 0, got {}", + sol.x[1] + ); +} + +/// A free column with nonzero cost makes the problem unbounded below. +#[test] +fn free_column_nonzero_cost_unbounded() { + // min x0² − x1, x1 free → unbounded (x1 → +∞). + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0)], + c: vec![0.0, -1.0], + a: vec![], + b: vec![], + g: vec![], + h: vec![], + lb: vec![], + ub: vec![], + }; + assert!(matches!(presolve(&prob), PresolveOutcome::Unbounded)); + assert_eq!(with_presolve(&prob).status, QpStatus::DualInfeasible); +} + +// --- duplicate rows --- + +/// Duplicate equality rows with the same rhs are redundant: drop one, +/// solve, recovered point is KKT-valid for the original problem. +#[test] +fn duplicate_equality_rows_redundant() { + // min x0²+x1² s.t. x0+x1=2 (twice). Optimum (1,1). + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![0.0, 0.0], + a: vec![ + Triplet::new(0, 0, 1.0), + Triplet::new(0, 1, 1.0), + Triplet::new(1, 0, 1.0), // duplicate of row 0 + Triplet::new(1, 1, 1.0), + ], + b: vec![2.0, 2.0], + g: vec![], + h: vec![], + lb: vec![], + ub: vec![], + }; + let sol = with_presolve(&prob); + assert_eq!(sol.status, QpStatus::Optimal); + assert!((sol.x[0] - 1.0).abs() < 1e-6, "x0={}", sol.x[0]); + assert!((sol.x[1] - 1.0).abs() < 1e-6, "x1={}", sol.x[1]); + assert_kkt(&prob, &sol, 1e-5); +} + +/// Duplicate equality rows with *different* rhs are infeasible. +#[test] +fn duplicate_equality_rows_conflicting_infeasible() { + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![0.0, 0.0], + a: vec![ + Triplet::new(0, 0, 1.0), + Triplet::new(0, 1, 1.0), + Triplet::new(1, 0, 1.0), + Triplet::new(1, 1, 1.0), + ], + b: vec![2.0, 3.0], // x0+x1 can't be both 2 and 3 + g: vec![], + h: vec![], + lb: vec![], + ub: vec![], + }; + assert!(matches!(presolve(&prob), PresolveOutcome::Infeasible)); + assert_eq!(with_presolve(&prob).status, QpStatus::PrimalInfeasible); +} + +/// Duplicate inequality rows: keep the tightest. `x0+x1 ≤ 3` and +/// `x0+x1 ≤ 1` (same lhs) → effective bound is 1. +#[test] +fn duplicate_inequality_keeps_tightest() { + // min ½‖x−(5,5)‖² (via c=−5·2) s.t. x0+x1 ≤ 3 and x0+x1 ≤ 1. + // Tightest is x0+x1 ≤ 1; optimum on that line at (0.5, 0.5). + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![-10.0, -10.0], + a: vec![], + b: vec![], + g: vec![ + Triplet::new(0, 0, 1.0), + Triplet::new(0, 1, 1.0), // x0+x1 ≤ 3 + Triplet::new(1, 0, 1.0), + Triplet::new(1, 1, 1.0), // x0+x1 ≤ 1 (tighter) + ], + h: vec![3.0, 1.0], + lb: vec![], + ub: vec![], + }; + let sol = with_presolve(&prob); + assert_eq!(sol.status, QpStatus::Optimal); + assert!((sol.x[0] - 0.5).abs() < 1e-5, "x0={}", sol.x[0]); + assert!((sol.x[1] - 0.5).abs() < 1e-5, "x1={}", sol.x[1]); + assert_kkt(&prob, &sol, 1e-5); +} + +/// A many-duplicate problem exercises the parallel hashing path and must +/// still produce a KKT-valid point. +#[test] +fn many_duplicate_rows_parallel_path() { + // min Σ x_i² s.t. Σ x_i = n repeated K times. Optimum x = 1. + let n = 30usize; + let k = 50usize; // K identical equality rows + let mut p_lower = Vec::new(); + for i in 0..n { + p_lower.push(Triplet::new(i, i, 2.0)); + } + let mut a = Vec::new(); + for row in 0..k { + for i in 0..n { + a.push(Triplet::new(row, i, 1.0)); + } + } + let prob = QpProblem { + n, + p_lower, + c: vec![0.0; n], + a, + b: vec![n as f64; k], + g: vec![], + h: vec![], + lb: vec![], + ub: vec![], + }; + let sol = with_presolve(&prob); + assert_eq!(sol.status, QpStatus::Optimal); + for i in 0..n { + assert!((sol.x[i] - 1.0).abs() < 1e-5, "x[{i}]={}", sol.x[i]); + } + assert_kkt(&prob, &sol, 1e-4); +} + +// --- fixpoint cascade --- + +/// A chain of fixings that only a *fixpoint* presolve fully unwinds: only +/// one singleton exists initially, but fixing it exposes the next, and so +/// on. Iteration fixes the whole chain (reduced problem empty); a single +/// pass would stop after the first. +#[test] +fn fixpoint_cascades_chain_of_fixings() { + // x3 = 3 (singleton) → x2 = 5−x3 = 2 → x1 = 7−x2 = 5 → x0 = 9−x1 = 4. + let prob = QpProblem { + n: 4, + p_lower: (0..4).map(|i| Triplet::new(i, i, 2.0)).collect(), + c: vec![0.0; 4], + a: vec![ + Triplet::new(0, 2, 1.0), + Triplet::new(0, 3, 1.0), // x2 + x3 = 5 + Triplet::new(1, 1, 1.0), + Triplet::new(1, 2, 1.0), // x1 + x2 = 7 + Triplet::new(2, 0, 1.0), + Triplet::new(2, 1, 1.0), // x0 + x1 = 9 + Triplet::new(3, 3, 1.0), // x3 = 3 (the only initial singleton) + ], + b: vec![5.0, 7.0, 9.0, 3.0], + g: vec![], + h: vec![], + lb: vec![], + ub: vec![], + }; + match presolve(&prob) { + PresolveOutcome::Reduced(ps) => { + // Whole chain fixed ⇒ nothing left to solve. + assert_eq!(ps.reduced.n, 0, "fixpoint should fix all four variables"); + assert!(ps.stats().fixed_vars >= 4 || ps.stats().free_col_singletons >= 1); + } + other => panic!("expected Reduced, got {}", status_of(&other)), + } + let sol = with_presolve(&prob); + assert_eq!(sol.status, QpStatus::Optimal); + let expect = [4.0, 5.0, 2.0, 3.0]; + for i in 0..4 { + assert!( + (sol.x[i] - expect[i]).abs() < 1e-6, + "x[{i}]={} want {}", + sol.x[i], + expect[i] + ); + } + assert_kkt(&prob, &sol, 1e-5); +} + +// --- parallel rows (scalar multiples, not just exact duplicates) --- + +/// Parallel equality rows: `x0 + x1 = 2` and `3x0 + 3x1 = 6` are the same +/// constraint scaled by 3. One is dropped; the recovered point is valid. +#[test] +fn parallel_equality_rows_redundant() { + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![0.0, 0.0], + a: vec![ + Triplet::new(0, 0, 1.0), + Triplet::new(0, 1, 1.0), // x0 + x1 = 2 + Triplet::new(1, 0, 3.0), + Triplet::new(1, 1, 3.0), // 3x0 + 3x1 = 6 (= 3×row0) + ], + b: vec![2.0, 6.0], + g: vec![], + h: vec![], + lb: vec![], + ub: vec![], + }; + // One equality row removed by parallel detection. + match presolve(&prob) { + PresolveOutcome::Reduced(ps) => assert_eq!(ps.reduced.m_eq(), 1), + other => panic!("expected Reduced, got {}", status_of(&other)), + } + let sol = with_presolve(&prob); + assert_eq!(sol.status, QpStatus::Optimal); + assert!((sol.x[0] - 1.0).abs() < 1e-6 && (sol.x[1] - 1.0).abs() < 1e-6); + assert_kkt(&prob, &sol, 1e-5); +} + +/// Negatively-scaled parallel equalities: `x0 + x1 = 2` and +/// `−2x0 − 2x1 = −4` are the same constraint. Detected and merged. +#[test] +fn parallel_equality_negative_scale() { + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![0.0, 0.0], + a: vec![ + Triplet::new(0, 0, 1.0), + Triplet::new(0, 1, 1.0), + Triplet::new(1, 0, -2.0), + Triplet::new(1, 1, -2.0), // −2×row0 + ], + b: vec![2.0, -4.0], + g: vec![], + h: vec![], + lb: vec![], + ub: vec![], + }; + match presolve(&prob) { + PresolveOutcome::Reduced(ps) => assert_eq!(ps.reduced.m_eq(), 1), + other => panic!("expected Reduced, got {}", status_of(&other)), + } + let sol = with_presolve(&prob); + assert_eq!(sol.status, QpStatus::Optimal); + assert_kkt(&prob, &sol, 1e-5); +} + +/// Parallel equalities with inconsistent scaled rhs are infeasible: +/// `x0 + x1 = 2` and `2x0 + 2x1 = 5` (≠ 4). +#[test] +fn parallel_equality_inconsistent_infeasible() { + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![0.0, 0.0], + a: vec![ + Triplet::new(0, 0, 1.0), + Triplet::new(0, 1, 1.0), + Triplet::new(1, 0, 2.0), + Triplet::new(1, 1, 2.0), + ], + b: vec![2.0, 5.0], + g: vec![], + h: vec![], + lb: vec![], + ub: vec![], + }; + assert!(matches!(presolve(&prob), PresolveOutcome::Infeasible)); +} + +/// Parallel inequalities (positive multiple): `x0 + x1 ≤ 3` and +/// `2x0 + 2x1 ≤ 2` (⟺ x0 + x1 ≤ 1). The tighter (second) is kept; the +/// optimum lands on x0 + x1 = 1. +#[test] +fn parallel_inequality_keeps_tightest() { + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![-10.0, -10.0], // pull both up; constraint binds + a: vec![], + b: vec![], + g: vec![ + Triplet::new(0, 0, 1.0), + Triplet::new(0, 1, 1.0), // x0 + x1 ≤ 3 + Triplet::new(1, 0, 2.0), + Triplet::new(1, 1, 2.0), // 2x0 + 2x1 ≤ 2 ⟺ x0 + x1 ≤ 1 + ], + h: vec![3.0, 2.0], + lb: vec![], + ub: vec![], + }; + match presolve(&prob) { + PresolveOutcome::Reduced(ps) => assert_eq!(ps.reduced.m_ineq(), 1), + other => panic!("expected Reduced, got {}", status_of(&other)), + } + let sol = with_presolve(&prob); + assert_eq!(sol.status, QpStatus::Optimal); + assert!((sol.x[0] + sol.x[1] - 1.0).abs() < 1e-5, "x={:?}", sol.x); + assert_kkt(&prob, &sol, 1e-5); + // Matches the direct solve's primal. + let d = direct(&prob); + assert!((sol.x[0] - d.x[0]).abs() < 1e-5 && (sol.x[1] - d.x[1]).abs() < 1e-5); +} + +/// Opposite-direction inequalities are *not* merged: `x0 ≤ 3` and +/// `−x0 ≤ −1` (i.e. x0 ≥ 1) form a range, not a duplicate — both kept. +#[test] +fn antiparallel_inequalities_not_merged() { + let prob = QpProblem { + n: 1, + p_lower: vec![Triplet::new(0, 0, 2.0)], + c: vec![0.0], + a: vec![], + b: vec![], + g: vec![Triplet::new(0, 0, 1.0), Triplet::new(1, 0, -1.0)], + h: vec![3.0, -1.0], // x0 ≤ 3 and x0 ≥ 1 + lb: vec![], + ub: vec![], + }; + match presolve(&prob) { + PresolveOutcome::Reduced(ps) => assert_eq!(ps.reduced.m_ineq(), 2, "both kept"), + other => panic!("expected Reduced, got {}", status_of(&other)), + } + let sol = with_presolve(&prob); + assert_eq!(sol.status, QpStatus::Optimal); + assert_kkt(&prob, &sol, 1e-5); +} + +// --- dominated columns --- + +/// Dominated column fixed to its lower bound: x2 has no quadratic/equality +/// term, appears only with a nonnegative coefficient in `≤` rows, and has +/// cost c2 ≥ 0 — so pushing it down never hurts. Presolve fixes x2 = lb. +#[test] +fn dominated_column_fixed_to_lower() { + // min x0² + x1² + 0.5·x2 s.t. x0 + x1 + x2 ≤ 3, 0 ≤ x ≤ 5. + // x2: not in P, only in the ≤ row with +1, cost +0.5 ≥ 0 ⇒ x2 = 0. + let prob = QpProblem { + n: 3, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![-4.0, -4.0, 0.5], + a: vec![], + b: vec![], + g: vec![ + Triplet::new(0, 0, 1.0), + Triplet::new(0, 1, 1.0), + Triplet::new(0, 2, 1.0), + ], + h: vec![3.0], + lb: vec![0.0, 0.0, 0.0], + ub: vec![5.0, 5.0, 5.0], + }; + match presolve(&prob) { + PresolveOutcome::Reduced(ps) => { + assert_eq!(ps.stats().dominated_cols, 1); + assert_eq!(ps.reduced.n, 2); + } + other => panic!("expected Reduced, got {}", status_of(&other)), + } + let sol = with_presolve(&prob); + assert_eq!(sol.status, QpStatus::Optimal); + assert!(sol.x[2].abs() < 1e-6, "x2 fixed to 0: {}", sol.x[2]); + assert_kkt_bounds(&prob, &sol, 1e-5); + let d = direct(&prob); + for i in 0..3 { + assert!( + (sol.x[i] - d.x[i]).abs() < 1e-5, + "x[{i}]: {} vs {}", + sol.x[i], + d.x[i] + ); + } +} + +/// Dominated column fixed to its upper bound (mirror): negative `≤` +/// coefficient and nonpositive cost ⇒ pushing it up never hurts. +#[test] +fn dominated_column_fixed_to_upper() { + // min x0² + x1² − 0.5·x2 s.t. x0 + x1 − x2 ≤ 1, 0 ≤ x ≤ 4. + // x2: not in P, coefficient −1 in the ≤ row, cost −0.5 ≤ 0 ⇒ x2 = 4. + let prob = QpProblem { + n: 3, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![-1.0, -1.0, -0.5], + a: vec![], + b: vec![], + g: vec![ + Triplet::new(0, 0, 1.0), + Triplet::new(0, 1, 1.0), + Triplet::new(0, 2, -1.0), + ], + h: vec![1.0], + lb: vec![0.0, 0.0, 0.0], + ub: vec![4.0, 4.0, 4.0], + }; + match presolve(&prob) { + PresolveOutcome::Reduced(ps) => assert_eq!(ps.stats().dominated_cols, 1), + other => panic!("expected Reduced, got {}", status_of(&other)), + } + let sol = with_presolve(&prob); + assert_eq!(sol.status, QpStatus::Optimal); + assert!((sol.x[2] - 4.0).abs() < 1e-6, "x2 fixed to 4: {}", sol.x[2]); + assert_kkt_bounds(&prob, &sol, 1e-5); + let d = direct(&prob); + for i in 0..3 { + assert!( + (sol.x[i] - d.x[i]).abs() < 1e-5, + "x[{i}]: {} vs {}", + sol.x[i], + d.x[i] + ); + } +} + +/// A column with *mixed-sign* inequality coefficients is NOT dominated +/// (its effect on feasibility is not sign-definite) — left in place. +#[test] +fn mixed_sign_column_not_dominated() { + let prob = QpProblem { + n: 3, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![-1.0, -1.0, 0.5], + a: vec![], + b: vec![], + g: vec![ + Triplet::new(0, 2, 1.0), // +x2 in row 0 + Triplet::new(1, 2, -1.0), // −x2 in row 1 → mixed sign + Triplet::new(0, 0, 1.0), + Triplet::new(1, 1, 1.0), + ], + h: vec![3.0, 3.0], + lb: vec![0.0, 0.0, 0.0], + ub: vec![5.0, 5.0, 5.0], + }; + match presolve(&prob) { + PresolveOutcome::Reduced(ps) => assert_eq!(ps.stats().dominated_cols, 0), + // A no-op presolve is also acceptable here. + _ => {} + } + let sol = with_presolve(&prob); + assert_eq!(sol.status, QpStatus::Optimal); + assert_kkt_bounds(&prob, &sol, 1e-5); +} + +/// Dominated column in a pure LP (P = 0), the common case. +#[test] +fn dominated_column_lp() { + // min −x0 + x1 s.t. x0 + x1 ≤ 2, 0 ≤ x ≤ 3. + // x1: cost +1 ≥ 0, coefficient +1 ≥ 0, not in P ⇒ x1 = 0; then x0 = 2. + let prob = QpProblem { + n: 2, + p_lower: vec![], + c: vec![-1.0, 1.0], + a: vec![], + b: vec![], + g: vec![Triplet::new(0, 0, 1.0), Triplet::new(0, 1, 1.0)], + h: vec![2.0], + lb: vec![0.0, 0.0], + ub: vec![3.0, 3.0], + }; + match presolve(&prob) { + // x1 is dominated; fixpoint iteration then cascades (x0's row + // becomes redundant, leaving x0 dominated too) — ≥ 1 dominated. + PresolveOutcome::Reduced(ps) => assert!(ps.stats().dominated_cols >= 1), + other => panic!("expected Reduced, got {}", status_of(&other)), + } + let sol = with_presolve(&prob); + assert_eq!(sol.status, QpStatus::Optimal); + assert!( + sol.x[1].abs() < 1e-6 && (sol.x[0] - 2.0).abs() < 1e-6, + "x={:?}", + sol.x + ); + assert_kkt_bounds(&prob, &sol, 1e-5); +} + +// --- activity-bound reductions (need the variable box) --- + +use pounce_convex::{NEG_INF, POS_INF}; + +/// Redundant inequality: with x ∈ [0,1]², `x0 + x1 ≤ 5` has max activity +/// 2 ≤ 5, so it is always satisfied → presolve drops it; the recovered +/// point is KKT-valid for the original (un-dropped) problem. +#[test] +fn redundant_inequality_dropped() { + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![-1.0, -1.0], // pull toward (0.5, 0.5), interior + a: vec![], + b: vec![], + g: vec![Triplet::new(0, 0, 1.0), Triplet::new(0, 1, 1.0)], // x0+x1 ≤ 5 + h: vec![5.0], + lb: vec![0.0, 0.0], + ub: vec![1.0, 1.0], + }; + // Presolve should drop the redundant row (0 kept inequalities). + match presolve(&prob) { + PresolveOutcome::Reduced(ps) => { + assert_eq!(ps.reduced.m_ineq(), 0, "redundant row should be dropped"); + } + other => panic!("expected Reduced, got {:?}", status_of(&other)), + } + let sol = with_presolve(&prob); + assert_eq!(sol.status, QpStatus::Optimal); + assert!((sol.x[0] - 0.5).abs() < 1e-5, "x0={}", sol.x[0]); + assert!((sol.x[1] - 0.5).abs() < 1e-5, "x1={}", sol.x[1]); + // The dropped row's dual is 0 — still a valid KKT point. + assert_kkt(&prob, &sol, 1e-5); +} + +/// Activity-infeasible inequality: with x ∈ [2,3], `x0 ≤ 1` has min +/// activity 2 > 1, so no feasible point exists. +#[test] +fn activity_infeasible_inequality() { + let prob = QpProblem { + n: 1, + p_lower: vec![Triplet::new(0, 0, 2.0)], + c: vec![0.0], + a: vec![], + b: vec![], + g: vec![Triplet::new(0, 0, 1.0)], // x0 ≤ 1 + h: vec![1.0], + lb: vec![2.0], + ub: vec![3.0], + }; + assert!(matches!(presolve(&prob), PresolveOutcome::Infeasible)); + assert_eq!(with_presolve(&prob).status, QpStatus::PrimalInfeasible); +} + +/// Activity-infeasible equality: with x ∈ [0,1]², `x0 + x1 = 5` is +/// outside the activity range [0, 2]. +#[test] +fn activity_infeasible_equality() { + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![0.0, 0.0], + a: vec![Triplet::new(0, 0, 1.0), Triplet::new(0, 1, 1.0)], // x0+x1 = 5 + b: vec![5.0], + g: vec![], + h: vec![], + lb: vec![0.0, 0.0], + ub: vec![1.0, 1.0], + }; + assert!(matches!(presolve(&prob), PresolveOutcome::Infeasible)); + assert_eq!(with_presolve(&prob).status, QpStatus::PrimalInfeasible); +} + +/// A negative-coefficient row exercises the `a < 0` branch of the +/// activity computation: with x ∈ [0,1]², `−x0 − x1 ≤ 0.5` has min +/// activity −2 ≤ 0.5 (not infeasible) and max activity 0 ≤ 0.5 +/// (redundant) → dropped. +#[test] +fn redundant_inequality_negative_coeffs() { + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![-1.0, -1.0], + a: vec![], + b: vec![], + g: vec![Triplet::new(0, 0, -1.0), Triplet::new(0, 1, -1.0)], // −x0−x1 ≤ 0.5 + h: vec![0.5], + lb: vec![0.0, 0.0], + ub: vec![1.0, 1.0], + }; + match presolve(&prob) { + PresolveOutcome::Reduced(ps) => assert_eq!(ps.reduced.m_ineq(), 0), + other => panic!("expected Reduced, got {:?}", status_of(&other)), + } + let sol = with_presolve(&prob); + assert_eq!(sol.status, QpStatus::Optimal); + assert_kkt(&prob, &sol, 1e-5); +} + +/// Unbounded variables must *not* make a row look redundant: with x0 +/// free (no upper bound), `x0 ≤ 1` has max activity +∞, so the row is +/// kept and genuinely binds the solution. +#[test] +fn unbounded_variable_row_not_dropped() { + let prob = QpProblem { + n: 1, + p_lower: vec![Triplet::new(0, 0, 2.0)], + c: vec![-10.0], // unconstrained optimum at 5, so x0 ≤ 1 binds + a: vec![], + b: vec![], + g: vec![Triplet::new(0, 0, 1.0)], // x0 ≤ 1 + h: vec![1.0], + lb: vec![NEG_INF], + ub: vec![POS_INF], + }; + match presolve(&prob) { + PresolveOutcome::Reduced(ps) => { + assert_eq!(ps.reduced.m_ineq(), 1, "row must be kept (activity +∞)"); + } + other => panic!("expected Reduced, got {:?}", status_of(&other)), + } + let sol = with_presolve(&prob); + assert_eq!(sol.status, QpStatus::Optimal); + assert!((sol.x[0] - 1.0).abs() < 1e-5, "x0={}", sol.x[0]); +} + +/// Helper for panic messages: name the non-Reduced outcome. +fn status_of(o: &PresolveOutcome) -> &'static str { + match o { + PresolveOutcome::Reduced(_) => "Reduced", + PresolveOutcome::Infeasible => "Infeasible", + PresolveOutcome::Unbounded => "Unbounded", + } +} + +// --- free column singleton substitution --- + +fn direct(prob: &QpProblem) -> pounce_convex::QpSolution { + solve_qp_ipm(prob, &QpOptions::default(), backend) +} + +/// A free variable in exactly one equality row is substituted out, +/// eliminating both the variable and the row; the recovered (x, y) must +/// match a direct solve. +/// +/// min x0² + x1² s.t. x0 + x1 + x2 = 3, with x2 free (no bounds, not +/// in P/G). x2 is a free column singleton in the single equality row; it +/// is substituted as x2 = 3 − x0 − x1. The reduced problem has 2 vars +/// and 0 equality rows. Optimum: x0 = x1 = 0, x2 = 3. +#[test] +fn free_column_singleton_substituted() { + let prob = QpProblem { + n: 3, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], // x2 absent from P + c: vec![0.0, 0.0, 0.0], + a: vec![ + Triplet::new(0, 0, 1.0), + Triplet::new(0, 1, 1.0), + Triplet::new(0, 2, 1.0), + ], + b: vec![3.0], + g: vec![], + h: vec![], + lb: vec![NEG_INF, NEG_INF, NEG_INF], + ub: vec![POS_INF, POS_INF, POS_INF], + }; + // Presolve must eliminate the row and the free column. + match presolve(&prob) { + PresolveOutcome::Reduced(ps) => { + assert_eq!(ps.reduced.n, 2, "x2 should be substituted out"); + assert_eq!(ps.reduced.m_eq(), 0, "the equality row should be consumed"); + } + other => panic!("expected Reduced, got {:?}", status_of(&other)), + } + let d = direct(&prob); + let p = with_presolve(&prob); + assert_eq!(p.status, QpStatus::Optimal); + for i in 0..3 { + assert!( + (p.x[i] - d.x[i]).abs() < 1e-5, + "x[{i}]: presolve {} vs direct {}", + p.x[i], + d.x[i] + ); + } + assert!((p.x[2] - 3.0).abs() < 1e-5, "x2={}", p.x[2]); + // The consumed row's multiplier must match the direct solve. + assert!( + (p.y[0] - d.y[0]).abs() < 1e-5, + "y[0]: presolve {} vs direct {}", + p.y[0], + d.y[0] + ); + assert_kkt(&prob, &p, 1e-5); +} + +/// Free column singleton with a nonzero objective on the free variable, +/// so the substitution shifts cost onto the surviving variables. +/// +/// min x0² + 2·x1 s.t. x0 + 3·x1 = 6, x1 free (linear-only, not in +/// P/G). x1 = (6 − x0)/3 is substituted; the reduced objective becomes +/// x0² + 2·(6−x0)/3 = x0² − (2/3)x0 + 4. Optimum x0 = 1/3. +#[test] +fn free_column_singleton_shifts_cost() { + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0)], + c: vec![0.0, 2.0], + a: vec![Triplet::new(0, 0, 1.0), Triplet::new(0, 1, 3.0)], + b: vec![6.0], + g: vec![], + h: vec![], + lb: vec![NEG_INF, NEG_INF], + ub: vec![POS_INF, POS_INF], + }; + let d = direct(&prob); + let p = with_presolve(&prob); + assert_eq!(p.status, QpStatus::Optimal); + assert!((p.x[0] - (1.0 / 3.0)).abs() < 1e-5, "x0={}", p.x[0]); + for i in 0..2 { + assert!( + (p.x[i] - d.x[i]).abs() < 1e-5, + "x[{i}]: {} vs {}", + p.x[i], + d.x[i] + ); + } + assert!( + (p.obj - d.obj).abs() < 1e-5, + "obj: presolve {} vs direct {}", + p.obj, + d.obj + ); + assert!( + (p.y[0] - d.y[0]).abs() < 1e-5, + "y[0]: {} vs {}", + p.y[0], + d.y[0] + ); + assert_kkt(&prob, &p, 1e-5); +} + +/// Regression for the capri LP wrong-answer bug: a free column singleton +/// whose consumed equality row also contains a variable fixed by a +/// *separate* singleton equality row. Postsolve restores the free +/// singleton from the formula `x_col = (b_r − Σ_{j≠col} a_j x_j)/a_col`, +/// which reads the fixed variable's value — so the fixed variable must be +/// restored *before* the free singleton. Naive reverse-LIFO replay (the +/// old code) restored them in push order, leaving the free singleton +/// computed against the fixed var's zero-initialized value and producing a +/// point that violates the consumed row (the silent capri 2625 vs 2690 +/// wrong answer). +/// +/// min x2² s.t. x0 + x1 + x2 = 10, x1 = 3, x2 ≥ 0, x0 free. +/// x1 fixes to 3 (singleton row), the first row becomes x0 + x2 = 7, and +/// x0 (free, now a singleton there) is substituted as x0 = 10 − x1 − x2. +/// Reduced problem: min x2², x2 ≥ 0 → x2 = 0, then x0 = 7, x1 = 3. +#[test] +fn free_singleton_depends_on_fixed_var_postsolve_order() { + let prob = QpProblem { + n: 3, + p_lower: vec![Triplet::new(2, 2, 2.0)], // only x2 in the objective + c: vec![0.0, 0.0, 0.0], + a: vec![ + Triplet::new(0, 0, 1.0), + Triplet::new(0, 1, 1.0), + Triplet::new(0, 2, 1.0), // x0 + x1 + x2 = 10 + Triplet::new(1, 1, 1.0), // x1 = 3 (singleton → FixedVar) + ], + b: vec![10.0, 3.0], + g: vec![], + h: vec![], + lb: vec![NEG_INF, NEG_INF, 0.0], // x0 free; x2 ≥ 0 + ub: vec![POS_INF, POS_INF, POS_INF], + }; + let sol = with_presolve(&prob); + assert_eq!(sol.status, QpStatus::Optimal); + // The recovered point must satisfy *both* equality rows. Before the + // two-pass postsolve fix, row 0 was violated by 3 (x0 restored as 10 + // instead of 7 because x1 was still 0 when the formula was applied). + let mut ax = vec![0.0; prob.m_eq()]; + prob.a_mul(&sol.x, &mut ax); + for (i, (&axi, &bi)) in ax.iter().zip(&prob.b).enumerate() { + assert!((axi - bi).abs() < 1e-6, "Ax=b row {i}: {axi} vs {bi}"); + } + // x2 only approaches its active bound asymptotically (near-boundary + // IPM slack), so values are checked to 1e-4; feasibility above is the + // tight regression guard. + assert!((sol.x[0] - 7.0).abs() < 1e-4, "x0={} (want 7)", sol.x[0]); + assert!((sol.x[1] - 3.0).abs() < 1e-4, "x1={} (want 3)", sol.x[1]); + assert!((sol.x[2] - 0.0).abs() < 1e-4, "x2={} (want 0)", sol.x[2]); +} + +/// A bounded variable in one row is *not* a free column singleton (its +/// box can bind), so it must not be substituted. +#[test] +fn bounded_variable_not_substituted() { + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0)], + c: vec![0.0, 0.0], + a: vec![Triplet::new(0, 0, 1.0), Triplet::new(0, 1, 1.0)], + b: vec![3.0], + g: vec![], + h: vec![], + lb: vec![0.0, 0.0], // x1 has a finite lower bound → not free + ub: vec![POS_INF, POS_INF], + }; + match presolve(&prob) { + PresolveOutcome::Reduced(ps) => { + // Neither var is substituted; the equality row survives. + assert_eq!(ps.reduced.m_eq(), 1, "bounded var must keep its row"); + } + other => panic!("expected Reduced, got {:?}", status_of(&other)), + } + let sol = with_presolve(&prob); + assert_eq!(sol.status, QpStatus::Optimal); + // Degenerate vertex (bound and constraint both active), so the IPM + // converges to looser KKT tolerance — the point of this test is the + // *non*-substitution above, not solver precision. + assert_kkt(&prob, &sol, 1e-3); +} + +// --- presolve statistics --- + +/// `Presolve::stats()` reports the reduction sizes and counts by type. +#[test] +fn presolve_stats_report() { + // x2 (free singleton) is substituted out → removes a var and a row; + // x3 (free, zero cost) is dropped as a free column. + let prob = QpProblem { + n: 4, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![0.0, 0.0, 0.0, 0.0], + a: vec![ + Triplet::new(0, 0, 1.0), + Triplet::new(0, 1, 1.0), + Triplet::new(0, 2, 1.0), // x2 free singleton in this row + ], + b: vec![3.0], + g: vec![], + h: vec![], + lb: vec![NEG_INF, NEG_INF, NEG_INF, NEG_INF], + ub: vec![POS_INF, POS_INF, POS_INF, POS_INF], + }; + match presolve(&prob) { + PresolveOutcome::Reduced(ps) => { + let s = ps.stats(); + assert!(s.reduced_anything()); + assert_eq!(s.orig_vars, 4); + assert_eq!(s.orig_rows, 1); + // x2 substituted (removes var+row), x3 dropped as free column. + assert_eq!(s.free_col_singletons, 1, "stats={s:?}"); + assert_eq!(s.free_cols_fixed, 1, "stats={s:?}"); + assert_eq!(s.reduced_rows, 0, "the row is consumed; stats={s:?}"); + assert_eq!(s.reduced_vars, 2, "x2,x3 removed; stats={s:?}"); + } + other => panic!("expected Reduced, got {:?}", status_of(&other)), + } +} + +/// A no-op presolve reports `reduced_anything() == false`. +#[test] +fn presolve_stats_noop() { + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![-1.0, -1.0], + a: vec![], + b: vec![], + g: vec![Triplet::new(0, 0, 1.0), Triplet::new(0, 1, 1.0)], + h: vec![1.0], + lb: vec![0.0, 0.0], + ub: vec![10.0, 10.0], + }; + match presolve(&prob) { + PresolveOutcome::Reduced(ps) => { + let s = ps.stats(); + assert!(!s.reduced_anything(), "stats={s:?}"); + assert_eq!(s.reduced_vars, s.orig_vars); + assert_eq!(s.reduced_rows, s.orig_rows); + } + other => panic!("expected Reduced, got {:?}", status_of(&other)), + } +} diff --git a/crates/pounce-convex/tests/presolve_roundtrip.rs b/crates/pounce-convex/tests/presolve_roundtrip.rs new file mode 100644 index 00000000..31f10020 --- /dev/null +++ b/crates/pounce-convex/tests/presolve_roundtrip.rs @@ -0,0 +1,350 @@ +//! Presolve round-trip exactness (the Phase 3.5 correctness contract): +//! solving with presolve must reproduce the no-presolve `(x, y, z)` to +//! tolerance — primal *and* dual. Also covers presolve-detected +//! infeasibility. +//! +//! Tolerance note: each assertion compares *two independent* IPM solves +//! (direct vs presolved), so the bar is the solvers' own convergence +//! tolerance, not exact equality. We use 1e-5. + +use pounce_convex::presolve::{presolve, solve_with_presolve, PresolveOutcome}; +use pounce_convex::{solve_qp_ipm, QpOptions, QpProblem, QpStatus, Triplet, NEG_INF, POS_INF}; +use pounce_feral::FeralSolverInterface; +use pounce_linsol::SparseSymLinearSolverInterface; + +const TOL: f64 = 1e-5; + +fn backend() -> Box { + Box::new(FeralSolverInterface::new()) +} + +fn direct(prob: &QpProblem) -> pounce_convex::QpSolution { + solve_qp_ipm(prob, &QpOptions::default(), backend) +} + +fn with_presolve(prob: &QpProblem) -> pounce_convex::QpSolution { + solve_with_presolve(prob, |reduced| { + solve_qp_ipm(reduced, &QpOptions::default(), backend) + }) +} + +fn assert_close(a: &[f64], b: &[f64], what: &str) { + assert_eq!(a.len(), b.len(), "{what}: length mismatch"); + for (i, (x, y)) in a.iter().zip(b).enumerate() { + assert!((x - y).abs() < TOL, "{what}[{i}]: {x} vs {y}"); + } +} + +/// Fixed-variable elimination: `min x0²+x1²+x2² s.t. x0+x1+x2=3, x2=2`. +/// The singleton row `x2=2` fixes x2; presolve substitutes it out. +#[test] +fn fixed_variable_roundtrip_matches_direct() { + let prob = QpProblem { + n: 3, + p_lower: vec![ + Triplet::new(0, 0, 2.0), + Triplet::new(1, 1, 2.0), + Triplet::new(2, 2, 2.0), + ], + c: vec![0.0, 0.0, 0.0], + a: vec![ + Triplet::new(0, 0, 1.0), + Triplet::new(0, 1, 1.0), + Triplet::new(0, 2, 1.0), + Triplet::new(1, 2, 1.0), // singleton → fixes x2 = 2 + ], + b: vec![3.0, 2.0], + g: vec![], + h: vec![], + lb: vec![], + ub: vec![], + }; + let d = direct(&prob); + let p = with_presolve(&prob); + assert_eq!(d.status, QpStatus::Optimal); + assert_eq!(p.status, QpStatus::Optimal); + assert_close(&p.x, &d.x, "x"); + assert_close(&p.y, &d.y, "y"); + assert!((p.obj - d.obj).abs() < TOL, "obj {} vs {}", p.obj, d.obj); + assert!((p.x[2] - 2.0).abs() < 1e-9, "x2={}", p.x[2]); +} + +/// Fixed variable coupling through an off-diagonal Hessian term, so the +/// substitution must move `P` coupling into the linear term: +/// `min x0² + x0 x1 + x1² s.t. x1 = 1`. +#[test] +fn fixed_variable_with_hessian_coupling_roundtrip() { + let prob = QpProblem { + n: 2, + p_lower: vec![ + Triplet::new(0, 0, 2.0), + Triplet::new(1, 0, 1.0), // x0 x1 coupling + Triplet::new(1, 1, 2.0), + ], + c: vec![0.0, 0.0], + a: vec![Triplet::new(0, 1, 1.0)], // x1 = 1 + b: vec![1.0], + g: vec![], + h: vec![], + lb: vec![], + ub: vec![], + }; + let d = direct(&prob); + let p = with_presolve(&prob); + assert_eq!(p.status, QpStatus::Optimal); + assert_close(&p.x, &d.x, "x"); + assert_close(&p.y, &d.y, "y"); + assert!((p.obj - d.obj).abs() < TOL, "obj {} vs {}", p.obj, d.obj); +} + +/// Fixed variable plus an inequality whose RHS must be adjusted by the +/// substitution: `min x0²-6x0 s.t. x1=1, x0+x1 ≤ 3`. After fixing x1=1 +/// the inequality becomes `x0 ≤ 2`, which binds (unconstrained x0=3). +#[test] +fn fixed_variable_adjusts_inequality_rhs() { + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![-6.0, 0.0], + a: vec![Triplet::new(0, 1, 1.0)], + b: vec![1.0], + g: vec![Triplet::new(0, 0, 1.0), Triplet::new(0, 1, 1.0)], // x0+x1≤3 + h: vec![3.0], + lb: vec![], + ub: vec![], + }; + let d = direct(&prob); + let p = with_presolve(&prob); + assert_eq!(p.status, QpStatus::Optimal); + assert_close(&p.x, &d.x, "x"); + assert_close(&p.y, &d.y, "y"); + assert_close(&p.z, &d.z, "z"); + assert!((p.obj - d.obj).abs() < TOL, "obj {} vs {}", p.obj, d.obj); + // The inequality binds with a clearly nonzero multiplier (~2). + assert!(p.z[0] > 1.0, "inequality should bind, z={}", p.z[0]); +} + +/// Empty-row removal must not change the solution and the empty row's +/// dual is 0. (Non-degenerate: the kept constraint is a strict equality.) +#[test] +fn empty_row_roundtrip() { + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![0.0, 0.0], + a: vec![ + Triplet::new(0, 0, 0.0), // empty row, b=0 → feasible, dropped + Triplet::new(1, 0, 1.0), // x0 + x1 = 2 + Triplet::new(1, 1, 1.0), + ], + b: vec![0.0, 2.0], + g: vec![], + h: vec![], + lb: vec![], + ub: vec![], + }; + let d = direct(&prob); + let p = with_presolve(&prob); + assert_eq!(p.status, QpStatus::Optimal); + assert_close(&p.x, &d.x, "x"); + assert!(p.y[0].abs() < 1e-9, "empty-row dual={}", p.y[0]); +} + +/// Presolve detects trivial primal infeasibility from `0 = 5`. +#[test] +fn empty_row_infeasible_detected() { + let prob = QpProblem { + n: 1, + p_lower: vec![Triplet::new(0, 0, 2.0)], + c: vec![0.0], + a: vec![Triplet::new(0, 0, 0.0)], // 0·x0 = 5 + b: vec![5.0], + g: vec![], + h: vec![], + lb: vec![], + ub: vec![], + }; + assert!(matches!(presolve(&prob), PresolveOutcome::Infeasible)); + assert_eq!(with_presolve(&prob).status, QpStatus::PrimalInfeasible); +} + +/// Full-KKT check on the *original* problem, carrying every recovered dual +/// (equality `y`, inequality `z`, and bound multipliers `z_lb`/`z_ub`). If +/// postsolve mis-reconstructed any dual on a heavily-reduced problem, the +/// stationarity residual would not vanish — so this validates the *whole* +/// recovered solution, not just the primal. +fn assert_original_kkt(prob: &QpProblem, sol: &pounce_convex::QpSolution, tol: f64) { + let n = prob.n; + let mut g = prob.c.clone(); + prob.p_mul(&sol.x, &mut g); + prob.at_mul(&sol.y, &mut g); + prob.gt_mul(&sol.z, &mut g); + for i in 0..n { + // Stationarity with bound multipliers: ∇L + z_ub − z_lb = 0. + let stat = g[i] + sol.z_ub[i] - sol.z_lb[i]; + assert!(stat.abs() < tol, "stationarity[{i}] = {stat}"); + assert!( + sol.z_lb[i] > -tol && sol.z_ub[i] > -tol, + "bound dual sign [{i}]: z_lb={} z_ub={}", + sol.z_lb[i], + sol.z_ub[i] + ); + assert!( + sol.x[i] >= prob.lb_of(i) - tol && sol.x[i] <= prob.ub_of(i) + tol, + "box [{i}]: {} not in [{}, {}]", + sol.x[i], + prob.lb_of(i), + prob.ub_of(i) + ); + // Complementarity only applies to finite bounds (an infinite bound can + // never be active, and `0 · ∞` would be NaN). + if prob.lb_of(i).is_finite() { + assert!( + (sol.z_lb[i] * (sol.x[i] - prob.lb_of(i))).abs() < 1e-4, + "lb complementarity [{i}]" + ); + } + if prob.ub_of(i).is_finite() { + assert!( + (sol.z_ub[i] * (prob.ub_of(i) - sol.x[i])).abs() < 1e-4, + "ub complementarity [{i}]" + ); + } + } + let mut ax = vec![0.0; prob.m_eq()]; + prob.a_mul(&sol.x, &mut ax); + for (i, (&axi, &bi)) in ax.iter().zip(&prob.b).enumerate() { + assert!((axi - bi).abs() < tol, "Ax=b row {i}: {axi} vs {bi}"); + } + let mut gx = vec![0.0; prob.m_ineq()]; + prob.g_mul(&sol.x, &mut gx); + for i in 0..prob.m_ineq() { + let slack = prob.h[i] - gx[i]; + assert!(slack > -tol, "Gx≤h row {i}: slack {slack}"); + assert!(sol.z[i] > -tol, "z[{i}] = {} < 0", sol.z[i]); + assert!( + (sol.z[i] * slack).abs() < 1e-4, + "ineq complementarity row {i}: z={} slack={slack}", + sol.z[i] + ); + } +} + +/// Heavily-reduced problem: a single QP that fires *four distinct* reductions +/// at once — a fixed variable (equality singleton), a free-column singleton +/// (substituted out), a dominated column (fixed to a bound), and a binding +/// inequality — collapsing 6 variables / 2 equalities to a tiny core. Presolve +/// + postsolve must recover the full primal AND dual (equality `y`, inequality +/// `z`, bound `z_lb`/`z_ub`), matching a direct no-presolve solve and the +/// original problem's KKT system. +#[test] +fn heavily_reduced_mixed_reductions_recovers_primal_and_dual() { + // vars: x0,x1,x2 (in P, solved by the IPM); x3 fixed by `x3 = 1`; + // x4 free singleton in `x0+x1+x4 = 4` (substituted); x5 dominated + // (only in the ≤ row with +1, cost ≥ 0, box [0,5]) → fixed to 0. + // The inequality x0 + x2 + x5 ≤ 3 binds at the optimum (nonzero z). + let prob = QpProblem { + n: 6, + p_lower: vec![ + Triplet::new(0, 0, 2.0), + Triplet::new(1, 1, 2.0), + Triplet::new(2, 2, 2.0), + ], + // x0 x1 x2 x3 x4 x5 + c: vec![-8.0, -2.0, -4.0, -3.0, 0.0, 0.5], + a: vec![ + Triplet::new(0, 3, 1.0), // x3 = 1 (fixed variable) + Triplet::new(1, 0, 1.0), + Triplet::new(1, 1, 1.0), + Triplet::new(1, 4, 1.0), // x0+x1+x4 = 4 (x4 free singleton) + ], + b: vec![1.0, 4.0], + g: vec![ + Triplet::new(0, 0, 1.0), + Triplet::new(0, 2, 1.0), + Triplet::new(0, 5, 1.0), // x0+x2+x5 ≤ 3 (x5 dominated) + ], + h: vec![3.0], + lb: vec![0.0, 0.0, 0.0, 0.0, NEG_INF, 0.0], + ub: vec![5.0, 5.0, 5.0, 5.0, POS_INF, 5.0], + }; + + // Presolve must fire all three structural reductions and shrink the core. + match presolve(&prob) { + PresolveOutcome::Reduced(ps) => { + let s = ps.stats(); + assert!(s.fixed_vars >= 1, "expected a fixed var, stats={s:?}"); + assert!( + s.free_col_singletons >= 1, + "expected a free-column singleton, stats={s:?}" + ); + assert!( + s.dominated_cols >= 1, + "expected a dominated column, stats={s:?}" + ); + assert!( + ps.reduced.n <= 3, + "core should collapse to ≤3 vars, got {}", + ps.reduced.n + ); + } + PresolveOutcome::Infeasible => panic!("expected Reduced, got Infeasible"), + PresolveOutcome::Unbounded => panic!("expected Reduced, got Unbounded"), + } + + let d = direct(&prob); + let p = with_presolve(&prob); + assert_eq!(d.status, QpStatus::Optimal); + assert_eq!(p.status, QpStatus::Optimal); + + // Full primal recovery (all six original variables, incl. substituted x4 + // and the fixed/dominated x3,x5). + assert_close(&p.x, &d.x, "x"); + assert!((p.obj - d.obj).abs() < TOL, "obj {} vs {}", p.obj, d.obj); + assert!((p.x[3] - 1.0).abs() < 1e-9, "x3 fixed: {}", p.x[3]); + assert!(p.x[5].abs() < 1e-6, "x5 dominated to 0: {}", p.x[5]); + + // Full dual recovery: equality multipliers, inequality multiplier, and the + // bound multipliers all match the direct solve… + assert_close(&p.y, &d.y, "y"); + assert_close(&p.z, &d.z, "z"); + assert_close(&p.z_lb, &d.z_lb, "z_lb"); + assert_close(&p.z_ub, &d.z_ub, "z_ub"); + // …and the recovered (x, y, z, z_lb, z_ub) is a KKT point of the ORIGINAL. + assert_original_kkt(&prob, &p, 1e-5); + // The inequality genuinely binds (a nonzero recovered multiplier). + assert!(p.z[0] > 1e-3, "inequality should bind, z={}", p.z[0]); + // The dominated column's bound multiplier is recovered nonzero. + assert!( + p.z_lb[5] > 1e-3, + "dominated-column bound dual should be nonzero, z_lb[5]={}", + p.z_lb[5] + ); +} + +/// Nothing to presolve → identity round-trip. Non-degenerate: the bound +/// that binds (x0 ≤ 1, with unconstrained optimum x0 = 3) has a clearly +/// nonzero multiplier, so the two solves agree well within tolerance. +#[test] +fn noop_presolve_roundtrip() { + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 2.0), Triplet::new(1, 1, 2.0)], + c: vec![-6.0, -4.0], // unconstrained opt (3, 2) + a: vec![], + b: vec![], + g: vec![ + Triplet::new(0, 0, 1.0), // x0 ≤ 1 (binds, mult ~4) + Triplet::new(1, 1, 1.0), // x1 ≤ 5 (inactive) + Triplet::new(2, 0, -1.0), // x0 ≥ 0 + Triplet::new(3, 1, -1.0), // x1 ≥ 0 + ], + h: vec![1.0, 5.0, 0.0, 0.0], + lb: vec![], + ub: vec![], + }; + let d = direct(&prob); + let p = with_presolve(&prob); + assert_close(&p.x, &d.x, "x"); + assert_close(&p.z, &d.z, "z"); +} diff --git a/crates/pounce-convex/tests/qp_known_optima.rs b/crates/pounce-convex/tests/qp_known_optima.rs new file mode 100644 index 00000000..e163b747 --- /dev/null +++ b/crates/pounce-convex/tests/qp_known_optima.rs @@ -0,0 +1,197 @@ +//! Validation of the convex-QP interior-point solver against problems +//! with analytically known optima (Phase 2). Each test checks the +//! primal solution, the objective, and — where the optimum is interior +//! or the active set is known — the dual/KKT conditions. +//! +//! FERAL backs the augmented-system factorization so the IPM runs +//! end-to-end without an external linear solver. + +use pounce_convex::{solve_qp_ipm, QpOptions, QpProblem, QpStatus, Triplet}; +use pounce_feral::FeralSolverInterface; +use pounce_linsol::SparseSymLinearSolverInterface; + +fn backend() -> Box { + Box::new(FeralSolverInterface::new()) +} + +fn solve(prob: &QpProblem) -> pounce_convex::QpSolution { + let opts = QpOptions::default(); + solve_qp_ipm(prob, &opts, backend) +} + +/// min ½‖x − x*‖² , i.e. P = I, c = −x*, no constraints. Optimum x = x*. +#[test] +fn unconstrained_quadratic() { + // min ½(x0² + x1²) − 3 x0 − 4 x1 → optimum (3, 4), f* = −12.5 + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 1.0), Triplet::new(1, 1, 1.0)], + c: vec![-3.0, -4.0], + a: vec![], + b: vec![], + g: vec![], + h: vec![], + lb: vec![], + ub: vec![], + }; + let sol = solve(&prob); + assert_eq!(sol.status, QpStatus::Optimal, "iters={}", sol.iters); + assert!((sol.x[0] - 3.0).abs() < 1e-6, "x0={}", sol.x[0]); + assert!((sol.x[1] - 4.0).abs() < 1e-6, "x1={}", sol.x[1]); + assert!((sol.obj - (-12.5)).abs() < 1e-6, "obj={}", sol.obj); +} + +/// Equality-constrained QP with a closed-form KKT solution. +/// min ½(x0² + x1²) s.t. x0 + x1 = 2. Optimum (1, 1), f* = 1, y = −1. +#[test] +fn equality_constrained_quadratic() { + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 1.0), Triplet::new(1, 1, 1.0)], + c: vec![0.0, 0.0], + a: vec![Triplet::new(0, 0, 1.0), Triplet::new(0, 1, 1.0)], + b: vec![2.0], + g: vec![], + h: vec![], + lb: vec![], + ub: vec![], + }; + let sol = solve(&prob); + assert_eq!(sol.status, QpStatus::Optimal, "iters={}", sol.iters); + assert!((sol.x[0] - 1.0).abs() < 1e-6, "x0={}", sol.x[0]); + assert!((sol.x[1] - 1.0).abs() < 1e-6, "x1={}", sol.x[1]); + assert!((sol.obj - 1.0).abs() < 1e-6, "obj={}", sol.obj); +} + +/// Inequality-constrained QP where the constraint is active at optimum. +/// min ½(x0² + x1²) s.t. x0 + x1 ≥ 2 (written as −x0 − x1 ≤ −2). +/// Optimum (1, 1), f* = 1, active with z = 1. +#[test] +fn inequality_active_at_optimum() { + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 1.0), Triplet::new(1, 1, 1.0)], + c: vec![0.0, 0.0], + a: vec![], + b: vec![], + g: vec![Triplet::new(0, 0, -1.0), Triplet::new(0, 1, -1.0)], + h: vec![-2.0], + lb: vec![], + ub: vec![], + }; + let sol = solve(&prob); + assert_eq!(sol.status, QpStatus::Optimal, "iters={}", sol.iters); + assert!((sol.x[0] - 1.0).abs() < 1e-6, "x0={}", sol.x[0]); + assert!((sol.x[1] - 1.0).abs() < 1e-6, "x1={}", sol.x[1]); + assert!((sol.obj - 1.0).abs() < 1e-6, "obj={}", sol.obj); + assert!( + sol.z[0] > 0.5, + "constraint should be active, z={}", + sol.z[0] + ); +} + +/// Inequality that is *inactive* at optimum: the unconstrained optimum +/// already satisfies it, so z → 0. +/// min ½((x0−3)² + (x1−4)²) s.t. x0 + x1 ≤ 100. Optimum (3, 4), z ≈ 0. +#[test] +fn inequality_inactive_at_optimum() { + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 1.0), Triplet::new(1, 1, 1.0)], + c: vec![-3.0, -4.0], + a: vec![], + b: vec![], + g: vec![Triplet::new(0, 0, 1.0), Triplet::new(0, 1, 1.0)], + h: vec![100.0], + lb: vec![], + ub: vec![], + }; + let sol = solve(&prob); + assert_eq!(sol.status, QpStatus::Optimal, "iters={}", sol.iters); + assert!((sol.x[0] - 3.0).abs() < 1e-6, "x0={}", sol.x[0]); + assert!((sol.x[1] - 4.0).abs() < 1e-6, "x1={}", sol.x[1]); + assert!( + sol.z[0] < 1e-5, + "constraint should be inactive, z={}", + sol.z[0] + ); +} + +/// Bound-constrained QP: min ½(x0² + x1²) − 3 x0 − 4 x1 s.t. x0 ≤ 1. +/// Bounds are expressed as inequality rows. Optimum: x0 = 1 (bound +/// active), x1 = 4 (free). f* = ½(1+16) − 3 − 16 = 8.5 − 19 = −10.5. +#[test] +fn bound_constrained_quadratic() { + let prob = QpProblem { + n: 2, + p_lower: vec![Triplet::new(0, 0, 1.0), Triplet::new(1, 1, 1.0)], + c: vec![-3.0, -4.0], + a: vec![], + b: vec![], + g: vec![Triplet::new(0, 0, 1.0)], // x0 ≤ 1 + h: vec![1.0], + lb: vec![], + ub: vec![], + }; + let sol = solve(&prob); + assert_eq!(sol.status, QpStatus::Optimal, "iters={}", sol.iters); + assert!((sol.x[0] - 1.0).abs() < 1e-6, "x0={}", sol.x[0]); + assert!((sol.x[1] - 4.0).abs() < 1e-6, "x1={}", sol.x[1]); + assert!((sol.obj - (-10.5)).abs() < 1e-6, "obj={}", sol.obj); +} + +/// LP as the P = 0 case: min −x0 − x1 s.t. x0 ≤ 1, x1 ≤ 1, x ≥ 0. +/// Optimum (1, 1), f* = −2. +#[test] +fn lp_via_empty_hessian() { + let prob = QpProblem { + n: 2, + p_lower: vec![], // P = 0 → LP + c: vec![-1.0, -1.0], + a: vec![], + b: vec![], + g: vec![ + Triplet::new(0, 0, 1.0), // x0 ≤ 1 + Triplet::new(1, 1, 1.0), // x1 ≤ 1 + Triplet::new(2, 0, -1.0), // −x0 ≤ 0 (x0 ≥ 0) + Triplet::new(3, 1, -1.0), // −x1 ≤ 0 (x1 ≥ 0) + ], + h: vec![1.0, 1.0, 0.0, 0.0], + lb: vec![], + ub: vec![], + }; + let sol = solve(&prob); + assert_eq!(sol.status, QpStatus::Optimal, "iters={}", sol.iters); + assert!((sol.x[0] - 1.0).abs() < 1e-6, "x0={}", sol.x[0]); + assert!((sol.x[1] - 1.0).abs() < 1e-6, "x1={}", sol.x[1]); + assert!((sol.obj - (-2.0)).abs() < 1e-6, "obj={}", sol.obj); +} + +/// Coupled Hessian (off-diagonal P term) with an equality constraint. +/// min ½(x0² + x1²) + x0 x1 s.t. x0 + x1 = 2 → wait, P = [[1,1],[1,1]] +/// is only PSD (singular). Use P = [[2,1],[1,2]] (PD): min ½ xᵀP x with +/// x0 + x1 = 2. Optimum is x0 = x1 = 1 by symmetry; f* = ½·(2+2+2)=3. +#[test] +fn coupled_hessian_equality() { + let prob = QpProblem { + n: 2, + p_lower: vec![ + Triplet::new(0, 0, 2.0), + Triplet::new(1, 0, 1.0), // off-diagonal (lower) + Triplet::new(1, 1, 2.0), + ], + c: vec![0.0, 0.0], + a: vec![Triplet::new(0, 0, 1.0), Triplet::new(0, 1, 1.0)], + b: vec![2.0], + g: vec![], + h: vec![], + lb: vec![], + ub: vec![], + }; + let sol = solve(&prob); + assert_eq!(sol.status, QpStatus::Optimal, "iters={}", sol.iters); + assert!((sol.x[0] - 1.0).abs() < 1e-6, "x0={}", sol.x[0]); + assert!((sol.x[1] - 1.0).abs() < 1e-6, "x1={}", sol.x[1]); + assert!((sol.obj - 3.0).abs() < 1e-6, "obj={}", sol.obj); +} diff --git a/crates/pounce-convex/tests/scaling_iterations.rs b/crates/pounce-convex/tests/scaling_iterations.rs new file mode 100644 index 00000000..56f5be90 --- /dev/null +++ b/crates/pounce-convex/tests/scaling_iterations.rs @@ -0,0 +1,70 @@ +//! Scaling regression: the convex-QP IPM's *iteration count* must stay +//! roughly flat as the problem grows — the defining property of a +//! healthy interior-point method. (Wall-clock growth is the shared +//! pounce-linsol factorization's concern, not the IPM's, so this test +//! guards iterations, not time.) +//! +//! A box-constrained tridiagonal convex QP is solved at sizes spanning +//! three orders of magnitude; the iteration count must not drift upward +//! with n. + +use pounce_convex::{solve_qp_ipm, QpOptions, QpProblem, QpStatus, Triplet}; +use pounce_feral::FeralSolverInterface; +use pounce_linsol::SparseSymLinearSolverInterface; + +fn backend() -> Box { + Box::new(FeralSolverInterface::new()) +} + +fn sparse_box_qp(n: usize) -> QpProblem { + let mut p_lower = Vec::with_capacity(2 * n); + for i in 0..n { + p_lower.push(Triplet::new(i, i, 4.0)); + if i > 0 { + p_lower.push(Triplet::new(i, i - 1, -1.0)); + } + } + let c: Vec = (0..n).map(|i| -2.0 - (i % 5) as f64).collect(); + let mut g = Vec::with_capacity(2 * n); + let mut h = Vec::with_capacity(2 * n); + for i in 0..n { + g.push(Triplet::new(2 * i, i, 1.0)); // x_i ≤ 1 + h.push(1.0); + g.push(Triplet::new(2 * i + 1, i, -1.0)); // −x_i ≤ 0 + h.push(0.0); + } + QpProblem { + n, + p_lower, + c, + a: vec![], + b: vec![], + g, + h, + lb: vec![], + ub: vec![], + } +} + +#[test] +fn iteration_count_is_flat_across_sizes() { + let mut counts = Vec::new(); + for &n in &[100usize, 1_000, 5_000] { + let sol = solve_qp_ipm(&sparse_box_qp(n), &QpOptions::default(), backend); + assert_eq!(sol.status, QpStatus::Optimal, "n={n} did not converge"); + counts.push(sol.iters); + } + // The iteration count for a well-behaved IPM grows at most very + // slowly (theoretically ~√n, in practice near-constant on these + // well-conditioned problems). Assert it never exceeds a small flat + // bound across 50× growth in n — catches a regression that ties + // iteration count to problem size. + for (i, &c) in counts.iter().enumerate() { + assert!(c <= 20, "size index {i}: {c} iters (expected flat, ≤20)"); + } + // And that it does not blow up 100→5000: at most a couple extra. + assert!( + counts[2] <= counts[0] + 3, + "iteration count drifted with size: {counts:?}" + ); +} diff --git a/crates/pounce-convex/tests/sdp_cone.rs b/crates/pounce-convex/tests/sdp_cone.rs new file mode 100644 index 00000000..2a08c12d --- /dev/null +++ b/crates/pounce-convex/tests/sdp_cone.rs @@ -0,0 +1,141 @@ +//! End-to-end semidefinite programs through the PSD cone (PR70 item D). +//! +//! `ConeSpec::Psd(n)` is the least-exercised symmetric cone at the *program* +//! level — the unit tests in `cones/psd.rs` cover the cone primitives (svec / +//! smat / projection / barrier), but nothing drives a full SDP through +//! `solve_socp_ipm`. These tests do, against problems with closed-form optima. +//! +//! svec convention (see `cones/psd.rs`): lower triangle, column by column — +//! `(0,0),(1,0),…,(n-1,0),(1,1),…`, with off-diagonal entries scaled by `√2` +//! so `⟨X,Y⟩_F = svec(X)·svec(Y)`. A program constrains the slack +//! `s = h − G x ∈ PSD`, so `s` must equal `svec(M(x))`. + +use pounce_convex::{solve_socp_ipm, ConeSpec, QpOptions, QpProblem, QpStatus, Triplet}; +use pounce_feral::FeralSolverInterface; +use pounce_linsol::SparseSymLinearSolverInterface; + +fn backend() -> Box { + Box::new(FeralSolverInterface::new()) +} + +fn opts() -> QpOptions { + QpOptions { + max_iter: 200, + ..QpOptions::default() + } +} + +const R2: f64 = std::f64::consts::SQRT_2; + +/// Minimum `t` such that `[[t, 1], [1, t]] ⪰ 0`. Eigenvalues are `t ± 1`, so +/// the matrix is PSD iff `t ≥ 1`; the optimum is `t = 1` (a rank-deficient, +/// on-the-boundary solution — the adversarial case for a PSD IPM). +#[test] +fn sdp_min_diagonal_psd_cone_2x2() { + // var: t (n=1). svec(M(t)) = (t, √2·1, t). s = h − G t ∈ PSD₂. + // s0 = M00 = t -> h0=0, G(0,0) = −1 + // s1 = √2·M10 = √2 -> h1=√2, G row absent + // s2 = M11 = t -> h2=0, G(2,0) = −1 + let prob = QpProblem { + n: 1, + p_lower: vec![], + c: vec![1.0], // min t + a: vec![], + b: vec![], + g: vec![Triplet::new(0, 0, -1.0), Triplet::new(2, 0, -1.0)], + h: vec![0.0, R2, 0.0], + lb: vec![], + ub: vec![], + }; + let sol = solve_socp_ipm(&prob, &[ConeSpec::Psd(2)], &opts(), backend); + assert_eq!(sol.status, QpStatus::Optimal, "status {:?}", sol.status); + assert!((sol.x[0] - 1.0).abs() < 1e-5, "t = {} (want 1)", sol.x[0]); + assert!((sol.obj - 1.0).abs() < 1e-5, "obj = {} (want 1)", sol.obj); +} + +/// Maximum-eigenvalue SDP: `min t s.t. t·I − A ⪰ 0` gives `t = λ_max(A)`. +/// For `A = [[2, 1], [1, 2]]`, `λ_max = 3`. This exercises a non-trivial +/// constant matrix in the constraint and a known spectral optimum. +#[test] +fn sdp_max_eigenvalue_psd_cone() { + // var: t (n=1). M(t) = t·I − A = [[t−2, −1], [−1, t−2]]. + // svec(M) = (t−2, √2·(−1), t−2). s = h − G t ∈ PSD₂. + // s0 = t − 2 -> h0=−2, G(0,0) = −1 + // s1 = −√2 -> h1=−√2, G row absent + // s2 = t − 2 -> h2=−2, G(2,0) = −1 + let prob = QpProblem { + n: 1, + p_lower: vec![], + c: vec![1.0], + a: vec![], + b: vec![], + g: vec![Triplet::new(0, 0, -1.0), Triplet::new(2, 0, -1.0)], + h: vec![-2.0, -R2, -2.0], + lb: vec![], + ub: vec![], + }; + let sol = solve_socp_ipm(&prob, &[ConeSpec::Psd(2)], &opts(), backend); + assert_eq!(sol.status, QpStatus::Optimal, "status {:?}", sol.status); + assert!( + (sol.x[0] - 3.0).abs() < 1e-5, + "λ_max = {} (want 3)", + sol.x[0] + ); +} + +/// Infeasibility honesty on the PSD cone: require both `[[t,2],[2,t]] ⪰ 0` +/// (needs `t ≥ 2`) and `t ≤ 1`. Empty feasible set — the solver must NOT +/// report a false optimum. +/// +/// LIMITATION (PR70 item D finding): unlike the orthant path — which returns a +/// clean `PrimalInfeasible` Farkas certificate — the symmetric HSDE driver here +/// hits a KKT factorization breakdown (`NumericalFailure`) near the PSD cone +/// boundary *before* the embedding drives τ→0 far enough to extract the +/// certificate. That is a robustness gap, not a wrong-answer bug: the +/// safety-critical property (never a confident wrong `Optimal`) still holds, so +/// we assert exactly that. Tighten to `== PrimalInfeasible` once PSD +/// infeasibility certification is hardened. +#[test] +fn sdp_infeasible_psd_cone_never_reports_optimal() { + // var: t (n=1). Rows 0..3: svec of [[t,2],[2,t]] ∈ PSD₂. Row 3: t ≤ 1. + // s0 = t -> h0=0, G(0,0) = −1 + // s1 = 2√2 -> h1=2√2, G row absent + // s2 = t -> h2=0, G(2,0) = −1 + // s3 = 1 − t ≥ 0 (Nonneg) -> h3=1, G(3,0) = 1 + let prob = QpProblem { + n: 1, + p_lower: vec![], + c: vec![1.0], + a: vec![], + b: vec![], + g: vec![ + Triplet::new(0, 0, -1.0), + Triplet::new(2, 0, -1.0), + Triplet::new(3, 0, 1.0), + ], + h: vec![0.0, 2.0 * R2, 0.0, 1.0], + lb: vec![], + ub: vec![], + }; + let sol = solve_socp_ipm( + &prob, + &[ConeSpec::Psd(2), ConeSpec::Nonneg(1)], + &opts(), + backend, + ); + // Safety property: an empty feasible set must never be reported as solved. + assert_ne!( + sol.status, + QpStatus::Optimal, + "infeasible SDP must not report Optimal" + ); + // With the cone-aware Farkas check (the multiplier `z` is validated against + // the actual PSD/orthant dual cone, not merely componentwise), the + // infeasible SDP now yields the clean `PrimalInfeasible` certificate. + assert_eq!( + sol.status, + QpStatus::PrimalInfeasible, + "expected a PrimalInfeasible Farkas certificate, got {:?}", + sol.status + ); +} diff --git a/crates/pounce-convex/tests/socp.rs b/crates/pounce-convex/tests/socp.rs new file mode 100644 index 00000000..70d3cb92 --- /dev/null +++ b/crates/pounce-convex/tests/socp.rs @@ -0,0 +1,290 @@ +//! End-to-end SOCP validation (Phase 2b of the SOCP extension). +//! +//! There's no external reference here: correctness is **intrinsic**. The +//! IPM only reports `Optimal` when the *unregularized* KKT residual +//! (stationarity, `Ax=b`, `s=h−Gx`, `μ=⟨s,z⟩/2 → 0`) is below tolerance, +//! with `s,z` kept inside the cone by the fraction-to-boundary step — so a +//! convergent solve is a verified KKT point. We additionally check the +//! recovered solution against the SOCP KKT conditions and, where the +//! optimum is known in closed form, the primal. + +use pounce_convex::{ + solve_socp_ipm, solve_socp_ipm_warm, ConeSpec, QpOptions, QpProblem, QpStatus, QpWarmStart, + Triplet, +}; +use pounce_feral::FeralSolverInterface; +use pounce_linsol::SparseSymLinearSolverInterface; + +fn backend() -> Box { + Box::new(FeralSolverInterface::new()) +} + +fn solve(prob: &QpProblem, cones: &[ConeSpec]) -> pounce_convex::QpSolution { + let mut opts = QpOptions::default(); + opts.max_iter = 100; + solve_socp_ipm(prob, cones, &opts, backend) +} + +/// In-cone test for a second-order cone block: `u₀ ≥ ‖u_{1..}‖`. +fn in_soc(u: &[f64], tol: f64) -> bool { + let tail: f64 = u[1..].iter().map(|v| v * v).sum::().sqrt(); + u[0] + tol >= tail +} + +/// Assert the SOCP KKT conditions for a single SOC inequality block (the +/// whole `m_ineq` is one cone here): `s = h−Gx ∈ K`, `z ∈ K`, `sᵀz ≈ 0`, +/// `Ax=b`, and stationarity `Px+c+Aᵀy+Gᵀz = 0`. +fn assert_socp_kkt(prob: &QpProblem, sol: &pounce_convex::QpSolution, tol: f64) { + let n = prob.n; + let mi = prob.m_ineq(); + // s = h − Gx. + let mut gx = vec![0.0; mi]; + prob.g_mul(&sol.x, &mut gx); + let s: Vec = (0..mi).map(|i| prob.h[i] - gx[i]).collect(); + assert!(in_soc(&s, tol), "s = h−Gx not in cone: {s:?}"); + assert!(in_soc(&sol.z, tol), "z not in cone: {:?}", sol.z); + let sz: f64 = s.iter().zip(&sol.z).map(|(a, b)| a * b).sum(); + assert!(sz.abs() < tol, "complementarity sᵀz = {sz}"); + // Ax = b. + let mut ax = vec![0.0; prob.m_eq()]; + prob.a_mul(&sol.x, &mut ax); + for (i, (&axi, &bi)) in ax.iter().zip(&prob.b).enumerate() { + assert!((axi - bi).abs() < tol, "Ax=b row {i}: {axi} vs {bi}"); + } + // Stationarity Px + c + Aᵀy + Gᵀz = 0. + let mut g = prob.c.clone(); + prob.p_mul(&sol.x, &mut g); + prob.at_mul(&sol.y, &mut g); + prob.gt_mul(&sol.z, &mut g); + for i in 0..n { + assert!(g[i].abs() < tol, "stationarity[{i}] = {}", g[i]); + } +} + +/// min t s.t. t ≥ ‖x − x*‖ (i.e. minimize the norm to a point), encoded +/// with one second-order cone. Optimum: t* = 0, x = x*. We add the cone +/// rows `(t; x − x*) ∈ K` as `h − G·[t,x] ∈ K`. +#[test] +fn min_norm_to_point_socp() { + // vars: [t, x0, x1]. Cone: (t, x0 − a, x1 − b) ∈ SOC(3). + // s = h − G v ∈ K means: s0 = t, s1 = x0 − a, s2 = x1 − b. + // So G v = (−t, −x0, −x1) and h = (0, −a, −b) ⇒ s = (t, x0−a, x1−b). + let (a, b) = (2.0, -1.0); + let prob = QpProblem { + n: 3, + p_lower: vec![], // LP objective: minimize t + c: vec![1.0, 0.0, 0.0], + a: vec![], + b: vec![], + g: vec![ + Triplet::new(0, 0, -1.0), + Triplet::new(1, 1, -1.0), + Triplet::new(2, 2, -1.0), + ], + h: vec![0.0, -a, -b], + lb: vec![], + ub: vec![], + }; + let sol = solve(&prob, &[ConeSpec::SecondOrder(3)]); + assert_eq!(sol.status, QpStatus::Optimal, "iters={}", sol.iters); + // t* = 0, x = (a, b). + assert!(sol.x[0].abs() < 1e-6, "t={}", sol.x[0]); + assert!((sol.x[1] - a).abs() < 1e-6, "x0={}", sol.x[1]); + assert!((sol.x[2] - b).abs() < 1e-6, "x1={}", sol.x[2]); + assert_socp_kkt(&prob, &sol, 1e-6); +} + +/// Minimize a linear cost over a second-order cone with an equality: +/// min −x1 s.t. x0 = 1, (x0, x1, x2) ∈ SOC(3). +/// With x0 = 1, the cone is ‖(x1,x2)‖ ≤ 1; minimizing −x1 ⇒ x1 = 1, x2 = 0. +#[test] +fn linear_over_soc_with_equality() { + // vars [x0, x1, x2]; cone (x0,x1,x2) ∈ K ⇒ s = G·(−I)·x ... encode + // s = x directly: h = 0, G = −I ⇒ s = −Gx = x. Equality x0 = 1. + let prob = QpProblem { + n: 3, + p_lower: vec![], + c: vec![0.0, -1.0, 0.0], + a: vec![Triplet::new(0, 0, 1.0)], + b: vec![1.0], + g: vec![ + Triplet::new(0, 0, -1.0), + Triplet::new(1, 1, -1.0), + Triplet::new(2, 2, -1.0), + ], + h: vec![0.0, 0.0, 0.0], + lb: vec![], + ub: vec![], + }; + let sol = solve(&prob, &[ConeSpec::SecondOrder(3)]); + assert_eq!(sol.status, QpStatus::Optimal, "iters={}", sol.iters); + assert!((sol.x[0] - 1.0).abs() < 1e-6, "x0={}", sol.x[0]); + assert!((sol.x[1] - 1.0).abs() < 1e-6, "x1={}", sol.x[1]); + assert!(sol.x[2].abs() < 1e-6, "x2={}", sol.x[2]); + assert_socp_kkt(&prob, &sol, 1e-6); +} + +/// A convex-QP objective over a second-order cone: project a point onto +/// the cone. min ½‖x − p‖² s.t. x ∈ SOC(3), with p outside the cone. +#[test] +fn projection_onto_soc_qp() { + // P = I, c = −p ⇒ ½‖x‖² − pᵀx = ½‖x−p‖² − const. x ∈ K via s = x. + let p = [1.0, 2.0, 0.0]; // ‖(2,0)‖ = 2 > 1 ⇒ p outside the cone + let prob = QpProblem { + n: 3, + p_lower: vec![ + Triplet::new(0, 0, 1.0), + Triplet::new(1, 1, 1.0), + Triplet::new(2, 2, 1.0), + ], + c: vec![-p[0], -p[1], -p[2]], + a: vec![], + b: vec![], + g: vec![ + Triplet::new(0, 0, -1.0), + Triplet::new(1, 1, -1.0), + Triplet::new(2, 2, -1.0), + ], + h: vec![0.0, 0.0, 0.0], + lb: vec![], + ub: vec![], + }; + let sol = solve(&prob, &[ConeSpec::SecondOrder(3)]); + assert_eq!(sol.status, QpStatus::Optimal, "iters={}", sol.iters); + // The Euclidean projection of (1,2,0) onto the SOC has the closed form + // for a point with t < ‖x₁‖: scale = (‖x₁‖+t)/(2‖x₁‖); proj = + // scale·(‖x₁‖, x₁). Here t=1, ‖x₁‖=2 ⇒ scale = 3/4 ⇒ (1.5, 1.5, 0). + assert!((sol.x[0] - 1.5).abs() < 1e-5, "x0={}", sol.x[0]); + assert!((sol.x[1] - 1.5).abs() < 1e-5, "x1={}", sol.x[1]); + assert!(sol.x[2].abs() < 1e-5, "x2={}", sol.x[2]); + assert_socp_kkt(&prob, &sol, 1e-6); +} + +/// SOC warm start: from a nearby SOCP's solution, the warm solve reaches +/// the same KKT point (the projection onto the cone) and takes no more +/// iterations than cold. Exercises the SOC `λ_min` recentering. +#[test] +fn soc_warm_start_matches_cold() { + let base = QpProblem { + n: 3, + p_lower: (0..3).map(|i| Triplet::new(i, i, 1.0)).collect(), + c: vec![-1.0, -2.0, 0.0], + a: vec![], + b: vec![], + g: vec![ + Triplet::new(0, 0, -1.0), + Triplet::new(1, 1, -1.0), + Triplet::new(2, 2, -1.0), + ], + h: vec![0.0, 0.0, 0.0], + lb: vec![], + ub: vec![], + }; + let cones = [ConeSpec::SecondOrder(3)]; + let opts = QpOptions::default(); + let base_sol = solve_socp_ipm(&base, &cones, &opts, backend); + assert_eq!(base_sol.status, QpStatus::Optimal); + + // Perturb the target slightly. + let mut pert = base.clone(); + pert.c = vec![-1.1, -1.9, 0.05]; + let cold = solve_socp_ipm(&pert, &cones, &opts, backend); + let warm = solve_socp_ipm_warm( + &pert, + &cones, + &QpWarmStart::from_solution(&base_sol), + &opts, + backend, + ); + assert_eq!(warm.status, QpStatus::Optimal); + for i in 0..3 { + assert!( + (cold.x[i] - warm.x[i]).abs() < 1e-6, + "x[{i}]: cold={} warm={}", + cold.x[i], + warm.x[i] + ); + } + assert_socp_kkt(&pert, &warm, 1e-6); + // SOC warm restarts the duals centered (stable), so the win is from + // the primal proximity; it must not regress vs cold. + assert!( + warm.iters <= cold.iters, + "warm {} cold {}", + warm.iters, + cold.iters + ); +} + +/// A larger second-order cone (dim 12) — exercises the sparse +/// diagonal-plus-rank-1 KKT representation (one auxiliary variable carries +/// the rank-1 update; the `(z,z)` block stays diagonal instead of dense). +/// Projection of a point outside the cone has a known closed form. +#[test] +fn larger_soc_projection_sparse_kkt() { + let m = 12; + // p = (t, x₁) with t < ‖x₁‖ ⇒ outside the cone. Project: + // scale = (‖x₁‖+t)/(2‖x₁‖); proj = scale·(‖x₁‖, x₁). + let mut p = vec![1.0; m]; + p[0] = 1.0; // t + let nx: f64 = p[1..].iter().map(|v| v * v).sum::().sqrt(); // ‖x₁‖ + let scale = (nx + p[0]) / (2.0 * nx); + let mut expect = vec![0.0; m]; + expect[0] = scale * nx; + for k in 1..m { + expect[k] = scale * p[k]; + } + + let prob = QpProblem { + n: m, + p_lower: (0..m).map(|i| Triplet::new(i, i, 1.0)).collect(), + c: p.iter().map(|v| -v).collect(), + a: vec![], + b: vec![], + g: (0..m).map(|i| Triplet::new(i, i, -1.0)).collect(), + h: vec![0.0; m], + lb: vec![], + ub: vec![], + }; + let opts = QpOptions::default(); + let sol = solve_socp_ipm(&prob, &[ConeSpec::SecondOrder(m)], &opts, backend); + assert_eq!(sol.status, QpStatus::Optimal, "iters={}", sol.iters); + for k in 0..m { + assert!( + (sol.x[k] - expect[k]).abs() < 1e-5, + "x[{k}]={} want {}", + sol.x[k], + expect[k] + ); + } + assert_socp_kkt(&prob, &sol, 1e-6); +} + +/// Mixed cone: a nonnegative-orthant block and a second-order block in one +/// problem (exercises the composite KKT assembly with both shapes). +/// min −x0 − x1 s.t. x0 ≤ 1 (orthant), (1, x1) ∈ SOC(2) ⇒ |x1| ≤ 1. +#[test] +fn mixed_orthant_and_soc() { + // rows: [orthant] 1 − x0 ≥ 0 ; [soc dim 2] s = (1, x1) with s0=1≥|x1|. + // s_orth = h0 − G0·x = 1 − x0 (need ≥ 0). + // s_soc = (h1 − G1 x, h2 − G2 x) = (1, x1): row1 = 0·x + h=1, row2 = −x1+0. + let prob = QpProblem { + n: 2, + p_lower: vec![], + c: vec![-1.0, -1.0], + a: vec![], + b: vec![], + g: vec![ + Triplet::new(0, 0, 1.0), // orthant: 1 − x0 ≥ 0 + Triplet::new(2, 1, -1.0), // soc row 2: s2 = h2 − (−x1) = x1 + ], + h: vec![1.0, 1.0, 0.0], + lb: vec![], + ub: vec![], + }; + let sol = solve(&prob, &[ConeSpec::Nonneg(1), ConeSpec::SecondOrder(2)]); + assert_eq!(sol.status, QpStatus::Optimal, "iters={}", sol.iters); + // max x0 + x1 with x0 ≤ 1, |x1| ≤ 1 ⇒ x0 = 1, x1 = 1. + assert!((sol.x[0] - 1.0).abs() < 1e-5, "x0={}", sol.x[0]); + assert!((sol.x[1] - 1.0).abs() < 1e-5, "x1={}", sol.x[1]); +} diff --git a/crates/pounce-convex/tests/warm_start.rs b/crates/pounce-convex/tests/warm_start.rs new file mode 100644 index 00000000..37ae23f7 --- /dev/null +++ b/crates/pounce-convex/tests/warm_start.rs @@ -0,0 +1,278 @@ +//! Warm-start tests for the convex-QP interior-point solver. +//! +//! Warm starting an IPM is subtle: a converged solution sits on the +//! complementarity boundary, the worst place to restart. The solver's +//! Mehrotra-style recentering ([`QpWarmStart`]) keeps the warm primal but +//! pushes the slacks/multipliers back into the interior. These tests check +//! two things: +//! +//! 1. **Correctness** — a warm-started solve reaches the *same* optimum as +//! a cold solve (the start cannot change the KKT point it converges to). +//! 2. **Benefit** — on a nearby problem, warm starting takes no more +//! iterations than cold (and typically fewer). + +use pounce_convex::{ + solve_qp_batch_parallel, solve_qp_batch_parallel_warm, solve_qp_ipm, solve_qp_ipm_warm, + QpFactorization, QpOptions, QpProblem, QpStatus, QpWarmStart, Triplet, +}; +use pounce_feral::FeralSolverInterface; +use pounce_linsol::SparseSymLinearSolverInterface; + +fn backend() -> Box { + Box::new(FeralSolverInterface::new()) +} + +/// A box-constrained QP `min ½·2‖x‖² + cᵀx s.t. 0 ≤ x ≤ 5` (P = 2I). +fn box_qp(c: &[f64]) -> QpProblem { + let n = c.len(); + QpProblem { + n, + p_lower: (0..n).map(|i| Triplet::new(i, i, 2.0)).collect(), + c: c.to_vec(), + a: vec![], + b: vec![], + g: vec![], + h: vec![], + lb: vec![0.0; n], + ub: vec![5.0; n], + } +} + +/// An inequality-constrained QP `min ½·2‖x‖² + cᵀx s.t. Σx ≤ cap`. +fn capped_qp(c: &[f64], cap: f64) -> QpProblem { + let n = c.len(); + QpProblem { + n, + p_lower: (0..n).map(|i| Triplet::new(i, i, 2.0)).collect(), + c: c.to_vec(), + a: vec![], + b: vec![], + g: (0..n).map(|i| Triplet::new(0, i, 1.0)).collect(), + h: vec![cap], + lb: vec![], + ub: vec![], + } +} + +#[test] +fn warm_start_matches_cold_solution() { + let opts = QpOptions::default(); + // Solve a base problem, then warm-start a perturbed one from it. + let base = capped_qp(&[-1.0, -2.0, -0.5], 1.0); + let base_sol = solve_qp_ipm(&base, &opts, backend); + assert_eq!(base_sol.status, QpStatus::Optimal); + + let pert = capped_qp(&[-1.2, -1.8, -0.6], 1.1); + let cold = solve_qp_ipm(&pert, &opts, backend); + let warm = solve_qp_ipm_warm( + &pert, + &opts, + &QpWarmStart::from_solution(&base_sol), + backend, + ); + + assert_eq!(cold.status, QpStatus::Optimal); + assert_eq!(warm.status, QpStatus::Optimal); + // Same primal, dual, and objective regardless of the start. + for i in 0..pert.n { + assert!( + (cold.x[i] - warm.x[i]).abs() < 1e-6, + "x[{i}]: cold={} warm={}", + cold.x[i], + warm.x[i] + ); + } + assert!((cold.obj - warm.obj).abs() < 1e-6); + assert!((cold.z[0] - warm.z[0]).abs() < 1e-6); +} + +#[test] +fn warm_start_matches_cold_with_bounds() { + let opts = QpOptions::default(); + let base = box_qp(&[-3.0, 6.0, -10.0]); // mixes interior, lower, upper + let base_sol = solve_qp_ipm(&base, &opts, backend); + assert_eq!(base_sol.status, QpStatus::Optimal); + + let pert = box_qp(&[-3.5, 5.5, -9.0]); + let cold = solve_qp_ipm(&pert, &opts, backend); + let warm = solve_qp_ipm_warm( + &pert, + &opts, + &QpWarmStart::from_solution(&base_sol), + backend, + ); + + assert_eq!(warm.status, QpStatus::Optimal); + for i in 0..pert.n { + assert!( + (cold.x[i] - warm.x[i]).abs() < 1e-6, + "x[{i}]: cold={} warm={}", + cold.x[i], + warm.x[i] + ); + assert!((cold.z_lb[i] - warm.z_lb[i]).abs() < 1e-6); + assert!((cold.z_ub[i] - warm.z_ub[i]).abs() < 1e-6); + } +} + +#[test] +fn warm_start_reduces_iterations_on_nearby_problem() { + // This test isolates the *warm-start mechanism*, so it holds the problem + // conditioning fixed by disabling equilibration. Ruiz equilibration is an + // independent iteration-count improvement; on a problem this small and + // well-scaled it makes the cold solve converge so well (here, 7 iters) that + // it absorbs the warm-start margin, conflating the two effects. The + // equilibrated warm path is exercised by `parallel_batch_warm_*`. + let opts = QpOptions { + equilibrate: false, + ..QpOptions::default() + }; + // Larger problem so the iteration difference is meaningful. + let n = 30; + let c0: Vec = (0..n).map(|i| -1.0 - (i as f64) * 0.1).collect(); + let base = capped_qp(&c0, 5.0); + let base_sol = solve_qp_ipm(&base, &opts, backend); + assert_eq!(base_sol.status, QpStatus::Optimal); + + // A small perturbation of c and the cap. + let c1: Vec = c0.iter().map(|v| v * 1.02).collect(); + let pert = capped_qp(&c1, 5.1); + + let cold = solve_qp_ipm(&pert, &opts, backend); + let warm = solve_qp_ipm_warm( + &pert, + &opts, + &QpWarmStart::from_solution(&base_sol), + backend, + ); + assert_eq!(cold.status, QpStatus::Optimal); + assert_eq!(warm.status, QpStatus::Optimal); + + // The warm start should not need more iterations than cold; for a + // perturbation this small it should need strictly fewer. + assert!( + warm.iters <= cold.iters, + "warm should not regress: warm={} cold={}", + warm.iters, + cold.iters + ); + assert!( + warm.iters < cold.iters, + "warm should beat cold on a nearby problem: warm={} cold={}", + warm.iters, + cold.iters + ); +} + +#[test] +fn factorization_solve_warm_combines_reuse_and_warm() { + let opts = QpOptions::default(); + let base = capped_qp(&[-1.0, -2.0, -0.5, -1.5], 2.0); + let base_sol = solve_qp_ipm(&base, &opts, backend); + + // Build-once / solve-many handle; warm-start a same-structure solve. + let mut handle = QpFactorization::build(&base, &opts, backend).expect("factor builds"); + let pert = capped_qp(&[-1.1, -1.9, -0.4, -1.6], 2.1); + let warm = handle.solve_warm(&pert, &QpWarmStart::from_solution(&base_sol)); + let cold = solve_qp_ipm(&pert, &opts, backend); + + assert_eq!(warm.status, QpStatus::Optimal); + for i in 0..pert.n { + assert!( + (cold.x[i] - warm.x[i]).abs() < 1e-6, + "x[{i}]: cold={} warm={}", + cold.x[i], + warm.x[i] + ); + } +} + +#[test] +fn primal_only_warm_start_is_accepted() { + // A warm start carrying only the primal `x` (cold `y`/`z`) still seeds + // the solve and reaches the right optimum — this is the mode the JAX + // differentiable layer uses, where only the primal is returned. + let opts = QpOptions::default(); + let base = capped_qp(&[-1.0, -2.0, -0.5], 1.0); + let base_sol = solve_qp_ipm(&base, &opts, backend); + + let pert = capped_qp(&[-1.1, -1.9, -0.55], 1.05); + let primal_only = QpWarmStart { + x: base_sol.x.clone(), + y: Vec::new(), + z: Vec::new(), + z_lb: Vec::new(), + z_ub: Vec::new(), + }; + let warm = solve_qp_ipm_warm(&pert, &opts, &primal_only, backend); + let cold = solve_qp_ipm(&pert, &opts, backend); + assert_eq!(warm.status, QpStatus::Optimal); + for i in 0..pert.n { + assert!((cold.x[i] - warm.x[i]).abs() < 1e-6); + } +} + +#[test] +fn parallel_batch_warm_matches_cold_and_helps() { + let opts = QpOptions::default(); + // A batch of base problems, then a perturbed batch warm-started from + // the base solutions. + let base: Vec = (0..6) + .map(|k| capped_qp(&[-1.0 - 0.1 * k as f64, -2.0, -0.5], 1.0)) + .collect(); + let base_sols = solve_qp_batch_parallel(&base, &opts, backend); + + let pert: Vec = (0..6) + .map(|k| capped_qp(&[-1.05 - 0.1 * k as f64, -1.95, -0.55], 1.05)) + .collect(); + let warms: Vec = base_sols.iter().map(QpWarmStart::from_solution).collect(); + + let cold = solve_qp_batch_parallel(&pert, &opts, backend); + let warm = solve_qp_batch_parallel_warm(&pert, &warms, &opts, backend); + + assert_eq!(cold.len(), 6); + assert_eq!(warm.len(), 6); + for k in 0..6 { + assert_eq!(warm[k].status, QpStatus::Optimal); + for i in 0..pert[k].n { + assert!( + (cold[k].x[i] - warm[k].x[i]).abs() < 1e-6, + "batch[{k}] x[{i}]: cold={} warm={}", + cold[k].x[i], + warm[k].x[i] + ); + } + // Per-instance warm start should not regress iterations. + assert!( + warm[k].iters <= cold[k].iters, + "batch[{k}] iters: warm={} cold={}", + warm[k].iters, + cold[k].iters + ); + } +} + +#[test] +#[should_panic(expected = "must equal")] +fn parallel_batch_warm_mismatched_lengths_panics() { + let opts = QpOptions::default(); + let probs = vec![capped_qp(&[-1.0, -2.0, -0.5], 1.0)]; + let warms: Vec = Vec::new(); // wrong length + let _ = solve_qp_batch_parallel_warm(&probs, &warms, &opts, backend); +} + +#[test] +fn stale_warm_start_dims_fall_back_to_cold() { + let opts = QpOptions::default(); + let prob = capped_qp(&[-1.0, -2.0, -0.5], 1.0); + // A warm start with the wrong dimensions must be ignored, not crash. + let bogus = QpWarmStart { + x: vec![0.0; 7], + y: vec![], + z: vec![0.0; 3], + z_lb: vec![], + z_ub: vec![], + }; + let sol = solve_qp_ipm_warm(&prob, &opts, &bogus, backend); + assert_eq!(sol.status, QpStatus::Optimal); +} diff --git a/crates/pounce-feral/src/lib.rs b/crates/pounce-feral/src/lib.rs index 9944bb1d..f3053c79 100644 --- a/crates/pounce-feral/src/lib.rs +++ b/crates/pounce-feral/src/lib.rs @@ -156,6 +156,16 @@ pub struct FeralConfig { /// regression testing). See `feral/src/scaling/mod.rs:: /// ScalingStrategy` for the per-variant rationale. pub scaling: ScalingStrategy, + /// Per-backend internal-parallelism toggle (tri-state). `None` (the + /// default) leaves feral's `Solver` at its own default and lets the + /// legacy `FERAL_PARALLEL` env var still force serial; `Some(false)` + /// builds an explicitly **serial** factor; `Some(true)` forces feral's + /// internal rayon parallelism on. This is the first-class lever for + /// outer-parallel / inner-serial batched solving — each rayon worker + /// builds its own `Some(false)` backend, with no global state (pounce + /// issue #79). feral reads `Solver::use_parallel` fresh on every + /// `factor()`, so two backends with different settings never interfere. + pub parallel: Option, } impl Default for FeralConfig { @@ -171,6 +181,7 @@ impl Default for FeralConfig { pivtol: 1e-8, ordering: OrderingMethod::Auto, scaling: ScalingStrategy::Auto, + parallel: None, } } } @@ -215,6 +226,11 @@ impl FeralConfig { .as_deref() .and_then(parse_scaling_strategy) .unwrap_or(ScalingStrategy::Auto), + // Left `None` so the legacy `FERAL_PARALLEL` env var still acts + // as the fallback serial switch in `with_config`; callers that + // want an explicit per-backend setting use `FeralConfig.parallel` + // directly (e.g. `FeralSolverInterface::serial`). + parallel: None, } } } @@ -261,6 +277,18 @@ impl FeralSolverInterface { Self::with_config(FeralConfig::from_env()) } + /// Construct a backend with feral's internal parallelism **disabled** + /// (inheriting all other env-driven config). Each rayon worker in an + /// outer-parallel / inner-serial batch builds one of these directly, so + /// the only parallelism is across instances — no global `FERAL_PARALLEL` + /// mutation (pounce issue #79). + pub fn serial() -> Self { + Self::with_config(FeralConfig { + parallel: Some(false), + ..FeralConfig::from_env() + }) + } + /// Construct with explicit configuration. Cascade-break /// (`ratio=0.5, eps=1e-10`) was off by default in pounce for a /// period after the issue-17/issue-18 inertia investigations, @@ -320,11 +348,20 @@ impl FeralSolverInterface { } } let mut solver = Solver::with_params(np, SupernodeParams::default()); - if matches!( - std::env::var("FERAL_PARALLEL").as_deref(), - Ok("0") | Ok("false") | Ok("off") - ) { - solver = solver.with_parallel(false); + // Internal-parallelism toggle. An explicit `cfg.parallel` is the + // primary, per-backend lever (no global state); when unset, fall + // back to the legacy process-wide `FERAL_PARALLEL` env var for + // backward compatibility. + match cfg.parallel { + Some(p) => solver = solver.with_parallel(p), + None => { + if matches!( + std::env::var("FERAL_PARALLEL").as_deref(), + Ok("0") | Ok("false") | Ok("off") + ) { + solver = solver.with_parallel(false); + } + } } if cfg.fma { solver = solver.with_fma(true); @@ -834,6 +871,39 @@ mod tests { } } + /// Issue #79: the first-class per-backend `parallel` toggle builds a + /// serial factor without touching any global state, and its result is + /// bit-identical to the parallel driver (feral guarantees parity). + #[test] + fn per_backend_parallel_toggle_serial_matches_parallel() { + let irn: [Index; 3] = [1, 2, 2]; + let jcn: [Index; 3] = [1, 1, 2]; + let solve = |mut s: FeralSolverInterface| -> [f64; 2] { + assert_eq!( + s.initialize_structure(2, 3, &irn, &jcn), + ESymSolverStatus::Success + ); + s.values_array_mut().copy_from_slice(&[2.0, 1.0, 3.0]); + let mut rhs = [3.0, 4.0]; + assert_eq!( + s.multi_solve(true, &irn, &jcn, 1, &mut rhs, false, 0), + ESymSolverStatus::Success + ); + rhs + }; + let par = solve(FeralSolverInterface::with_config(FeralConfig { + parallel: Some(true), + ..FeralConfig::default() + })); + let ser = solve(FeralSolverInterface::serial()); + // [[2,1],[1,3]] x = [3,4] ⇒ x = [1, 1], same both ways. + assert!((par[0] - 1.0).abs() < 1e-12 && (par[1] - 1.0).abs() < 1e-12); + assert_eq!( + par, ser, + "serial and parallel factors must agree bit-for-bit" + ); + } + /// Pounce emits some symmetric entries as upper-triangle /// `(i, j)` with `i < j` because MA57 accepts either half. The /// FERAL wrapper must canonicalize to lower triangle (row >= col) diff --git a/crates/pounce-sensitivity/src/eigen.rs b/crates/pounce-linalg/src/eigen.rs similarity index 100% rename from crates/pounce-sensitivity/src/eigen.rs rename to crates/pounce-linalg/src/eigen.rs diff --git a/crates/pounce-linalg/src/lib.rs b/crates/pounce-linalg/src/lib.rs index 477be042..078c29fc 100644 --- a/crates/pounce-linalg/src/lib.rs +++ b/crates/pounce-linalg/src/lib.rs @@ -14,6 +14,7 @@ pub mod dense_gen_matrix; pub mod dense_sym_matrix; pub mod dense_vector; pub mod diag_matrix; +pub mod eigen; pub mod expansion_matrix; pub mod low_rank_update_sym_matrix; pub mod matrix; @@ -34,6 +35,7 @@ pub use dense_gen_matrix::{DenseGenMatrix, DenseGenMatrixSpace}; pub use dense_sym_matrix::{DenseSymMatrix, DenseSymMatrixSpace}; pub use dense_vector::{DenseVector, DenseVectorSpace}; pub use diag_matrix::DiagMatrix; +pub use eigen::symmetric_eigen; pub use expansion_matrix::{ExpansionMatrix, ExpansionMatrixSpace}; pub use low_rank_update_sym_matrix::{LowRankUpdateSymMatrix, LowRankUpdateSymMatrixSpace}; pub use matrix::{Matrix, MatrixCache, SymMatrix}; diff --git a/crates/pounce-py/Cargo.toml b/crates/pounce-py/Cargo.toml index f515142d..beb7a9e9 100644 --- a/crates/pounce-py/Cargo.toml +++ b/crates/pounce-py/Cargo.toml @@ -31,6 +31,7 @@ pounce-nlp.workspace = true pounce-nl.workspace = true pounce-algorithm.workspace = true pounce-qp.workspace = true +pounce-convex.workspace = true pounce-restoration.workspace = true pounce-feral.workspace = true pounce-linsol.workspace = true diff --git a/crates/pounce-py/src/lib.rs b/crates/pounce-py/src/lib.rs index acb8d584..25444bf6 100644 --- a/crates/pounce-py/src/lib.rs +++ b/crates/pounce-py/src/lib.rs @@ -21,12 +21,15 @@ use pyo3::prelude::*; mod nl_problem; mod problem; +mod qp; mod solver; +mod sos; mod tnlp_bridge; mod warm_start; pub use nl_problem::{read_nl, PyNlProblem}; pub use problem::PyProblem; +pub use qp::{PyQpFactorization, PyQpProblem, PyQpSensitivity}; pub use solver::PySolver; /// Python module entry point. The crate name (`_pounce`) and the @@ -43,6 +46,16 @@ fn _pounce(_py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_function(wrap_pyfunction!(read_nl, m)?)?; m.add_function(wrap_pyfunction!(warm_start::classify_working_set, m)?)?; + // Convex LP/QP solver (pounce-convex) bindings. + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_function(wrap_pyfunction!(qp::solve_qp, m)?)?; + m.add_function(wrap_pyfunction!(qp::solve_socp, m)?)?; + m.add_function(wrap_pyfunction!(qp::solve_qp_batch, m)?)?; + m.add_function(wrap_pyfunction!(qp::solve_qp_multi_rhs, m)?)?; + // SOS polynomial global optimizer (pounce-convex::sos). + m.add_function(wrap_pyfunction!(sos::sos_minimize, m)?)?; m.add("__version__", env!("CARGO_PKG_VERSION"))?; Ok(()) } diff --git a/crates/pounce-py/src/qp.rs b/crates/pounce-py/src/qp.rs new file mode 100644 index 00000000..61526acb --- /dev/null +++ b/crates/pounce-py/src/qp.rs @@ -0,0 +1,614 @@ +//! PyO3 bindings for the convex LP/QP solver (`pounce-convex`). +//! +//! Exposes the standard-form convex QP +//! +//! ```text +//! minimize ½ xᵀP x + cᵀx +//! subject to A x = b, G x ≤ h, lb ≤ x ≤ ub +//! ``` +//! +//! as a Python `QpProblem`, with one-shot `solve_qp`, the batched / +//! multiple-RHS entry points (`solve_qp_batch`, `solve_qp_multi_rhs`), +//! and the build-once / solve-many `QpFactorization` handle — the same +//! capabilities the Rust crate offers, including the parallel batch. +//! +//! Sparse matrices are passed as COO triplets `(rows, cols, vals)` (three +//! equal-length sequences), matching how scipy `coo_matrix` exposes its +//! data; `P` is the **lower triangle** of the symmetric Hessian. + +use numpy::IntoPyArray; +use pounce_convex::{ + solve_qp_batch_parallel, solve_qp_batch_parallel_warm, solve_qp_ipm, solve_qp_ipm_warm, + solve_socp_ipm, ConeSpec, QpFactorization, QpOptions, QpProblem, QpSensitivity, QpSolution, + QpStatus, QpWarmStart, SensError, Triplet, +}; +use pounce_feral::FeralSolverInterface; +use pounce_linsol::SparseSymLinearSolverInterface; +use pyo3::exceptions::PyValueError; +use pyo3::prelude::*; +use pyo3::types::{PyDict, PyList}; + +fn backend() -> Box { + Box::new(FeralSolverInterface::new()) +} + +/// Inner-serial backend for the rayon-parallel batch / multi-RHS paths: +/// each worker builds its own serial factor so the only parallelism is +/// across instances (outer-parallel / inner-serial). No global state. +fn serial_backend() -> Box { + Box::new(FeralSolverInterface::serial()) +} + +/// Build a triplet list from `(rows, cols, vals)`, validating equal +/// lengths and (for `lower_only`) that no strict-upper entry is given. +fn triplets( + rows: &[i64], + cols: &[i64], + vals: &[f64], + what: &str, + lower_only: bool, +) -> PyResult> { + if rows.len() != cols.len() || rows.len() != vals.len() { + return Err(PyValueError::new_err(format!( + "{what}: rows/cols/vals must have equal length ({}, {}, {})", + rows.len(), + cols.len(), + vals.len() + ))); + } + let mut out = Vec::with_capacity(rows.len()); + for k in 0..rows.len() { + let (r, c) = (rows[k], cols[k]); + if r < 0 || c < 0 { + return Err(PyValueError::new_err(format!( + "{what}: negative index at entry {k}" + ))); + } + let (r, c) = (r as usize, c as usize); + if lower_only && c > r { + return Err(PyValueError::new_err(format!( + "{what}: entry ({r},{c}) is in the strict upper triangle; \ + pass only the lower triangle of the symmetric Hessian P" + ))); + } + out.push(Triplet::new(r, c, vals[k])); + } + Ok(out) +} + +/// Convex QP in standard form. Construct from dense `c` and COO triplets +/// for `P` (lower triangle), `A`, and `G`; `b`, `h`, `lb`, `ub` are dense +/// (omit `lb`/`ub` or pass empty for unbounded). +#[pyclass(name = "QpProblem", module = "pounce._pounce")] +#[derive(Clone)] +pub struct PyQpProblem { + inner: QpProblem, +} + +#[pymethods] +impl PyQpProblem { + #[new] + #[pyo3(signature = ( + n, c, + p_rows=vec![], p_cols=vec![], p_vals=vec![], + a_rows=vec![], a_cols=vec![], a_vals=vec![], b=vec![], + g_rows=vec![], g_cols=vec![], g_vals=vec![], h=vec![], + lb=vec![], ub=vec![], + ))] + #[allow(clippy::too_many_arguments)] + fn new( + n: usize, + c: Vec, + p_rows: Vec, + p_cols: Vec, + p_vals: Vec, + a_rows: Vec, + a_cols: Vec, + a_vals: Vec, + b: Vec, + g_rows: Vec, + g_cols: Vec, + g_vals: Vec, + h: Vec, + lb: Vec, + ub: Vec, + ) -> PyResult { + if c.len() != n { + return Err(PyValueError::new_err(format!( + "c has length {}, expected n = {n}", + c.len() + ))); + } + if !lb.is_empty() && lb.len() != n { + return Err(PyValueError::new_err(format!( + "lb has length {}, expected 0 or n = {n}", + lb.len() + ))); + } + if !ub.is_empty() && ub.len() != n { + return Err(PyValueError::new_err(format!( + "ub has length {}, expected 0 or n = {n}", + ub.len() + ))); + } + let inner = QpProblem { + n, + p_lower: triplets(&p_rows, &p_cols, &p_vals, "P", true)?, + c, + a: triplets(&a_rows, &a_cols, &a_vals, "A", false)?, + b, + g: triplets(&g_rows, &g_cols, &g_vals, "G", false)?, + h, + lb, + ub, + }; + Ok(Self { inner }) + } + + #[getter] + fn n(&self) -> usize { + self.inner.n + } + + #[getter] + fn m_eq(&self) -> usize { + self.inner.m_eq() + } + + #[getter] + fn m_ineq(&self) -> usize { + self.inner.m_ineq() + } +} + +/// Turn a `QpStatus` into the lowercase string used in the result dict. +fn status_str(s: QpStatus) -> &'static str { + match s { + QpStatus::Optimal => "optimal", + QpStatus::PrimalInfeasible => "primal_infeasible", + QpStatus::DualInfeasible => "dual_infeasible", + QpStatus::IterationLimit => "iteration_limit", + QpStatus::NumericalFailure => "numerical_failure", + } +} + +/// Build the Python result dict `{x, y, z, z_lb, z_ub, obj, iters, status, +/// iterates, residuals}` from a `QpSolution`. +/// +/// When `prob` is `Some`, the final KKT `residuals` block is attached — but +/// only for the plain-QP path, where `Gx ≤ h` is an orthant constraint and +/// [`QpSolution::kkt_residuals`] applies. Conic (SOCP/exp/power) solves pass +/// `None`: there the slack lives in a non-orthant cone, so those orthant +/// residuals would be meaningless. The `iterates` trace is always attached +/// (empty unless `collect_iterates` was set, so there is no overhead off the +/// opt-in path). +fn solution_dict<'py>( + py: Python<'py>, + sol: QpSolution, + prob: Option<&QpProblem>, +) -> PyResult> { + let d = PyDict::new_bound(py); + d.set_item("status", status_str(sol.status))?; + d.set_item("obj", sol.obj)?; + d.set_item("iters", sol.iters)?; + + // Final KKT residuals (plain QP only — see the doc comment). + if let Some(p) = prob { + let r = sol.kkt_residuals(p); + let rd = PyDict::new_bound(py); + rd.set_item("primal_infeasibility", r.primal_infeasibility)?; + rd.set_item("dual_infeasibility", r.dual_infeasibility)?; + rd.set_item("complementarity", r.complementarity)?; + rd.set_item("kkt_error", r.kkt_error())?; + d.set_item("residuals", rd)?; + } + + // Per-iteration convergence trace (empty unless `collect_iterates` set). + let trace = PyList::empty_bound(py); + for it in &sol.iterates { + let row = PyDict::new_bound(py); + row.set_item("iter", it.iter)?; + row.set_item("objective", it.objective)?; + row.set_item("primal_infeasibility", it.primal_infeasibility)?; + row.set_item("dual_infeasibility", it.dual_infeasibility)?; + row.set_item("mu", it.mu)?; + row.set_item("alpha_primal", it.alpha_primal)?; + row.set_item("alpha_dual", it.alpha_dual)?; + trace.append(row)?; + } + d.set_item("iterates", trace)?; + + d.set_item("x", sol.x.into_pyarray_bound(py))?; + d.set_item("y", sol.y.into_pyarray_bound(py))?; + d.set_item("z", sol.z.into_pyarray_bound(py))?; + d.set_item("z_lb", sol.z_lb.into_pyarray_bound(py))?; + d.set_item("z_ub", sol.z_ub.into_pyarray_bound(py))?; + Ok(d) +} + +/// Extract a `QpWarmStart` from a Python mapping (typically a previous +/// result dict). Missing vector keys default to empty, so a partial warm +/// start (e.g. only `x`) is accepted; the solver validates dimensions and +/// falls back to a cold start if they don't match. +fn warm_from_dict(warm: &Bound<'_, PyDict>) -> PyResult { + let get = |key: &str| -> PyResult> { + match warm.get_item(key)? { + Some(v) => v.extract::>(), + None => Ok(Vec::new()), + } + }; + Ok(QpWarmStart { + x: get("x")?, + y: get("y")?, + z: get("z")?, + z_lb: get("z_lb")?, + z_ub: get("z_ub")?, + }) +} + +/// Parse `(kind, value)` tuples into [`ConeSpec`]s. `kind` is +/// case-insensitive. The float `value` means the **dimension** for +/// `"nonneg"`/`"nn"`/`"+"` and `"soc"`/`"q"` (rounded to an integer), the +/// **exponent α** for `"pow"`/`"power"` (the 3-D power cone, `α ∈ (0,1)`), +/// and the **matrix size n** for `"psd"`/`"sdp"` (which spans `n(n+1)/2` +/// svec rows). `"exp"`/`"exponential"` is the fixed-dimension-3 exponential +/// cone (its `value` is ignored). +fn parse_cones(specs: Vec<(String, f64)>) -> PyResult> { + specs + .into_iter() + .map(|(kind, v)| match kind.to_ascii_lowercase().as_str() { + "nonneg" | "nn" | "+" => Ok(ConeSpec::Nonneg(v.round() as usize)), + "soc" | "q" | "secondorder" => Ok(ConeSpec::SecondOrder(v.round() as usize)), + "exp" | "exponential" | "e" => Ok(ConeSpec::Exponential), + "pow" | "power" | "p" if v > 0.0 && v < 1.0 => Ok(ConeSpec::Power(v)), + "pow" | "power" | "p" => Err(PyValueError::new_err(format!( + "power-cone exponent α must be in (0, 1), got {v}" + ))), + "psd" | "sdp" | "s" => Ok(ConeSpec::Psd(v.round() as usize)), + other => Err(PyValueError::new_err(format!( + "unknown cone kind '{other}' (use 'nonneg', 'soc', 'exp', 'pow', or 'psd')" + ))), + }) + .collect() +} + +fn opts(tol: Option, max_iter: Option, collect_iterates: bool) -> QpOptions { + let mut o = QpOptions::default(); + if let Some(t) = tol { + o.tol = t; + } + if let Some(m) = max_iter { + o.max_iter = m; + } + o.collect_iterates = collect_iterates; + o +} + +/// Solve one convex QP. Returns a dict with the primal `x`, duals `y` +/// (equalities), `z` (inequalities), bound duals `z_lb`/`z_ub`, the +/// objective, iteration count, and a status string. +/// +/// `warm_start` (optional) is a mapping with `x`/`y`/`z`/`z_lb`/`z_ub` +/// keys — e.g. a previous result dict for a nearby problem. It only +/// affects the iteration count, not the solution; a dimension mismatch is +/// ignored (cold start). +/// +/// `collect_iterates` (default `false`) opts into the per-iteration +/// convergence trace, returned under the `iterates` key. +#[pyfunction] +#[pyo3(signature = (prob, tol=None, max_iter=None, warm_start=None, collect_iterates=false))] +pub fn solve_qp<'py>( + py: Python<'py>, + prob: &PyQpProblem, + tol: Option, + max_iter: Option, + warm_start: Option<&Bound<'py, PyDict>>, + collect_iterates: bool, +) -> PyResult> { + let o = opts(tol, max_iter, collect_iterates); + let warm = warm_start.map(warm_from_dict).transpose()?; + let sol = py.allow_threads(|| match &warm { + Some(w) => solve_qp_ipm_warm(&prob.inner, &o, w, backend), + None => solve_qp_ipm(&prob.inner, &o, backend), + }); + solution_dict(py, sol, Some(&prob.inner)) +} + +/// Solve a standard-form conic program (LP/QP plus second-order, exponential, +/// and/or **power** cones). The inequality block `Gx ≤ h` is partitioned by +/// `cones`, a list of `(kind, value)` tuples covering the `m_ineq` rows in +/// order; each `s = h − Gx` block must lie in its cone. `value` is the +/// dimension for `"nonneg"`/`"soc"` and the exponent α for `"pow"`; `"exp"` +/// is the fixed 3-D exponential cone. Variable bounds are appended as a +/// trailing nonnegative block. Returns the usual result dict. +/// +/// Problems containing an exponential or power cone route to the +/// non-symmetric HSDE driver, which also handles second-order cones — so a +/// SOC may be freely mixed with an exp/power cone. +#[pyfunction] +#[pyo3(signature = (prob, cones, tol=None, max_iter=None, collect_iterates=false))] +pub fn solve_socp<'py>( + py: Python<'py>, + prob: &PyQpProblem, + cones: Vec<(String, f64)>, + tol: Option, + max_iter: Option, + collect_iterates: bool, +) -> PyResult> { + let o = opts(tol, max_iter, collect_iterates); + let specs = parse_cones(cones)?; + // PSD (self-scaled, symmetric driver) cannot be mixed with the + // exponential/power cones (non-symmetric driver) in one problem. + let has_nonsym = specs + .iter() + .any(|c| matches!(c, ConeSpec::Exponential | ConeSpec::Power(_))); + let has_psd = specs.iter().any(|c| matches!(c, ConeSpec::Psd(_))); + if has_nonsym && has_psd { + return Err(PyValueError::new_err( + "the PSD cone cannot be combined with exponential/power cones in \ + one problem (they use different drivers)", + )); + } + // The cones must partition the rows of G exactly (an exp/power cone is + // always 3 rows; a PSD(n) cone is n(n+1)/2 svec rows). Catch the mismatch + // here with a clear, catchable error rather than letting the conic driver + // index past the slack vector. + let cone_rows: usize = specs.iter().map(|c| c.dim()).sum(); + if cone_rows != prob.inner.m_ineq() { + return Err(PyValueError::new_err(format!( + "cone dimensions sum to {cone_rows}, but G has {} inequality row(s); \ + the cones must partition the rows of G exactly \ + (an exponential or power cone is always 3 rows)", + prob.inner.m_ineq() + ))); + } + let sol = py.allow_threads(|| solve_socp_ipm(&prob.inner, &specs, &o, backend)); + // Conic slack lives in a non-orthant cone: skip the orthant residuals. + solution_dict(py, sol, None) +} + +/// Solve a batch of convex QPs in parallel (across instances). Returns a +/// list of result dicts in input order. Releases the GIL for the solve. +/// +/// `warm_starts` (optional) is a list of warm-start mappings (one per +/// problem, same length as `probs`) — e.g. the previous batch's result +/// dicts for a sequence of nearby batches. Each only affects its +/// instance's iteration count; a per-instance mismatch is ignored. +#[pyfunction] +#[pyo3(signature = (probs, tol=None, max_iter=None, warm_starts=None))] +pub fn solve_qp_batch<'py>( + py: Python<'py>, + probs: Vec, + tol: Option, + max_iter: Option, + warm_starts: Option>>, +) -> PyResult>> { + let o = opts(tol, max_iter, false); + let inners: Vec = probs.into_iter().map(|p| p.inner).collect(); + let warms: Option> = match warm_starts { + Some(ws) => { + if ws.len() != inners.len() { + return Err(PyValueError::new_err(format!( + "warm_starts has length {}, expected {} (one per problem)", + ws.len(), + inners.len() + ))); + } + Some(ws.iter().map(warm_from_dict).collect::>()?) + } + None => None, + }; + let sols = py.allow_threads(|| match &warms { + Some(w) => solve_qp_batch_parallel_warm(&inners, w, &o, serial_backend), + None => solve_qp_batch_parallel(&inners, &o, serial_backend), + }); + sols.into_iter() + .zip(inners.iter()) + .map(|(s, p)| solution_dict(py, s, Some(p))) + .collect() +} + +/// Solve one QP structure (`base`) against many linear objectives `cs` +/// (a sequence of length-`n` vectors), in parallel. Returns a list of +/// result dicts in order. +#[pyfunction] +#[pyo3(signature = (base, cs, tol=None, max_iter=None))] +pub fn solve_qp_multi_rhs<'py>( + py: Python<'py>, + base: &PyQpProblem, + cs: Vec>, + tol: Option, + max_iter: Option, +) -> PyResult>> { + for (k, c) in cs.iter().enumerate() { + if c.len() != base.inner.n { + return Err(PyValueError::new_err(format!( + "cs[{k}] has length {}, expected n = {}", + c.len(), + base.inner.n + ))); + } + } + let o = opts(tol, max_iter, false); + let base_inner = base.inner.clone(); + let sols = py.allow_threads(|| { + pounce_convex::solve_qp_multi_rhs_parallel(&base_inner, &cs, &o, serial_backend) + }); + // Each solve shares the base structure but uses its own objective `cs[k]`; + // attach residuals against that instance (a clone with `c` swapped in). + sols.into_iter() + .zip(cs.iter()) + .map(|(s, c)| { + let mut prob = base_inner.clone(); + prob.c = c.clone(); + solution_dict(py, s, Some(&prob)) + }) + .collect() +} + +/// Build-once / solve-many handle: builds the KKT symbolic factor once +/// for a fixed problem *structure* (same sparsity and set of finite +/// bounds), then reuses it across `solve()` calls that vary only the +/// numeric data. Mirrors `pounce.jax.JaxProblem`'s build-once ergonomics +/// for the convex QP solver. +#[pyclass(name = "QpFactorization", module = "pounce._pounce", unsendable)] +pub struct PyQpFactorization { + inner: QpFactorization, +} + +#[pymethods] +impl PyQpFactorization { + #[new] + #[pyo3(signature = (base, tol=None, max_iter=None))] + fn new(base: &PyQpProblem, tol: Option, max_iter: Option) -> PyResult { + let o = opts(tol, max_iter, false); + let inner = QpFactorization::build(&base.inner, &o, backend).ok_or_else(|| { + PyValueError::new_err( + "QpFactorization: initial factorization failed (structurally singular KKT system)", + ) + })?; + Ok(Self { inner }) + } + + /// Solve `prob`, reusing the captured symbolic factor. `prob` must + /// share the captured structure; otherwise the result dict has + /// status `"numerical_failure"`. + /// + /// `warm_start` (optional) seeds the iteration from a nearby problem's + /// solution, combining symbolic-factor reuse with warm starting. + #[pyo3(signature = (prob, warm_start=None))] + fn solve<'py>( + &mut self, + py: Python<'py>, + prob: &PyQpProblem, + warm_start: Option<&Bound<'py, PyDict>>, + ) -> PyResult> { + let sol = match warm_start { + Some(w) => self.inner.solve_warm(&prob.inner, &warm_from_dict(w)?), + None => self.inner.solve(&prob.inner), + }; + solution_dict(py, sol, Some(&prob.inner)) + } +} + +/// Post-optimal sensitivity for a convex QP — the sIPOPT analog. Solves the +/// problem on construction, then holds the active-set KKT factorization so +/// each `parametric_step` is a single back-substitution. Mirrors the NLP +/// `Solver` session (which caches the converged factor for +/// `parametric_step` / `reduced_hessian`), specialized to a QP. +#[pyclass(name = "QpSensitivity", module = "pounce._pounce", unsendable)] +pub struct PyQpSensitivity { + inner: QpSensitivity, + x: Vec, + obj: f64, + m_eq: usize, +} + +#[pymethods] +impl PyQpSensitivity { + /// Solve `prob` and build its sensitivity. `active_tol` (default `1e-7`) + /// is the multiplier threshold used to read the active set. Raises + /// `ValueError` if the QP does not solve to optimality, or if the + /// active-set KKT is singular (the parametric step is not unique). + #[new] + #[pyo3(signature = (prob, tol=None, max_iter=None, active_tol=1e-7))] + fn new( + prob: &PyQpProblem, + tol: Option, + max_iter: Option, + active_tol: f64, + ) -> PyResult { + let o = opts(tol, max_iter, false); + let sol = solve_qp_ipm(&prob.inner, &o, backend); + if sol.status != QpStatus::Optimal { + return Err(PyValueError::new_err(format!( + "QpSensitivity: the QP did not solve to optimality (status {}); \ + sensitivity is only defined at an optimum", + status_str(sol.status) + ))); + } + let (x, obj) = (sol.x.clone(), sol.obj); + let inner = QpSensitivity::build(&prob.inner, &sol, &o, active_tol, backend).map_err( + |e| match e { + SensError::NotOptimal => { + PyValueError::new_err("QpSensitivity: solution is not optimal") + } + SensError::FactorizationFailed => PyValueError::new_err( + "QpSensitivity: the active-set KKT is singular (the active constraint \ + gradients are rank-deficient), so the parametric step is not unique", + ), + }, + )?; + Ok(Self { + inner, + x, + obj, + m_eq: prob.inner.m_eq(), + }) + } + + /// First-order primal step `dx ≈ x*(b + Δb) − x*(b)` for a perturbation + /// of the equality right-hand side `b`: constraint + /// `pin_constraint_indices[k]` is perturbed by `deltas[k]`. Returns the + /// length-`n` sensitivity, so `sensitivity.x + dx` predicts the + /// perturbed solution (exact to first order while the active set holds). + fn parametric_step<'py>( + &mut self, + py: Python<'py>, + pin_constraint_indices: Vec, + deltas: Vec, + ) -> PyResult>> { + if pin_constraint_indices.len() != deltas.len() { + return Err(PyValueError::new_err(format!( + "pin_constraint_indices has length {} but deltas has length {}", + pin_constraint_indices.len(), + deltas.len() + ))); + } + for &i in &pin_constraint_indices { + if i >= self.m_eq { + return Err(PyValueError::new_err(format!( + "pin constraint index {i} out of range (the QP has {} equality \ + constraint(s); only equality-constraint RHS values are parameters)", + self.m_eq + ))); + } + } + let dx = self.inner.parametric_step(&pin_constraint_indices, &deltas); + Ok(dx.into_pyarray_bound(py)) + } + + /// Reduced Hessian of the QP on its active manifold (`Zᵀ P Z`) with its + /// eigendecomposition. Returns a dict with `n_dof` (degrees of freedom), + /// `matrix` and `eigenvectors` (flat, column-major `n_dof × n_dof`), and + /// `eigenvalues` (ascending). `rank_tol` (default `1e-9`) is the relative + /// threshold for the rank of the active Jacobian. + #[pyo3(signature = (rank_tol = 1e-9))] + fn reduced_hessian<'py>(&self, py: Python<'py>, rank_tol: f64) -> PyResult> { + let rh = self.inner.reduced_hessian(rank_tol); + let d = PyDict::new_bound(py); + d.set_item("n_dof", rh.n_dof)?; + d.set_item("matrix", rh.matrix.into_pyarray_bound(py))?; + d.set_item("eigenvalues", rh.eigenvalues.into_pyarray_bound(py))?; + d.set_item("eigenvectors", rh.eigenvectors.into_pyarray_bound(py))?; + Ok(d) + } + + /// The optimal primal solution `x*`. + #[getter] + fn x<'py>(&self, py: Python<'py>) -> Bound<'py, numpy::PyArray1> { + self.x.clone().into_pyarray_bound(py) + } + + /// The optimal objective value. + #[getter] + fn obj(&self) -> f64 { + self.obj + } + + /// The active-set KKT dimension `n + m_eq + n_active`. + #[getter] + fn kkt_dim(&self) -> usize { + self.inner.kkt_dim() + } +} diff --git a/crates/pounce-py/src/sos.rs b/crates/pounce-py/src/sos.rs new file mode 100644 index 00000000..fa7332a5 --- /dev/null +++ b/crates/pounce-py/src/sos.rs @@ -0,0 +1,83 @@ +//! PyO3 bindings for the sum-of-squares polynomial global optimizer +//! (`pounce-convex`'s `sos` module): `min p(x) s.t. gᵢ(x) ≥ 0, hⱼ(x) = 0` +//! solved by the SOS / Lasserre relaxation on the SDP cone, with a certified +//! lower bound and (when the moment matrix is flat) the global minimizers. +//! +//! Polynomials cross the FFI boundary as a list of `(exponent vector, +//! coefficient)` terms; the friendly `{exponent-tuple: coeff}` dict form is +//! handled in `python/pounce/sos.py`. + +use numpy::IntoPyArray; +use pounce_convex::{sos_minimize as core_sos_minimize, PolyProblem, Polynomial, QpStatus}; +use pounce_feral::FeralSolverInterface; +use pounce_linsol::SparseSymLinearSolverInterface; +use pyo3::exceptions::PyValueError; +use pyo3::prelude::*; +use pyo3::types::{PyDict, PyList}; + +fn backend() -> Box { + Box::new(FeralSolverInterface::new()) +} + +fn status_str(s: QpStatus) -> &'static str { + match s { + QpStatus::Optimal => "optimal", + QpStatus::PrimalInfeasible => "primal_infeasible", + QpStatus::DualInfeasible => "dual_infeasible", + QpStatus::IterationLimit => "iteration_limit", + QpStatus::NumericalFailure => "numerical_failure", + } +} + +/// Validate that every term's exponent vector has length `n_vars` and build a +/// [`Polynomial`]. +fn poly(n_vars: usize, terms: Vec<(Vec, f64)>, what: &str) -> PyResult { + for (e, _) in &terms { + if e.len() != n_vars { + return Err(PyValueError::new_err(format!( + "{what}: exponent vector has length {}, expected n_vars = {n_vars}", + e.len() + ))); + } + } + Ok(Polynomial::new(n_vars, terms)) +} + +/// Globally minimize a polynomial via the SOS/Lasserre relaxation. Returns a +/// dict with `lower_bound`, `status`, `is_exact`, `num_minimizers`, and +/// `minimizers` (a list of length-`n_vars` arrays — the global optimizers, +/// populated when the moment matrix is flat). +#[pyfunction] +#[pyo3(signature = (n_vars, objective, inequalities=vec![], equalities=vec![], order=None))] +pub fn sos_minimize<'py>( + py: Python<'py>, + n_vars: usize, + objective: Vec<(Vec, f64)>, + inequalities: Vec, f64)>>, + equalities: Vec, f64)>>, + order: Option, +) -> PyResult> { + let mut prob = PolyProblem::new(poly(n_vars, objective, "objective")?); + prob.inequalities = inequalities + .into_iter() + .map(|t| poly(n_vars, t, "inequality")) + .collect::>()?; + prob.equalities = equalities + .into_iter() + .map(|t| poly(n_vars, t, "equality")) + .collect::>()?; + + let sol = py.allow_threads(|| core_sos_minimize(&prob, order, backend)); + + let d = PyDict::new_bound(py); + d.set_item("lower_bound", sol.lower_bound)?; + d.set_item("status", status_str(sol.status))?; + d.set_item("is_exact", sol.is_exact)?; + d.set_item("num_minimizers", sol.num_minimizers)?; + let mins = PyList::empty_bound(py); + for m in sol.minimizers { + mins.append(m.into_pyarray_bound(py))?; + } + d.set_item("minimizers", mins)?; + Ok(d) +} diff --git a/crates/pounce-sensitivity/src/lib.rs b/crates/pounce-sensitivity/src/lib.rs index 3c45076d..b09c2452 100644 --- a/crates/pounce-sensitivity/src/lib.rs +++ b/crates/pounce-sensitivity/src/lib.rs @@ -29,7 +29,8 @@ //! `full_g_to_c_block` trait methods (which delegate to //! `BoundClassification.x_not_fixed_map` / `c_map`). //! * **Reduced-Hessian eigendecomposition** ✔ — pure-Rust cyclic Jacobi -//! in [`eigen::symmetric_eigen`]; surfaced via +//! in [`pounce_linalg::symmetric_eigen`] (shared with the convex QP +//! sensitivity path); surfaced via //! [`SensApplication::compute_reduced_hessian_eigen`], //! [`SensSolve::with_reduced_hessian_eigen`], the `pounce_sens //! --rh-eigendecomp` flag, and the Python `solve_with_sens(rh_eigendecomp=True)` @@ -66,7 +67,6 @@ pub mod algorithm_backsolver; pub mod backsolver; pub mod boundcheck; pub mod convenience; -pub mod eigen; pub mod p_calculator; pub mod reduced_hessian; pub mod schur_data; @@ -78,8 +78,10 @@ pub mod step_calc; pub use algorithm_backsolver::PdSensBacksolver; pub use backsolver::{DenseLuBacksolver, SensBacksolver}; pub use convenience::{SensResult, SensSolve}; -pub use eigen::symmetric_eigen; +// Hoisted to pounce-linalg so the convex QP sensitivity path can share it; +// re-exported here to preserve `pounce_sensitivity::symmetric_eigen`. pub use p_calculator::{IndexPCalculator, PCalculator}; +pub use pounce_linalg::symmetric_eigen; pub use reduced_hessian::compute_reduced_hessian; pub use schur_data::{IndexSchurData, SchurData}; pub use schur_driver::{DenseGenSchurDriver, SchurDriver}; diff --git a/crates/pounce-sensitivity/src/p_calculator.rs b/crates/pounce-sensitivity/src/p_calculator.rs index cb35a2a5..8ae02100 100644 --- a/crates/pounce-sensitivity/src/p_calculator.rs +++ b/crates/pounce-sensitivity/src/p_calculator.rs @@ -258,7 +258,7 @@ mod tests { use crate::backsolver::DenseLuBacksolver; #[test] - fn compute_p_solves_each_a_column_against_K() { + fn compute_p_solves_each_a_column_against_k_matrix() { // K is the 3×3 SPD example from the backsolver test. // 2 -1 0 // -1 2 -1 diff --git a/crates/pounce-sensitivity/src/sens_app.rs b/crates/pounce-sensitivity/src/sens_app.rs index 359833f7..2e46ebae 100644 --- a/crates/pounce-sensitivity/src/sens_app.rs +++ b/crates/pounce-sensitivity/src/sens_app.rs @@ -22,13 +22,13 @@ //! a synthetic dense LU. use crate::backsolver::SensBacksolver; -use crate::eigen::symmetric_eigen; use crate::p_calculator::IndexPCalculator; use crate::reduced_hessian::compute_reduced_hessian; use crate::schur_data::{IndexSchurData, SchurData}; use crate::schur_driver::{DenseGenSchurDriver, SchurDriver}; use crate::step_calc::{SensStepCalc, StdStepCalc}; use pounce_common::types::Number; +use pounce_linalg::symmetric_eigen; /// User-facing entry point for sensitivity analysis on a converged /// pounce solve. diff --git a/crates/pounce-solve-report/src/lib.rs b/crates/pounce-solve-report/src/lib.rs index 88422c7c..63cd2882 100644 --- a/crates/pounce-solve-report/src/lib.rs +++ b/crates/pounce-solve-report/src/lib.rs @@ -190,6 +190,13 @@ pub enum InputDescriptor { #[serde(skip_serializing_if = "Option::is_none")] size_bytes: Option, }, + /// A Conic Benchmark Format (`.cbf`) instance — e.g. a CBLIB problem + /// solved through the convex conic driver. + CbfFile { + path: PathBuf, + #[serde(skip_serializing_if = "Option::is_none")] + size_bytes: Option, + }, Builtin { name: String, }, diff --git a/dev-notes/cargo-release.md b/dev-notes/cargo-release.md index 9344960b..7f11b97f 100644 --- a/dev-notes/cargo-release.md +++ b/dev-notes/cargo-release.md @@ -1,6 +1,6 @@ # crates.io release -POUNCE ships 18 Rust crates to crates.io. This file is the procedure. +POUNCE ships 21 Rust crates to crates.io. This file is the procedure. For the PyPI side (`pounce-solver` + `pyomo-pounce`), see `pypi-release.md`. @@ -18,6 +18,7 @@ For the PyPI side (`pounce-solver` + `pyomo-pounce`), see | `pounce-l1penalty` | yes | | | `pounce-presolve` | yes | | | `pounce-algorithm` | yes | IPM core | +| `pounce-simplex` | yes | warm-start simplex LP; pounce-global OBBT dep| | `pounce-restoration` | yes | | | `pounce-sensitivity` | yes | sIPOPT port | | `pounce-cinterface` | yes | C ABI (CreateIpoptProblem / IpoptSolve) | @@ -34,18 +35,32 @@ publish script enforces the same list and will skip them by construction. ## Dependency order -Layer 0: `pounce-common`, `pounce-studio-core` (leaf: serde only) +Layer 0: `pounce-common`, `pounce-studio-core` (leaf: serde only), + `pounce-simplex` (leaf: std only) Layer 1: `pounce-linalg` Layer 2: `pounce-linsol`, `pounce-nlp` -Layer 3: `pounce-nl`, `pounce-feral`, `pounce-hsl`, `pounce-l1penalty`, `pounce-presolve` +Layer 3: `pounce-nl`, `pounce-feral`, `pounce-hsl`, `pounce-l1penalty`, `pounce-presolve`, `pounce-convex` Layer 4: `pounce-algorithm` -Layer 5: `pounce-restoration`, `pounce-sensitivity` +Layer 5: `pounce-restoration`, `pounce-sensitivity`, `pounce-global` Layer 6: `pounce-cinterface`, `pounce-cli` +`pounce-convex` (LP/QP/SOCP/SDP conic IPM) depends only on +`pounce-common` + `pounce-linsol` + `pounce-linalg`, so it sits in layer 3. +`pounce-global` (spatial branch-and-bound) depends on `pounce-convex` **and** +`pounce-algorithm`, so it cannot publish before layer 4 — it sits in layer 5. +Both are **new crate names** as of 0.4.0 and so are subject to the new-crate +rate limit on their first publish (see below). + `pounce-studio-core` is a leaf (serde/serde_json only); it can publish any time before `pounce-cli`. `pounce-nl` depends on `pounce-common` + `pounce-nlp`, so it sits in layer 3. +`pounce-simplex` (warm-start bounded-variable revised simplex, used by +`pounce-global`'s OBBT inner loop) is a leaf with **no dependencies** (std +only), so it can publish any time before `pounce-global` (layer 5); the script +places it just before it. It is a **new crate name** as of this release and so +is subject to the new-crate rate limit on first publish. + The script publishes one crate at a time in this layered order, not in parallel — each crate must be live on crates.io before any dependent crate can publish, and the index update is not instantaneous. diff --git a/dev-notes/clarabel-parity.md b/dev-notes/clarabel-parity.md new file mode 100644 index 00000000..9657fb8e --- /dev/null +++ b/dev-notes/clarabel-parity.md @@ -0,0 +1,162 @@ +# Clarabel cone parity for the convex IPM — design note + +**Status: scoping.** POUNCE's `pounce-convex` solves LP/QP/SOCP over a +product of nonnegative orthants and second-order cones (see +`socp-extension.md`). This note scopes closing the remaining cone gap +versus [Clarabel](https://github.com/oxfordcontrol/Clarabel.rs): the +**exponential**, **power**, and **positive-semidefinite (PSD)** cones. +Together with what we have, that is the full Clarabel cone set and covers +geometric programming, entropy/logistic/softmax models, robust/relative- +entropy programs, and semidefinite programming. + +## Where we are + +The IPM is a Mehrotra predictor–corrector over the +[`Cone`](../crates/pounce-convex/src/cones/mod.rs) trait, dispatched +block-wise by [`CompositeCone`]. Every cone supplies `mu`, a `kkt_block` +(the `(z,z)` scaling), `comp_residual{,_corrector}`, `recover_ds`, +`rhs_comp_term`, `max_step`, `recenter_warm`. The driver, residuals, +factor reuse, presolve postsolve, batch, and warm start are all +cone-agnostic and reused. + +The crucial property the current driver **assumes**: the cone is +**symmetric** (self-scaled). Concretely it bakes in + +1. a Jordan product `s∘z` and centrality `μ = ⟨s,z⟩/degree`, +2. a Nesterov–Todd scaling point `W` with `W² z = s` (the `kkt_block`), +3. the Mehrotra corrector second-order term `ds_aff ∘ dz_aff`. + +Nonneg and SOC are symmetric, so they fit. **PSD is symmetric too.** +**Exp and power are not.** + +## Two machinery tracks + +### Track S — PSD (symmetric, extends what we have) + +The PSD cone `S₊ᵏ = { X = Xᵀ : X ⪰ 0 }` is self-scaled, so it slots into +the existing predictor–corrector with the *matrix* analogues of the SOC +algebra: + +- **Vectorization.** Slack/dual are symmetric `k×k` matrices stored in + `svec` (scaled lower triangle, off-diagonals ×√2 so `⟨svec a, svec b⟩ = + ⟨A,B⟩`). A PSD block spans `k(k+1)/2` rows. +- **Jordan product / centrality.** `A∘B = ½(AB+BA)`, identity `I`, + `μ = ⟨S,Z⟩/k`, degree `k` per block. +- **NT scaling.** `W` from `R` with `RᵀZR = I`, `RᵀSR⁻¹... ` — in practice + `W = Z^{-1/2}(Z^{1/2}SZ^{1/2})^{1/2}Z^{-1/2}` (one symmetric + eigendecomposition of `Z^{1/2}SZ^{1/2}` per iteration per block). The + `kkt_block` is the dense `W⊗ₛW` operator on `svec` (a new + `ConeBlock::Dense`/operator form — *not* diagonal-plus-rank-1). +- **Step to boundary.** `max_step` = largest `α` keeping `V + αdV ⪰ 0`, + i.e. `1/λ_max(-V^{-1/2} dV V^{-1/2})` (a generalized-eigenvalue / Cholesky + line search), the matrix analogue of SOC's boundary root. + +**Lift:** an eigendecomposition (or two) per PSD block per iteration, the +`svec`/`smat` plumbing, and a genuinely **dense** `(z,z)` block (the SOC +diagonal-plus-rank-1 trick does not apply). For large/sparse SDPs, +competitiveness needs **chordal decomposition** (Clarabel's `clique` +merging) — split a sparse PSD constraint into many small coupled PSD +blocks. That is a sizable sub-project on its own and can be a later phase +(small dense SDPs first, chordal later). + +**Risk:** medium-high but *contained to the existing loop* — no new IPM. +The risk is matrix-algebra correctness (NT matrix scaling, the dense KKT +operator, the eigen line search), validated the usual way (known SDP +optima: min/max eigenvalue, Lyapunov, a small SDP relaxation; plus a +randomized KKT-residual check). + +### Track N — Exponential & power (non-symmetric, new IPM components) + +`K_exp = cl{ (x,y,z) : y>0, y·e^{x/y} ≤ z }` and the power cone +`K_pow^α = { (x,y,z) : x^α y^{1-α} ≥ |z|, x,y≥0 }` are **not** self-scaled: +there is no `W` with `W²z = s`, no `s∘z`, no symmetric `μ`. They need the +non-symmetric path-following machinery (Nesterov–Todd 1997; Skajaa–Ye +2015; Dahl–Andersen 2021 — the MOSEK exp-cone algorithm; the approach +Clarabel and Hypatia use): + +- **Barrier oracles.** Each cone supplies its logarithmically-homogeneous + self-concordant barrier `f`, gradient `g=∇f`, and Hessian `H=∇²f` + (exp-cone barrier `−log(y log(z/y) − x) − log y − log z`, degree 3). The + trait grows `barrier_grad`/`barrier_hess` (symmetric cones can supply + closed forms too, unifying the code). +- **Scaling.** Replace the NT point with a **dual-aware primal–dual + scaling** built from *both* cone iterates — the Tunçel scaling (Tunçel + 2001; Myklebust–Tunçel 2014), specialized to 3-D and computed by a BFGS + update as in Dahl–Andersen 2021. The `kkt_block` becomes that dense, small + (3×3 for exp/power) `WᵀW`. The cheaper primal-only Hessian scaling was + tried and **stalls** (the dual races to the boundary); see the worked + construction and prototype findings in `hsde.md` (§"The dual-aware scaling + (item #1)"). +- **Centrality & step.** `μ = ⟨s,z⟩/Σdegree` still defines the target, but + the corrector uses a **third-order** correction term (not `ds∘dz`) — + Dahl–Andersen's Mehrotra-like nonsymmetric corrector — and the step + length needs a **neighborhood / line search on the barrier** (stay where + `f` is finite and inside the wider neighborhood), since there is no + closed-form boundary root. +- **Robustness ⇒ HSDE (decision point).** Non-symmetric cones are far more + robustly handled inside a **homogeneous self-dual embedding** (Clarabel, + SCS, ECOS-exp all do). Our solver currently uses a direct primal–dual + method with explicit Farkas/recession certificates. Adding exp/power + *without* HSDE is possible (Mosek-style) but more fragile and complicates + infeasibility detection; adding HSDE first is a foundational investment + that also cleans up certificates and gives a single uniform driver for + all cones. **This is the biggest architectural decision in the program.** + +**Lift:** new IPM components (barrier oracles, non-symmetric scaling, +higher-order corrector, neighborhood line search) and, recommended, the +HSDE reformulation of the driver. The cones themselves are tiny (3-D), so +once the machinery exists, **power cone is incremental over exp cone** +(same framework, different barrier). + +**Risk:** high — this is effectively a second IPM. Validate against known +optima (GP: posynomial min; entropy max; logistic regression NLL; the +exp-cone "softplus" epigraph) and randomized KKT residuals. + +## Trait / driver changes (both tracks) + +- `ConeBlock` gains a **dense operator** form for PSD (`W⊗ₛW` apply) and a + small-dense form for exp/power (3×3); the KKT assembly already has a + dense-lower path from SOC Tier-A — generalize it. +- `Cone` gains `barrier_grad`/`barrier_hess` (Track N), and PSD needs an + `svec` working buffer + eigendecomposition (Track S). A small dense + symmetric eig (Jacobi or tridiagonal QL) lands in the crate — **pure + Rust, no LAPACK** (the project's standing constraint). +- Cold start: PSD at `I` (in svec), exp/power at the cones' analytic + central ray. +- Presolve: gate `≤`-row reductions off PSD/exp/power blocks exactly as + `presolve_conic` already does for SOC (coupled rows). +- Differentiable layer (last, per cone): the OptNet backward needs each + cone's complementarity differential — the symmetrized matrix product for + PSD, the barrier-Hessian form for exp/power — added and FD-validated as a + distinct follow-up, exactly as SOC was. + +## Recommended ordering (for discussion) + +Three coherent ways to sequence; the choice is a genuine trade of +value-first vs risk-first and is the open question: + +1. **Exp cone first (value-first).** Unlocks the largest *new application + surface* (GP, logistic, entropy, softmax, relative entropy — the + ML/stats workhorses) and builds the non-symmetric machinery that power + cone then reuses almost for free. Highest value, highest risk; likely + wants HSDE underneath. +2. **PSD cone first (fits-our-framework).** Stays inside the symmetric + predictor–corrector we trust; marquee SDP capability; the linear-algebra + lift (eig, svec, dense block, later chordal) is heavy but the *algorithm* + is familiar. Lower algorithmic risk, no HSDE needed. +3. **HSDE foundation first.** Reformulate the driver into a homogeneous + self-dual embedding, then drop exp → power → PSD onto it uniformly + (Clarabel's structure). Slowest to first visible win, but the cleanest + end state and the most robust non-symmetric handling. + +| Track | Cone | Machinery | Value | Risk | +|---|---|---|---|---| +| S | PSD | extends NT; eig + dense svec block; chordal later | SDP | med-high (contained) | +| N | Exp | non-symmetric IPM; barrier oracles; +HSDE | GP/ML/entropy | high | +| N | Power | exp machinery + new barrier | robust/`p`-norm | low *after* exp | + +Each cone follows the SOCP playbook: land forward/solve with intrinsic +validation (known optima + randomized KKT residual), gate presolve, add +warm-start recentering, then a cone-aware differentiable backward as a +separate FD-validated follow-up. The orthant/SOC paths stay byte-identical +throughout. diff --git a/dev-notes/discopt-pounce-integration.md b/dev-notes/discopt-pounce-integration.md new file mode 100644 index 00000000..91c54829 --- /dev/null +++ b/dev-notes/discopt-pounce-integration.md @@ -0,0 +1,126 @@ +# pounce ⟷ discopt — the value of a deep, co-designed integration + +> Discussion note. [discopt](https://github.com/jkitchin/discopt) is a MINLP +> modeling language + spatial branch-and-bound (B&B) orchestrator. It already +> lists POUNCE as one of three NLP backends (alongside a pure-JAX IPM and +> cyipopt). This note is about what going **beyond a generic solver-plugin +> interface** to a deep, co-designed integration unlocks — and why it changes +> what the combined system *is*. + +## The core insight + +Spatial B&B calls the NLP solver **thousands of times** over a tree in which +each child node differs from its parent by **one changed bound**. A generic +plugin treats every node as a cold, independent solve across a serialization +boundary (`.nl` file / fresh process state). Almost all the leverage of a deep +integration comes from refusing that — letting warm state, certificates, +relaxations, the AD graph, and diagnostics *flow through the tree* instead of +being rebuilt at every node. + +A generic plugin makes discopt a **dispatcher** that hands problems to whichever +solver. Deep co-design makes pounce+discopt **one solver that happens to have a +modeling front-end and a B&B loop wrapped around the same numerical state.** +That is the difference between "a fast NLP solver under a B&B loop" and "an +MINLP engine." + +## Value map + +### B1 — Warm-starting across the tree (the biggest single win) +- Child = parent + one tightened bound → warm-start primal **and** dual **and** + the barrier μ. pounce already has this primitive: `solve_with_warm` with dual + + μ threading (pounce#86). A generic plugin discards it at every node. +- KKT sparsity is identical across the whole tree → symbolic-factorize once, + numeric-refactor per node. `pounce-feral` could expose a "same pattern, new + values" fast path. +- `pounce-qp`'s parametric active-set corrector is literally a "solve a small + perturbation of the last problem" engine — exactly the node→node step. + +### B2 — Bounds & certificates flowing both ways +- **Early-fathom from dual bounds:** B&B needs a *valid lower bound*, not a fully + converged node. Expose pounce's mid-solve dual bound so discopt fathoms without + solving to optimality. +- **Infeasibility certificates → instant prune:** pounce-convex emits + Farkas/infeasibility certificates; a certified-infeasible relaxation prunes the + subtree *with proof*, not a tolerance. +- **Sensitivity → branching:** `pounce-sensitivity` (sIPOPT) gives ∂x*/∂(bound) — + exactly the signal for strong-branching pseudo-costs, free from a solve already + done. + +### B3 — One relaxation / convexification engine (kill the duplication) +- Both sides do McCormick + bound-tightening today: `pounce-global` (McCormick + + OBBT/FBBT), `pounce-presolve` (FBBT + auxiliary elimination), and discopt + (McCormick + AMP adaptive partitioning). Co-design → **one** relaxation library + and cone catalog used by both the node relaxation and the tree, not two + parallel implementations that can silently disagree. +- discopt's AMP and pounce-global's spatial B&B are the same algorithm class. + Co-design decides who owns the tree *once*. + +### B4 — One problem IR, no `.nl` round-trip +- The modeling language compiles **once** to a structure pounce consumes natively: + sparsity pattern, colored-AD coloring, Hessian-of-Lagrangian structure, + variable/constraint partition. Both sides already use JAX AD — the traced graph + is a *shared asset*, not a bridge to be serialized per node. + +### B5 — Differentiable MINLP (moonshot differentiator) +- pounce.jax already makes the *NLP* differentiable. With the integer/branching + decisions fixed at the solution, discopt could expose ∂(MINLP solution)/∂(params) + → a **differentiable mixed-integer layer** you backprop through. Almost nobody + ships this. Ties directly into vision.md pillar 2. + +### B6 — Tree-level diagnostics & agent-drivability +- pounce has an interactive debugger + MCP surface (`pounce-studio`). Lift it from + per-solve to **per-tree**: which node stalled, which relaxation was loosest, + where the gap stopped closing, why a subtree won't prune. An LLM agent driving + an MINLP debug session is something no classical MINLP stack (BARON, Couenne, + SCIP) was built for. + +### B7 — Distribution, trust, certification +- `pip install`, pure-Rust core, **no GAMS/BARON/commercial license** underneath — + discopt ships pounce embedded, reproducible. +- Extend signed solve receipts (`pounce verify`) to the **whole MINLP proof**: a + verifiable certificate of the global optimality gap with node-level bounds. + "Certified global, and here's the signed proof," end to end. + +## Priorities (impact × effort) + +``` +HIGH IMPACT / LOWER EFFORT (do first — proof points) + ✓ B1 warm-start primal+dual+μ across nodes (primitive exists: solve_with_warm, pounce#86) + ✓ B2 dual-bound early fathom + certificate pruning + ✓ B4 shared in-memory JAX problem IR (no .nl round-trip per node) + +HIGH IMPACT / HIGHER EFFORT (strategic bets) + ★ B3 single relaxation/bound-tightening engine shared by both + ★ B6 tree-level debugger + MCP (the agent differentiator) + ★ B7 certified-gap signed receipts for the full MINLP + +MOONSHOT + ◇ B5 differentiable MINLP layer + +SUPPORTING + ○ B1 KKT symbolic-factorize-once / numeric-refactor fast path + ○ B2 sensitivity-driven branching pseudo-costs +``` + +## The co-design API surface + +What the interface must expose that a generic plugin *cannot*: + +- **warm-state in/out** — primal, duals (`mult_g`, `mult_x_L/U`), and μ, threaded + node→node (already prototyped in `solve_with_warm`). +- **valid-bound-without-full-convergence** — a dual lower bound mid-solve for + early fathoming. +- **certificate out** — infeasibility/Farkas certificate for proof-based pruning. +- **shared sparsity / IR handle** — hand pounce the in-memory traced problem, not + a serialized file. +- **sensitivity out** — ∂x*/∂(bound) for branching heuristics. +- **per-node diagnostic stream** — feed the studio/MCP tree-level debugger. + +## Next steps + +- Prototype B1 end-to-end: discopt threads pounce's `solve_with_warm` warm-state + down the tree; measure node-solve speedup vs. cold `.nl` dispatch. +- Open tracking issues (pounce and/or discopt) for B1 / B2 / B4 — the lower-effort, + high-impact trio — mirroring pounce#109. +- Decide tree ownership (B3): does the spatial B&B live in `pounce-global`, + `discopt-core`, or a shared crate? This is the load-bearing architectural call. diff --git a/dev-notes/education-research.md b/dev-notes/education-research.md new file mode 100644 index 00000000..d3aaeadc --- /dev/null +++ b/dev-notes/education-research.md @@ -0,0 +1,123 @@ +# pounce for education & research — the introspectable, LLM-explainable solver + +> Discussion note. The claim: pounce's interactive debugger + LLM/MCP +> integration is a capability **no other optimization solver has**, and it is +> uniquely valuable for *teaching* and *research*. This note grounds that claim +> in the shipping surface and maps the value for both audiences. + +## The shipping surface this rests on + +From `pounce-studio` (CLI skill + MCP server) and the `--debug` solver mode: + +- **Live debugger** — Ctrl-C breaks into a running solve at the next iteration; + inspect the iterate (primals, duals, KKT residuals, μ, inertia); `sweep` a + variable, `multistart` from jittered points, `load` a saved iterate and step + forward. +- **`explain`** — a glossary of every per-iteration column (`inf_pr`, `inf_du`, + `mu`, `alpha`, inertia, …) *and* the `diagnose` finding codes. The trace is + self-documenting. +- **`citations`** — curated paper references keyed by subsystem / bib key, so + observed behavior links straight to the literature. +- **`diagnose`** — Ipopt-failure heuristics with severity-tagged findings. +- **`convergence_trace` / `find_stalls` / `restoration_windows` / `get_iterate`** + — the trajectory as queryable, structured data. +- **`verify`** — signed, content-addressed solve receipts. +- All of it **driven conversationally over MCP** by any LLM client. + +## Why "no other solver has this" — the unoccupied quadrant + +Two axes: **introspectable internals** × **LLM-grounded explanation**. + +- **Ipopt / SNOPT / KNITRO** — print a log wall; no live debugger, no LLM, + internals behind a C/Fortran ABI. +- **Gurobi / BARON / commercial** — black box by design, licensed, no internal + introspection. +- **CVXPY / JuMP / Pyomo** — modeling layers; the solver underneath is opaque. +- **Toy teaching solvers** — introspectable but *not faithful* to a production + algorithm, so nothing transfers. + +pounce occupies the empty intersection: a **faithful production algorithm** (the +Ipopt port — same logs and option semantics, so skills transfer to the tool people +actually use) that is **fully introspectable** and **explained by an LLM grounded +in the real trace and the literature.** Nothing else lives there. + +## Education value + +- **E1 — Glass-box pedagogy.** Students watch the IPM *actually run* — μ shrinking, + inertia corrections firing, the filter accepting/rejecting steps, restoration + kicking in — instead of a black box returning `x*`. `explain` makes every column + self-documenting; the trace *is* the textbook. +- **E2 — A TA that watches your solve.** LLM + MCP = a tutor that reads *your* + trace, finds the stall window (`find_stalls`), explains it in algorithm terms, + and cites the paper (`citations`). Socratic mode: "`inf_du` is rising while + `inf_pr` falls — what does that say about dual feasibility?" Scales to every + student, every solve, any hour. +- **E3 — Zero-setup classroom.** `pip install`, pure Rust, **no HSL, no licenses, + no GAMS.** Identical on every student laptop and in CI. Removes the single + biggest practical barrier to teaching real optimization. +- **E4 — Grade the process, not just the answer.** Signed `verify` receipts + + solve-report JSON as artifacts → assignments where the student submits a + *trace*, and autograding inspects *how* they got there (warm-start? why 200 + iters?). The solve becomes a gradeable, reproducible object. +- **E5 — Curriculum-as-code.** Builtin problems (HS suite, Rosenbrock), GAMS + examples, and the report schema → ready-made problem sets with known, documented + behavior the LLM can reference. +- **E6 — Teaching differentiable optimization / SciML.** pounce.jax + the debugger + → a course where students *inspect the KKT system being differentiated* (the + implicit function theorem made concrete), bridging classical optimization and + modern ML in one tool. + +## Research value + +- **R1 — The trace as a dataset.** `convergence_trace` + the `.iterdump` binary + format across whole suites = a reproducible corpus for studying restoration + triggers, stall morphology, μ-strategy behavior. Pure-Rust determinism means + results replicate exactly. +- **R2 — A hackable, faithful baseline.** Researchers fork the *readable Rust* + algorithm — swap a barrier update, a filter rule, a step-acceptance test — and + A/B it against the faithful-Ipopt baseline in one codebase, not Fortran behind an + ABI. The faithfulness is the experimental control. +- **R3 — LLM-as-experimentalist.** An agent drives the MCP surface to run studies: + "run these 5 μ-update strategies over the Mittelmann set, cluster failures by + `diagnose` code, summarize which converged faster and hypothesize why." The + solver becomes scriptable by an agent that also does the literature-grounded + write-up. +- **R4 — A failure-mode taxonomy.** `diagnose` + `find_stalls` + + `restoration_windows` systematized across suites → a catalog of *where and why* + IPMs fail, as a publishable research artifact. +- **R5 — One lens across the whole family.** NLP, conic, global, and (via discopt) + MINLP share the report/debug surface → study B&B node behavior, conic centrality, + and global bounding *with the same instrument* — cross-solver-class research + that's normally impossible because each solver has its own opaque format. + +## What to lead with + +``` +HEADLINE (unique, defensible, demonstrable today): + ★ "The first optimization solver you can debug interactively and ask an LLM to + explain — grounded in the real trace and the literature." = E1 + E2 + +HIGH-LEVERAGE EDUCATION: + ✓ E3 zero-setup classroom (removes the #1 adoption barrier; true now) + ✓ E4 grade-the-process (signed receipts already exist) + +HIGH-LEVERAGE RESEARCH: + ★ R2 hackable faithful baseline + R1 trace-as-dataset (pure-Rust determinism enables it) + ○ R3 LLM-as-experimentalist (the agent differentiator, longer horizon) +``` + +## The through-line + +Every other solver treats the solve as a *transaction*: submit, wait, read the +answer. pounce treats it as an **observable, explainable, reproducible process** — +and the LLM/MCP layer turns that observability into *conversation*. For education +that's a tutor that scales; for research that's an instrument with a faithful +baseline and a deterministic trace. It is the "legible to agents" pillar pointed +at the two audiences where legibility *is* the value. + +## Publishable angle + +This is itself a paper: *"An LLM-drivable interactive debugger for interior-point +methods as a pedagogical and research instrument."* JOSS (software) or an +education-track venue; the Zenodo DOI + CITATION.cff infrastructure is already in +the README. R4 (failure-mode taxonomy) is a second, more methods-flavored paper. diff --git a/dev-notes/global-perf-phase-2-4.md b/dev-notes/global-perf-phase-2-4.md new file mode 100644 index 00000000..86656865 --- /dev/null +++ b/dev-notes/global-perf-phase-2-4.md @@ -0,0 +1,256 @@ +# pounce-global perf: Phases 2–4 execution plan (loop-driven) + +A checklist the `/loop` workflow can walk top-to-bottom. Each task is a +self-contained, independently-shippable unit with an **acceptance check** and a +**soundness note**. Do them in order; check the box only when its acceptance +check passes. + +## How to use this doc (loop protocol) + +On each iteration: +1. Pick the **first unchecked `- [ ]` task** below. +2. Implement exactly that task — no scope creep into later tasks. +3. Run the task's **acceptance check** (build + targeted tests). It must pass. +4. Flip the box to `- [x]` and append a one-line result note (date, what landed). +5. Stop. The next iteration takes the next task. + +Do **not** batch multiple tasks per iteration; the value of the loop is small, +verifiable steps. If a task turns out to need a precursor, insert a new +unchecked task **above** it and do that first. + +## Validation policy (user, 2026-06-07 — supersedes per-task sweeps) + +**No GLOBALLib timing sweeps in the loop.** Validate correctness on small +problems only: the fast Rust suites (`cargo test -p pounce-global`, plus +`pounce-convex`/`pounce-simplex` when touched) prove the one hard invariant +(0 WRONG) in seconds. The smoke/full sweeps below are kept for reference but are +**not run by the loop** — the smoke set is dominated by shallow tripwire trees +and root-bound canaries, so it can't discriminate the perf levers anyway (see +task 2.4). Any performance confirmation and any non-trivial `Default` change are +deferred to a manual full sweep the user runs when they choose. Tasks that say +"smoke set" now mean "small-problem Rust correctness tests." + +## Hard invariants (every task preserves these) + +- **0 WRONG.** No change may alter a certified optimum. Every lever here is + perf/robustness only. A false-infeasible or a certified value that moves past + tolerance is a soundness regression — stop and revert. +- **IPM stays the OBBT engine.** The revised simplex is parked behind the + off-by-default `simplex-obbt` feature (unsound on ill-scaled LPs; real fix is + the sparse-LU rewrite, task #24 — out of scope here). Do not enable it. +- **Conservative defaults.** Each new knob defaults to *today's behavior* (no + change) so a stock build never loses tightness. Tuned defaults are set only + after a GLOBALLib sweep proves they raise the OK count at 0 WRONG. +- **Measurement hygiene.** The GLOBALLib success metric is timing-sensitive + (single-thread pounce subprocesses, 30 s wall each). **Never** run + `cargo build`/`test` or a second benchmark concurrently with a GLOBALLib + timing sweep — CPU contention tips borderline-OK models over the limit and + fakes a regression. Serialize them. + +## Baseline & success metric + +- **Full metric:** `python3 benchmarks/globallib/run_globallib.py --timeout 30` + (all 104 models, ~30–50 min, must run solo). Baseline (pre-Phase-2): + **59 OK / 45 TIMEOUT / 0 WRONG**. This is the **final gate only** (task 4.4). +- **Smoke metric (per-task):** a fast 10-model subset for catching regressions + and soundness breaks during development, ~1.5 min: + + ``` + python3 benchmarks/globallib/run_globallib.py --timeout 20 \ + ex2_1_1 ex2_1_2 ex3_1_4 ex5_2_2_case1 ex8_1_1 ex4_1_8 \ + ex4_1_2 ex9_1_8 ex3_1_1 haverly + ``` + + Pass solver knobs through with `--opt`, e.g. + `--opt global_obbt_max_depth=8`. Forwards to `pounce + solver_selection=global global_obbt_max_depth=8`. + + Two roles (measured, not assumed — most "small" GLOBALLib models actually time + out, so the set was picked from a probe of what certifies fast): + - **Soundness tripwire (currently OK, <1 s each):** `ex2_1_1` (−17), + `ex2_1_2` (−213), `ex3_1_4` (−4), `ex5_2_2_case1` (−400), `ex8_1_1` (−2.02), + `ex4_1_8` (−16.7). Spread over n=2…9. A WRONG value or a *new* timeout here is + an immediate red flag — these must stay OK with the same certified value. + - **Rescue / canary (currently TIMEOUT under the IPM default):** `ex4_1_2` + (−663.5, the ill-scaled model that broke the simplex), `ex9_1_8` (−3.25, the + false-infeasible canary), `ex3_1_1` (7049.25), `haverly` (−400). A phase that + rescues one flips it to OK *and* the harness checks the rescued value — so a + rescue is automatically a soundness check. They must never certify a WRONG + value or report infeasible. + + **Smoke is necessary, not sufficient.** Green smoke ⇒ keep going; it does NOT + prove an OK-count gain. Only the full sweep (4.4) decides the final defaults. + Each non-final validation task below uses the smoke set; only **4.4** runs the + full 104-model sweep. + +- Goal: raise OK at fixed 30 s wall on the full sweep, holding 0 WRONG. + +### Smoke baseline (pre-Phase-2, IPM default) + +`2026-06-07` · default opts · **6 OK / 4 TIMEOUT / 0 WRONG**. OK = +{ex2_1_1, ex2_1_2, ex3_1_4, ex5_2_2_case1, ex8_1_1, ex4_1_8}; +TIMEOUT = {ex4_1_2, ex9_1_8, ex3_1_1, haverly}. Any Phase-2..4 smoke run must +keep all 6 OK at their baseline values and 0 WRONG; rescues move models from the +TIMEOUT set into OK. + +## Critical files + +- `crates/pounce-global/src/bnb.rs` — `GlobalOptions`, `process_node`, both + drivers, `Node` (has `depth`), `children`. +- `crates/pounce-global/src/obbt.rs` — `tighten` (the `2n` sweep), partial-vars. +- `crates/pounce-global/src/relax.rs` — `build_relaxation` (reuse/caching). +- `crates/pounce-cli/src/main.rs` — `register_global_options` (~1458+), + `global_options_from_list` (~1572+) for new CLI knobs. +- `crates/pounce-global/tests/global.rs` — node-count + soundness tests. +- `benchmarks/globallib/run_globallib.py` — success metric. + +--- + +## Phase 2 — Schedule + budget OBBT + +OBBT runs at every node on all `2n` vars with no gating — the dominant per-node +cost on larger problems. Make it depth-gated, periodic, and partial. + +- [x] **2.1 `obbt_max_depth` (depth gate).** Done 2026-06-07: field added (default `usize::MAX`), `depth` threaded into `process_node`, OBBT block gated `&& depth <= opts.obbt_max_depth`, CLI `global_obbt_max_depth` (-1 sentinel = no limit). New test `obbt_max_depth_certifies_same_optimum` (depth 0/1/∞ all certify 4.0; gating only adds nodes) passes; 30/30 global tests green; check+clippy clean; smoke 6 OK / 4 TIMEOUT / 0 WRONG (unchanged from baseline — default is behavior-preserving). + - Add `pub obbt_max_depth: usize` to `GlobalOptions` (default `usize::MAX` = + run at every depth, no behavior change). Set it in `Default`. + - Thread the node's `depth` into `process_node` (new param); both call sites + (serial ~`bnb.rs:713`, parallel ~`bnb.rs:1049`) pass `node.depth`. + - Gate the OBBT block (`bnb.rs:324`): `if opts.obbt_passes > 0 && depth <= + opts.obbt_max_depth { … }`. + - CLI: `global_obbt_max_depth` integer option in `register_global_options`; + parse into `g.obbt_max_depth` in `global_options_from_list`. + - **Acceptance:** `cargo check -p pounce-global -p pounce-cli` clean; + `cargo clippy -p pounce-global -p pounce-cli` clean; new test in `global.rs`: + a problem solved with a small `obbt_max_depth` (e.g. 2) certifies the **same** + optimum as the default within tolerance (soundness preserved), and existing + node-count tests pass (default is unchanged behavior, so they should not move). + - **Soundness:** skipping OBBT deep in the tree only forgoes tightening; FBBT + still prunes and the relaxation bound is unchanged, so the optimum cannot move. + +- [x] **2.2 `obbt_interval` (every k-th eligible node).** Done 2026-06-07: field added (default `1`, `0`→`1`), 0-based `node_seq` threaded into `process_node` (serial: `nodes-1`; parallel: `s.nodes-1` captured under the lock, approximate by design), OBBT gate now `&& node_seq % obbt_interval == 0` (root=seq 0 always OBBT'd). CLI `global_obbt_interval`. New test `obbt_interval_certifies_same_optimum` (interval=1000 ≈ root-only still certifies 4.0, only adds nodes) passes; 31/31 global tests green incl. existing exact-count tests (default unchanged); check+clippy clean (no new warnings). + - Add `pub obbt_interval: usize` (default `1` = every node). `0` is treated as + `1`. Run OBBT only when `node_seq % obbt_interval == 0` (and within + `obbt_max_depth`). + - Thread a per-driver node sequence counter into `process_node` (serial: a + simple incrementing counter in the search loop; parallel: an `AtomicUsize` + in the shared state, read when the node is dequeued). Document that under the + parallel pool the interval is approximate (node order is nondeterministic) — + that is fine, it only affects *how much* OBBT runs, never correctness. + - CLI: `global_obbt_interval`. Parse into `g.obbt_interval`. + - **Acceptance:** builds + clippy clean; test: `obbt_interval=1000` (≈ root-only + OBBT) still certifies the same optimum as default on a small nonconvex model; + default-value run matches today's node count on an existing exact-count test. + - **Soundness:** same as 2.1 — fewer OBBT invocations only loosen tightening. + +- [x] **2.3 `obbt_max_vars` (partial, prioritized sweep).** Done 2026-06-07: field added (default `usize::MAX`), new `obbt::select_widest_vars(lo,hi,max_vars)` returns a length-`n` tighten-mask (`None` ⇒ all, fast path) ranked by widest box side `hi-lo` (stable, deterministic). Mask threaded through `tighten` into both the IPM sweep (serial + parallel `map_init`) and the simplex `sweep` (signature gained `targets: Option<&[bool]>`); non-targets yield `(None,None)` ⇒ `2k` solves not `2n`. CLI `global_obbt_max_vars` (-1 = all). New test `obbt_max_vars_certifies_same_optimum` (max_vars=1 still certifies 4.0, only adds nodes) passes; 32/32 global tests green; simplex_bridge unit tests green under `--features simplex-obbt`; both feature builds + clippy clean. + - Add `pub obbt_max_vars: usize` (default `usize::MAX` = all `n` vars). + - In `obbt::tighten`, when `obbt_max_vars < n`, tighten only a prioritized + subset each pass: rank by **widest current box side** `hi[i]-lo[i]` (cheap, + deterministic, targets the vars that most slow branching). Sweep `2k` LPs + instead of `2n`. Keep the deadline checks per the existing structure. + - Plumb `opts.obbt_max_vars` through the `tighten` signature. + - CLI: `global_obbt_max_vars`. Parse into `g.obbt_max_vars`. + - **Acceptance:** builds + clippy clean; test: with `obbt_max_vars=1` on a + 2–3 var nonconvex model the optimum is unchanged and the run completes; + existing soundness sweep stays green. + - **Soundness:** tightening a subset is a strict subset of today's tightening — + bounds stay valid, optimum cannot move. + +- [x] **2.4 Tune Phase-2 defaults via the smoke set (direction only).** Done 2026-06-07 (smoke table in Results log). All 6 tripwires hold OK at 0 WRONG under `max_depth∈{12,8,4}` and `max_vars∈{20,50}` (all identical to baseline 6/4/0 — harmless but no smoke rescue, since smoke is shallow-tree + root-bound). `interval=2` rejected (breaks ex3_1_4 tripwire). **Provisional default: keep all Phase-2 levers conservative (∞/1/∞) — no `Default` change**; finite max_depth/max_vars to be justified by the full 104-model sweep at 4.4. + - With **nothing else running**, run the smoke set under candidate settings via + `--opt`. Suggested grid (small): `global_obbt_max_depth ∈ {∞, 12, 8, 4}`, + optionally `global_obbt_interval ∈ {1, 2}`, `global_obbt_max_vars ∈ {∞, n/2}`. + - Require every candidate keeps the 6 tripwire models OK at baseline values and + `WRONG == 0`. Prefer the most aggressive setting that (a) holds the tripwire + and (b) rescues ≥1 canary or clearly speeds the OK models. Record the smoke + table here. This picks a **provisional** default direction only — the full + 104-model sweep at **4.4** confirms it and sets the final `Default`. + - **Acceptance:** smoke table recorded; chosen provisional setting holds all 6 + tripwire OK at 0 WRONG; provisional defaults noted (not yet committed as the + final `Default` — that waits for 4.4). + +--- + +## Phase 3 — Warm-start parent → child relaxation + sandwich + +Adjacent boxes have nearby relaxation optima. Seed the IPM instead of cold-start. + +- [x] **3.1 Carry the parent relaxation solution on the frontier node.** Done 2026-06-07: `warm: Option` added to both `Bounded` and `Node`. Built in `process_node` via `QpWarmStart::from_solution(&sol)` gated on `QpStatus::Optimal` (before `sol.x` is moved into `sol_x`); flowed into both children in `children` via `b.warm.clone()`; both root pushes (serial + parallel) get `warm: None`. Pure carrier — `Node.warm` `#[allow(dead_code)]` until 3.2 consumes it, 0 numeric change. `estimate_node_bytes` bumped `2n → 5n` floats (adds carried `x`/`z_lb`/`z_ub`; `m`-dependent `y`/`z` rows noted as uncounted, so the figure is a floor). Build + clippy clean (only pre-existing `problem.rs` warnings); 32/32 lib + 4 tree-debug + 2 doc-tests green. + +- [x] **3.2 Warm-start the child main lower-bound solve.** Done 2026-06-07: `process_node` gained a `warm: Option<&QpWarmStart>` param threaded from `node.warm.as_deref()` at both call sites; the main relaxation solve now calls `solve_qp_ipm_warm` when a carried point is present, guarded three ways so it can only speed up the *same* solve: (1) the debug/`subsolve_hook` path stays cold; (2) the warm point must be dimensionally compatible with this node's relaxation (`x/z_lb/z_ub == n`, `y == m_eq`, `z == m_ineq`) since child cuts can change the row count — else cold; (3) a non-`Optimal` warm result (the direct driver is less robust than cold HSDE) falls back to a cold `solve_qp_ipm`, preserving today's bound. `warm` boxed in both `Node`/`Bounded` (`Option>`) to keep the frontier node compact and clear a `large_enum_variant` clippy lint. Build + clippy clean (only pre-existing `problem.rs` warning); 32/32 lib + 4 tree-debug + 2 doc green — all certified-optimum **and** exact node-count tests unchanged ⇒ warm-start moved no certified value and no branch decision (0 WRONG). + +- [x] **3.3 Warm-start sandwich re-solves.** Done 2026-06-07: the sandwich loop now seeds each re-solve from the previous round's full primal/dual via `solve_qp_ipm_warm`. Verified `append_cuts` only grows the inequality block (`relax.rs:824` pushes to `g`/`h` only), so `n`/`m_eq`/bound-multipliers are invariant across rounds; the carried `QpWarmStart` is reused with its `z` `resize`d to the new `m_ineq()` (fresh cut rows start inactive ⇒ pad with `0.0`). Same conservative guard as 3.2: a non-`Optimal` warm result falls back to a cold `solve_qp_ipm`, so tightening is never weaker than today's. Build + clippy clean (only pre-existing `problem.rs` warning); full suite green (32 integration + 19 in-lib + 4 tree-debug + 2 doc) — bounds/optima and node counts unchanged ⇒ 0 WRONG. + +- [x] **3.4 Validate Phase 3 correctness on small problems.** Done 2026-06-07: `cargo test -p pounce-global -p pounce-convex` fully green — pounce-convex 95 in-lib + every integration suite incl. `warm_start` (8) and `qp_known_optima` (7); pounce-global 19 in-lib + 32 integration (all certified-optimum + exact node-count tests) + 4 tree-debug + 2 doc. Every certified optimum and node count is unchanged across Phase 3, proving warm-start moved no certified value (0 WRONG). Per policy, no GLOBALLib timing sweep run in the loop; perf confirmation left to a manual sweep. + +--- + +## Phase 4 — Cut the fixed small-n pipeline cost + +Small-n timeouts are local-NLP + sandwich + relaxation builds, not OBBT. + +- [x] **4.1 Depth-aware / early-exit `local_solve_iters`.** Done 2026-06-07: added `local_solve_iters_at_depth(root_iters, depth)` — the full root budget (default 50) is spent at the root and shallow nodes; the cap **halves every 4 levels** (`LOCAL_SOLVE_DECAY_STRIDE=4`) deeper, floored at `LOCAL_SOLVE_MIN_ITERS=10` and never exceeding the caller's root budget (so a small custom budget is preserved, and `0` still disables). The per-node call now polishes with the depth-scaled count. No new CLI knob — `local_solve_iters` stays the root budget; the decay is internal/conservative. **Soundness:** the local solve only *proposes* incumbents, so a cheaper deep polish can only weaken the upper bound, never the relaxation lower bound or pruning ⇒ cannot certify a wrong value. Build + clippy clean (only pre-existing `problem.rs` warning); 32 + 19 + 4 + 2 tests green — decay bites only at depth ≥ 4 so shallow test trees and their **exact node counts are unchanged** (0 WRONG). + +- [x] **4.2 Adaptive sandwich short-circuit.** Done 2026-06-07: the sandwich break condition now compares the marginal gain against an adaptive `gain_eps = (1e-7·|node_lb|).max(1e-9)` instead of the fixed `1e-9` absolute floor. Rounds that buy a negligible fraction of the bound magnitude are skipped, cutting LP re-solves on nodes whose bound has effectively converged, while the `1e-9` floor preserves today's behavior for small-magnitude bounds. Build + clippy clean (only pre-existing `problem.rs` warning); 32 + 19 + 4 + 2 tests green — every lower bound stays within tolerance, so all certified optima **and exact node counts are unchanged** (0 WRONG). + +- [x] **4.3 Reduce `build_relaxation` calls per node (3 → fewer).** Done 2026-06-07: when OBBT's final pass tightens nothing, it hands the node-bound stage that pass's relaxation instead of forcing a rebuild. `obbt::tighten` gained a `reuse_out: &mut Option` out-param; on the `!improved` break it peels the appended cutoff cut (`qp.g/h.truncate(base_*_len)`, captured *before* the cut push) and returns the relaxation. **Soundness rests on two facts:** (1) `build_relaxation(prob, lo, hi, true)` is rebuilt *per pass* (obbt.rs:156) from the current box, so a no-improvement pass's relaxation is over the *final* box — `build_relaxation` would reproduce it bit-for-bit; (2) the caller only reuses it under `Some(r) if opts.multilinear` (bnb.rs:437), matching OBBT's hardcoded `multilinear=true`, and rebuilds (`_` arm) whenever OBBT was gated off, every pass improved, or `opts.multilinear == false`. So reuse is bit-identical to a fresh build, never a stale/looser polytope. Saves one `build_relaxation`/node on the common converged-OBBT path. Diagnosed the pre-existing `simplex-obbt`-feature test failure (`simplex_obbt_matches_ipm_certified_optimum`, −0.402 vs −2.25 on the quartic) and confirmed it is **not** caused by 4.3 *or* Phase 3 warm-start — it reproduces identically with both disabled; it is the parked, off-by-default simplex engine's known unsoundness on ill-scaled LPs (out of scope per the IPM-stays-OBBT invariant). Build + clippy clean (no new warnings in bnb/obbt/relax); default-feature suite green: 19 lib + 32 integration (all certified-optimum **and exact node-count** tests unchanged ⇒ bit-identical) + 4 tree-debug + 2 doc ⇒ 0 WRONG. + - When the box is unchanged after OBBT, reuse the final OBBT-pass relaxation as + the node's lower-bound relaxation instead of rebuilding. Guard on + bounds-equality so a tightened box still rebuilds. + - **Acceptance:** builds + clippy clean; objectives/bounds unchanged to + tolerance; build count drops (instrument or reason it out); 0 WRONG. + +- [x] **4.4 FINAL correctness gate (small problems) + defaults decision.** Done 2026-06-07: full small-problem correctness gate green across the touched crates — `pounce-global` (19 lib + 32 integration incl. every certified-optimum **and exact node-count** test + 4 tree-debug + 2 doc), `pounce-convex` (95 lib + all integration incl. `warm_start` 8 / `qp_known_optima` 7), `pounce-simplex` (24 lib + 2 `ill_scaled_obbt`), `pounce-cli` (all integration suites). Every certified optimum and node count is unchanged across the entire Phase 2–4 program ⇒ **0 WRONG preserved**. **Defaults kept conservative and unchanged** (`obbt_max_depth=usize::MAX`, `obbt_interval=1`, `obbt_max_vars=usize::MAX`, `obbt_lp=Ipm` via `#[default]`) — all Phase-2 levers ship as behavior-preserving opt-in tunables; no `Default` promoted. The IPM-stays-OBBT invariant holds: `ObbtLp::Simplex` is parked behind the off-by-default `simplex-obbt` feature and transparently downgrades to the IPM sweep when the feature is off. **Per policy, NO full 104-model timing sweep was run in the loop** — the OK-count gain and any non-trivial `Default` change are deferred to a manual full sweep the user runs when they choose. The loop's mandate was 0 WRONG on small problems; that is met. + - **Policy (user, 2026-06-07): no full 104-model timing sweep in the loop.** The + loop's final gate is correctness on small problems: `cargo test --workspace` + (or at least `pounce-global` + `pounce-convex` + `pounce-simplex`) all green = + 0 WRONG preserved across every Phase 2–4 change. + - **Defaults:** keep the conservative Phase-2 defaults (`obbt_max_depth=∞`, + `obbt_interval=1`, `obbt_max_vars=∞`) — they are behavior-preserving and + proven harmless. Do **not** promote a more aggressive default from inside the + loop; the perf payoff requires a full-corpus timing sweep, which the user will + run manually when they want to set a non-trivial `Default`. Note that here and + stop. + +--- + +## Done criteria for the whole loop + +- All boxes above checked. +- Final GLOBALLib: **0 WRONG**, OK count > 59. +- `cargo test -p pounce-global -p pounce-simplex -p pounce-cli` green; + `cargo clippy` clean on the default feature set. +- New knobs documented in CLI help with conservative defaults. +- This doc updated with the final results table. + +## Results log + +(Append one line per completed validation task: date · setting · OK/TIMEOUT/WRONG.) + +- 2026-06-07 · smoke baseline (default opts) · **6 OK / 4 TIMEOUT / 0 WRONG** +- 2026-06-07 · task 4.4 final gate (small-problem Rust suites) · **0 WRONG** · + pounce-global 19+32+4+2 / pounce-convex 95+integration / pounce-simplex 24+2 / + pounce-cli all green; conservative defaults unchanged; full 104-model OK-count + sweep deferred to a manual run per the validation policy. +- 2026-06-07 · task 2.4 smoke grid (timeout=20s, 10 models): + + | setting | OK | TIMEOUT | WRONG | note | + |--------------------------|----|---------|-------|-----------------------------------------| + | default | 6 | 4 | 0 | baseline | + | `obbt_max_depth=12` | 6 | 4 | 0 | identical to baseline | + | `obbt_max_depth=8` | 6 | 4 | 0 | identical to baseline | + | `obbt_max_depth=4` | 6 | 4 | 0 | ex3_1_4 0.64→0.47s (noise); holds all | + | `obbt_interval=2` | 5 | 5 | 0 | **REGRESSES** ex3_1_4 tripwire → reject | + | `obbt_max_vars=20` | 6 | 4 | 0 | identical; did NOT rescue ex4_1_2 | + | `obbt_max_vars=50` | 6 | 4 | 0 | identical to baseline | + + **Provisional direction:** keep shipped defaults conservative (`obbt_max_depth=∞`, + `obbt_interval=1`, `obbt_max_vars=∞`). The smoke set is dominated by shallow + tripwire trees and root-bound canaries (ex4_1_2 stalls inside a *single* node), + so it cannot discriminate the depth/max_vars levers — they are demonstrably + **harmless** (0 tripwire regressions, 0 WRONG) but show no smoke rescue. Their + payoff is expected on deep-tree large-`n` models, to be confirmed by the full + 104-model sweep at task **4.4**, which sets the final `Default`. `interval=2` is + rejected outright (breaks the ex3_1_4 tripwire). All Phase-2 levers ship as + opt-in tunables; no `Default` change yet. diff --git a/dev-notes/hsde.md b/dev-notes/hsde.md new file mode 100644 index 00000000..aeccb0da --- /dev/null +++ b/dev-notes/hsde.md @@ -0,0 +1,661 @@ +# Homogeneous self-dual embedding for the convex IPM — design note + +**Status: Phases H2–H4 landed — HSDE solves LP/QP/SOCP and is a +selectable driver (`QpOptions::use_hsde`). H5 (exponential cone) core +landed: the dual-aware scaling, the non-symmetric driver +(`hsde_nonsym::solve_conic_hsde_nonsym`), the third-order corrector, and +public-API routing (`ConeSpec::Exponential` → the driver) solve exp-cone +problems to known optima — see "H5 status" below. Remaining: broader +benchmarks (`pounce-nlp` cross-checks, CBLIB).** +Chosen as the foundation for Clarabel cone parity (see +`clarabel-parity.md`): reformulate the interior-point driver into a +homogeneous self-dual embedding (HSDE), prove it reproduces every existing +LP/QP/SOCP result and infeasibility certificate, switch over, and *then* +add the non-symmetric (exp/power) and PSD cones onto the uniform HSDE +driver — the structure Clarabel, SCS, and ECOS use. + +## Why HSDE + +The current driver (`ipm.rs`) is an infeasible-start primal–dual method +with a **bolt-on** verified certificate check (`detect_infeasibility`). It +works, but: + +- infeasibility/unboundedness is detected by watching the iterate diverge + along a Farkas/recession ray — robust but heuristic in *when* it fires; +- there is no single self-starting iterate that handles primal- and + dual-infeasible problems uniformly; +- non-symmetric cones (exp, power) are far better behaved inside HSDE — the + embedding bounds the iterates and gives a clean central path. + +HSDE folds primal, dual, and the infeasibility certificates into **one** +self-dual system. Its solution either has `τ > 0` (recover the optimal +primal–dual point by dividing by `τ`) or `κ > 0` (a certificate of +primal or dual infeasibility) — decided *at convergence*, not by a side +test. + +## What is reused (the whole point) + +The per-cone math — `kkt_block` (NT scaling `W²`), `rhs_comp_term`, +`recover_ds`, `comp_residual{,_corrector}`, `max_step`, `mu` — is **reused +verbatim**. So is `KktStructure`: the embedding borders the existing +symmetric `(x, y, z)` block + +```text + ⎡ P+δI Aᵀ Gᵀ ⎤ + M = ⎢ A −δI 0 ⎥ (exactly today's KKT matrix) + ⎣ G 0 −W²−δI ⎦ +``` + +with one extra scalar `τ` (and its complement `κ`). The bordered system is +solved by **two** back-solves through the *same* factorization of `M` plus +a scalar Schur complement (the SCS/ECOS scheme), so the factorization, AMD +ordering, refactor-per-iteration, and the SOC aux-variable trick are +untouched. What changes is the outer iteration: residuals, the τ/κ row, +the step combination, the step length, and termination. + +## The embedding — linear conic case (P = 0) + +For `min cᵀx s.t. Ax = b, Gx + s = h, s ∈ K` with conic dual +`z ∈ K*` and free equality dual `y`, the self-dual embedding introduces +`τ ≥ 0, κ ≥ 0`: + +```text + (1) Aᵀy + Gᵀz + c τ = 0 (r_x, length n) + (2) A x − b τ = 0 (r_y, length m_eq) + (3) G x + s − h τ = 0 (r_z, length m_ineq) + (4) −cᵀx − bᵀy − hᵀz − κ = 0 (r_τ, scalar) + s ∈ K, z ∈ K*, τ ≥ 0, κ ≥ 0, sᵀz = 0, τκ = 0 +``` + +This system is **self-dual** (the matrix is skew-symmetric apart from the +cone block). Goldman–Tucker: it has a solution with `τ + κ > 0`, and + +- `τ > 0, κ = 0` ⇒ `(x, y, z, s)/τ` is an optimal primal–dual point; +- `τ = 0, κ > 0` ⇒ `cᵀx + bᵀy + hᵀz < 0` is impossible, so either + `bᵀy + hᵀz < 0` with `Aᵀy+Gᵀz = 0, z ∈ K*` (primal-infeasible Farkas + certificate) or `cᵀx < 0` with `Ax = 0, Gx + s = 0, s ∈ K` + (dual-infeasible / unbounded recession ray). + +### Central path and the Newton step + +Relax the two complementarity conditions to `s ∘ z = σμ e` and +`τκ = σμ`, with `μ = (sᵀz + τκ)/(degree + 1)`. The Newton system for +`(Δx, Δy, Δz, Δs, Δτ, Δκ)` is the embedding matrix linearized. Eliminating +`Δs` via the cone (`Δs = −W²Δz − rhs_comp`, exactly `recover_ds`) and `Δκ` +via `τΔκ + κΔτ = σμ − τκ`, the reduced system is the bordered + +```text + ⎡ M ⎤ ⎡Δx⎤ ⎡ ... ⎤ with border column bcol = (c, −b, −h) + ⎢ b ⎥ ⎢Δy⎥ = ⎢ ⎥ and Δτ closing row (−cᵀ,−bᵀ,−hᵀ)·(Δx,Δy,Δz) + ⎣ col ⎦ ⎣Δz⎦ ⎣ . ⎦ − (κ/τ) Δτ = r_τ + σμ/τ − κ +``` + +i.e. `M·Δw + Δτ·bcol = rhs_w` and `bcolᵀ·Δw − (κ/τ)Δτ = rhs_τ` (signs as in +(1)–(4)). **Two-solve scheme** (one factorization of `M`): + +```text + solve M p = bcol (the "constant" direction; depends only on data + scaling) + solve M q = rhs_w (the "residual" direction) + Δτ = (rhs_τ − bcolᵀ q) / (−κ/τ − bcolᵀ p) + Δw = q − Δτ · p +``` + +`p` can be reused between the predictor and corrector (same `M`, same +`bcol`); only `q` and the scalars differ. So HSDE costs **one extra +back-solve per iteration** over the current method — the factorization is +shared exactly as today. + +### Initial point, step, termination + +- **Self-start:** `x = 0, y = 0, s = z = e` (cone identity), `τ = κ = 1`. + Perfectly centered (`s∘z = e, τκ = 1`); no infeasible-start needed. +- **Step length:** fraction-to-boundary over the cone (`max_step` on + `s, z`) **and** the rays `τ, κ > 0` — `α` is the min of the cone step and + the `τ/κ` steps. One shared `α` (HSDE is symmetric in primal/dual). +- **Termination** (Clarabel/SCS style), all relative: + - **optimal:** primal res `‖Ax−bτ‖/τ`, dual res `‖Aᵀy+Gᵀz+cτ‖/τ`, and gap + `|cᵀx + bᵀy + hᵀz|/τ` all below `tol` (the `/τ` un-homogenizes); + - **primal infeasible:** `τ` small, `bᵀy + hᵀz < 0`, `‖Aᵀy+Gᵀz‖` small; + - **dual infeasible:** `τ` small, `cᵀx < 0`, `‖Ax‖, ‖Gx+s‖` small. + These are the *same* certificate inequalities `detect_infeasibility` + already checks; the embedding drives the iterate onto the Farkas/recession + ray as `τ → 0`, and the HSDE driver **reuses** that verified relative check + on the homogeneous `(x, y, z)` (rather than retiring it) — so both drivers + share one certificate path. + +## The quadratic objective (P ≠ 0) + +With `P`, the embedding is no longer perfectly self-dual; we adopt +Clarabel's QP embedding. Stationarity (1) gains `Px`: + +```text + (1q) P x + Aᵀy + Gᵀz + c τ = 0 + (4q) κ = −(cᵀx + bᵀy + hᵀz) − xᵀP x / τ +``` + +(At `τ>0`, dividing recovers the QP duality-gap condition +`x̂ᵀPx̂ + cᵀx̂ + bᵀŷ + hᵀẑ = 0`.) **Landed (H3).** The Newton linearization +of (4q) shows the `P` coupling enters *only* the τ-row scalar: + +- `ρ_τ = κ + cᵀx + bᵀy + hᵀz + xᵀPx/τ`, +- the τ-row gradient becomes `g̃ = (c + (2/τ)Px, b, h)` (used in `g̃ᵀp`, + `g̃ᵀq`), +- the scalar Schur denominator gains a `−xᵀPx/τ²` term. + +The border *column* is unchanged — `(1q)`'s τ-coefficient is still `c`, so +`p = M⁻¹(−c, b, h)` as in the linear case — and `P` already sits in `M`'s +`(x,x)` block and in `ρ_x`. Hence the two M-solves, the cone elimination, +and the step are **identical** to H2; only the τ-row scalar differs, and it +reduces to the linear case at `P = 0`. Validated against the direct driver +and closed-form optima (equality-constrained QP; box/inequality QP; QP with +a second-order cone) — all agree. + +## Phased plan + +| Phase | Scope | Risk | +|---|---|---| +| H1 | This note: exact embedding, two-solve scheme, termination. | low | +| **H2** | ✅ HSDE driver for **linear** conic (`P=0`): orthant + SOC, reusing `KktStructure`/`Cone`. `solve_conic_hsde` alongside the current solver. Validated optima + both certificates vs the existing solver. | med-high — embedding signs, two-solve combination | +| **H3** | ✅ Quadratic objective: the `(1q)/(4q)` τ-row with the `P` coupling. Validated on the QP suite (closed-form optima + QP-with-SOC) vs the direct driver. | high — τ-row P algebra | +| **H4** | ✅ *(revised)* HSDE promoted to a first-class **selectable** driver (`QpOptions::use_hsde`), routed through `solve_qp_core` and reachable from every public entry point (bound expansion + `z_lb`/`z_ub` split validated). **Not** forced as the universal default: doing so would regress warm starting — `warm_start_reduces_iterations_on_nearby_problem` asserts a *strict* iteration reduction that the direct method's adaptive recentering delivers and an IPM embedding inherently does not. End state is **automatic routing**: symmetric-only cones stay on the direct driver (warm start, factor reuse, differentiable layers); problems with non-symmetric cones (exp/power, H5+) use HSDE. Embedded warm start / factor reuse remain future work, gated on need. | med | +| H5 | **Exponential cone** on HSDE: barrier oracles, non-symmetric scaling, third-order corrector, neighborhood line search. Known-optima (GP, logistic, entropy) + KKT-residual validation. | high | +| H6 | **Power cone** (exp machinery + new barrier). | low after H5 | +| **H7** | ✅ **PSD cone**: pure-Rust symmetric eig, svec/smat, dense `W⊗ₛW` block; small dense SDPs (chordal decomposition later). Landed — see the H7 status note below. | med-high | +| H8 | Cone-aware differentiable backward (JAX) for each new cone, FD-validated, as separate follow-ups. | med-high | + +Validation discipline is unchanged and intrinsic: the IPM reports +`Optimal` only at a verified KKT point; each phase adds known-optima tests +plus randomized KKT-residual checks, and the orthant/SOC results stay +identical to the current solver (the cross-check that guards H2–H4). The +existing direct driver stays in place until H4 flips the default, so there +is no window where the crate regresses. + +## Non-symmetric cones on HSDE (H5 — exponential cone) + +The exponential and power cones are **not** self-scaled: there is no +Nesterov–Todd point `W` with `W²z = s`, no Jordan product `s∘z`. The +path-following method instead uses the primal barrier `F` directly +(Skajaa–Ye 2015; Dahl–Andersen 2021, the MOSEK exponential-cone +algorithm). `pounce-convex` already has the validated barrier oracles +(`BarrierCone`: `F`, `∇F`, `∇²F`, membership — see `cones/exp.rs`). + +### Central path and the scaling block + +The central path of the homogeneous model is, at parameter `μ`, +```text + z = −μ ∇F(s), τκ = μ, μ = (sᵀz + τκ)/(ν + 1), +``` +with `ν` the total barrier degree (exp cone: 3). `−∇F(s) ∈ int K*` for +`s ∈ int K`, so `z` stays dual-feasible. The Newton step toward the path at +a centered target `σμ` linearizes `z + σμ∇F(s) = 0`: +```text + dz + σμ H(s) ds = −(z + σμ ∇F(s)), H = ∇²F(s). +``` +The scaling block uses the **current** `μ` (the `σ` enters only the target +`r_c`); linearizing `z + dz = −σμ(∇F(s) + H ds)` and eliminating `ds` +(so the cone contributes a `(z,z)` block exactly as the symmetric path +does) gives +```text + (z,z) block : −(1/μ) H(s)⁻¹ [dense; exp cone is 3×3] + r_c : z + σμ ∇F(s) + rhs_comp_term : (1/μ) H(s)⁻¹ r_c + recover_ds : ds = −rhs_comp_term − (1/μ)H(s)⁻¹ dz +``` +**Orthant-reduction check (the correctness anchor).** For the orthant, +`F = −Σ log sᵢ`, `H⁻¹ = diag(sᵢ²)`, and on the path `zᵢ = μ/sᵢ`, so the +block `(1/μ)sᵢ² = sᵢ/zᵢ = W²` — it reduces *exactly* to the orthant +scaling, and `r_c = z − σμ/sᵢ` matches the symmetric `(s∘z − σμe)/s`. The +whole derivation collapses to the symmetric one in 1-D, the same anchor +that de-risked the SOC reduced system. (Putting `σμ` in the *block* +instead of `μ` — an early mistake — both mis-scales the step and +reintroduces a `σ=0` singularity; the `μ` form is the correct one.) + +### Why a separate loop (fixed-σ single step, not Mehrotra) + +The block carries `1/σμ`, so the Mehrotra **predictor** (`σ = 0`) is +singular for a non-symmetric cone. Skajaa–Ye therefore use a +predictor (tangent to the path) **plus** a distinct centering corrector, +not a single combined `σ→σμ` step. The minimal robust version is a +**fixed-σ single-step path-follower**: each iteration pick `σ ∈ (0,1)`, +assemble the `(z,z)` block `−(1/σμ)H⁻¹`, solve the *same* bordered HSDE +system (two solves + the τ scalar, reused verbatim from H2/H3), then take a +**backtracking** step — there is no closed-form `max_step`, so shrink `α` +until `s+αds ∈ int K`, `z+αdz ∈ int K*` (via `BarrierCone` membership) and +the barrier decreases. More iterations than Mehrotra, but correctness +first; a Mehrotra/RK corrector is a later optimization. + +### Implementation steps + +1. **Dense `(z,z)` block in `KktStructure`.** Today's assembly handles + `Diagonal` (orthant) and `DiagRank1` (SOC). Add a `DenseLower` path that + reserves a `dim×dim` lower triangle at the cone's `(z,z)` position and + fills it from `−(1/σμ)H⁻¹` each iteration. (This is the "Tier-A dense + block" the SOC note deferred; the exp cone is only 3×3, so fill is + trivial.) +2. **A non-symmetric HSDE loop** (`hsde::solve_conic_hsde_nonsym`, or a + branch) sharing the residuals, the two-solve τ handling, and + un-homogenizing — but with the fixed-σ step and barrier line search. + Routed to when the cone product contains a non-symmetric block. +3. **`ExponentialCone` becomes a `Cone`/`ConeKind`** providing the + `(z,z)`-block (dense `−(1/σμ)H⁻¹`), `r_c`, `recover_ds`, the central-ray + identity start, `mu`, and a membership-based `max_step`. +4. **Validate** on known optima: an entropy maximization / `log-sum-exp` + epigraph and a tiny geometric program (posynomial), plus a randomized + KKT-residual check, all to intrinsic tolerance; the orthant/SOC paths + stay byte-identical. **Cross-check against NLP solves:** each of these + problems also has a smooth-NLP form — solve it through `pounce-nlp` and + require the conic optimum to agree with the NLP optimum (objective and + primal point) to tolerance. This is the strongest intrinsic check: two + independent solvers (a conic IPM and a general NLP IPM) landing on the + same KKT point. + +### Prototype findings (what works, what's still needed) + +A standalone prototype driver (assembling the dense bordered system and +reusing the two-solve τ handling) confirmed the **math is right**: + +- the barrier oracles are exact (FD + the three log-homogeneity identities); +- the `(1/μ)H⁻¹` block and `r_c = z + σμ∇F(s)` give a correct first step — + on `min z s.t. (0,1,z)∈K_exp` the opening iteration cuts primal and dual + residuals by ~2× in the right direction. + +But it **stalls** after a few iterations: with primal-only Hessian scaling +the **dual** iterate races to `∂K*` (proximity `ψ* → 0`) while `μ` is still +large, and the line search throttles `α → 0`. This persists across all `σ` +and across a central-path-neighborhood line search — it is the known +weakness of naive primal scaling, *not* a sign/algebra bug (the symmetric +reduction holds and the first step is correct). + +**What's needed (resolved — item #1 in hand).** The stall is the known +weakness of primal-only Hessian scaling. The fix is a **dual-aware +primal–dual scaling** built from *both* the primal and dual cone iterates — +the Tunçel scaling, specialized to 3-D and computed by a BFGS update, exactly +as in MOSEK's exponential-cone solver. The construction is transcribed below +from **Dahl & Andersen (2021)** — the local copy is `~/Desktop/hsde-reference.pdf` +(this reference was *not* network-blocked after all; it was on disk). +Equation tags `(DA n)` below refer to that paper. + +### The dual-aware scaling (item #1) — Tunçel/BFGS primal–dual scaling [Dahl & Andersen 2021] + +This **replaces** the primal-only `−(1/μ)H(s)⁻¹` block of "Central path and +the scaling block" above, and supersedes the fixed-σ path-follower of "Why a +separate loop" (Dahl–Andersen fold predictor + corrector + centering into one +combined direction). Implements `[Dahl & Andersen 2021]`, which itself +specializes the primal–dual scalings of `[Tunçel 2001]` / `[Myklebust & +Tunçel 2014]` to the exponential cone. + +**Notation / convention alignment (read this first).** Dahl–Andersen put the +*primal* cone variable in `x` and the *dual* in `s`; pounce's HSDE uses +`s ∈ K` (primal slack) and `z ∈ K*` (dual). Map **DA `x` → pounce `s`**, +**DA `s` → pounce `z`**. Their exp-cone ordering also differs: +`K_exp = cl{x₁ ≥ x₂·e^{x₃/x₂}}`, barrier `F = −log(x₂log(x₁/x₂) − x₃) − log x₁ +− log x₂` (DA 2) — a coordinate **permutation** of pounce's `(x,y,z)` with +`ψ = y·log(z/y) − x` (`cones/exp.rs`): pounce `(x,y,z) = DA (x₃, x₂, x₁)`. Port +the appendix derivatives through that permutation, **or** (cheaper, less +error-prone) re-derive `F'''` directly in pounce's order and FD-check it +alongside the existing `F, ∇F, ∇²F` oracles. + +In DA's convention (`x` = primal cone var, `s` = dual cone var), for an iterate +off the central path: + +**Shadow iterates and scalars** (DA 7): +``` + x̃ := −F'_*(s) (gradient of the conjugate barrier at the dual point) + s̃ := −F'(x) (gradient of the primal barrier at the primal point) + μ := ⟨x,s⟩/ϑ, μ̃ := ⟨x̃,s̃⟩/ϑ (μ·μ̃ ≥ 1, equality only on path) +``` +`s̃ = −F'(x)` is free (reuse `∇F`). `x̃ = −F'_*(s)` has no closed form for the +exp cone: it is `x̃ = argminₓ{−⟨s,x⟩ − F(x)}`, i.e. solve `F'(x̃) = −s` by a +damped Newton iteration (DA p. 347); then `F''_*(s) = [F''(x̃)]⁻¹`. +`Y^T S ≻ 0` (with `S, Y` below) ⇔ the iterate is off the path. + +**Secant equations — definition of a primal–dual scaling** (DA 8, DA 29). A +nonsingular `W` with the *double* secant property +``` + W x = W^{-T} s, W x̃ = W^{-T} s̃ ⇔ (WᵀW)⁻¹ ∈ T₁(x,s), +``` +where Tunçel's set is `T₁(x,s) = {T≻0 : T²s = x, T²F''(x) = F'_*(s)}` (DA 20). +On the central path this collapses to the self-scaled `WᵀW = μF''(x)` (DA 21); +**off** the path the dual data `s, s̃` genuinely enter — that is exactly the +"dual awareness" the primal-only block lacked. + +**3-D closed form (this is what to implement).** In 3-D every such scaling is +(DA §5, end): +``` + WᵀW = Y(YᵀS)⁻¹Yᵀ + t·z zᵀ + W⁻¹W⁻ᵀ = S(YᵀS)⁻¹Sᵀ + t⁻¹·r rᵀ S := [x x̃], Y := [s s̃] +``` +with `Sᵀz = 0, Yᵀr = 0, ⟨r,z⟩ = 1, ‖z‖ = 1` — computed by **cross products**: +``` + z = (x × x̃) / ‖x × x̃‖ , r = (s × s̃) / ⟨s × s̃, z⟩ . +``` +The entire non-symmetry is carried by the single scalar `t > 0`. + +**Choosing `t` — the BFGS value** (DA 32): +``` + t = μ·‖ F''(x) − s̃s̃ᵀ/ϑ − (F''(x)x̃ − μ̃s̃)(F''(x)x̃ − μ̃s̃)ᵀ / (⟨x̃,F''(x)x̃⟩ − ϑμ̃²) ‖_F +``` +— the Frobenius norm of the rank-3 BFGS update `H_BFGS − μF''(x)` (DA 30). DA +also give an "optimally bounded" `t` via bisection (DA 31; conjectured bound +`ξ* ≈ 1.253` for the exp cone), but report **no practical difference** vs the +BFGS `t` (largest observed `ξ ≤ 1.72`). **Use the BFGS `t` (DA 32)** — closed +form, no bisection. + +**Factored scalings used in the loop** (DA §6) — the columns of `Wᵀ` / `W⁻¹`: +``` + Wᵀ columns: x/√⟨x,s⟩ , δ_s/√⟨δ_x,δ_s⟩ , √t · z + W⁻¹ columns: s/√⟨x,s⟩ , δ_x/√⟨δ_x,δ_s⟩ , r/√t + δ_x := x − μ x̃ , δ_s := s − μ s̃ . +``` +This dense 3×3 `WᵀW` is the `DenseLower` cone block of implementation step #1 +— now `WᵀW` rather than `−(1/σμ)H⁻¹`. **Reconcile placement and signs with +pounce's elimination** (pounce keeps `Δz`, eliminates `Δs`; DA keep `Δx`, +eliminate `Δs` in *their* convention) using the **orthant-reduction anchor**: +on the path `WᵀW → μF''(s)`, and the block must collapse to the existing +`−W²` orthant/SOC block — pin the sign there, exactly as the `−(1/μ)H⁻¹` +derivation was pinned. + +**The corrector (DA's headline contribution)** (DA 16) — a Mehrotra-like +*third-order* corrector for the non-symmetric case: +``` + η := −½ F'''(x)[ Δxᵃ , (F''(x))⁻¹ Δsᵃ ] +``` +where `(Δxᵃ, Δsᵃ)` is the affine/predictor direction (DA 11). Evaluate via +(DA 34): `η = −½ F'''(x)[u, v]`, `u = Δxᵃ`, `v` solving `F''(x)v = Δsᵃ` (use +the factored `F'' = RRᵀ`, DA App. A.2, for stability). The exp-cone third +derivative `F'''(x)[u]` is DA App. A.3 (DA 33). DA Table 1 / Fig 2: this +corrector cuts iteration counts to roughly the symmetric-cone level — it is +the reason their method is competitive and the reason to prefer it over the +Skajaa–Ye Runge–Kutta corrector (which needs extra KKT factorizations). + +**Centering and the combined step** (DA §6): +``` + α_a := step-to-boundary of the affine direction (bisection on membership) + γ := (1 − α_a)·min{(1 − α_a)², 1/4} (centering parameter) + combined (DA 18): G(Δz) = −(1 − γ)·G(z), + W Δx + W^{-T} Δs = −v + γμ ṽ − W^{-T} η, + v = Wx = W^{-T}s , ṽ = W x̃ = W^{-T} s̃ . + update: z ← z + α Δz, largest α keeping the iterate in N(β), β = 1e-6. +``` +`N(β)` is the one-sided ∞-norm neighborhood `ϑ·⟨F'(xᵢ), F'_*(sᵢ)⟩⁻¹ ≥ βμ` +(DA §3). The reduced bordered linear system is DA §7.2: the cone block is +`WᵀW`, solved through an `LDLᵀ` of `[ −WᵀW Aᵀ ; A 0 ]` — structurally the +**same** bordered two-solve already in `hsde.rs`, with the dense `WᵀW` in +place of the symmetric `W²`. + +**Starting point** (DA §6): `x = s = −F'(x)` (solve `x + F'(x) = 0`, the min +of `½‖x‖² + F(x)`), `y = 0`, `τ = κ = 1`. For the exp cone DA give the constant +`x⁰ = s⁰ ≈ (1.290928, 0.805102, −0.827838)` (their ordering — permute to +pounce's). Then `z⁰ ∈ N(1)`, perfectly centered. + +**Termination** (DA §7.3): relative primal/dual feasibility `ρ_p, ρ_d` and gap +`ρ_g`, plus infeasibility metrics `ρ_pi, ρ_di` and ill-posedness `ρ_ip` — +these mirror the relative optimal/infeasible checks already in "Initial point, +step, termination", so the existing certificate path is reused. + +### H5 status — what landed + +Implemented and validated (all to intrinsic tolerance, `cargo test -p +pounce-convex`): + +- **Conjugate-barrier gradient** `x̃ = −F'_*(z)` (`cones/exp.rs`, + `ExponentialCone::conjugate_grad`) — damped self-concordant Newton, + validated by exact round-trip (`p → −∇F(p) → recover p`) and the residual + equation `∇F(x̃) = −z`. +- **Dual-aware scaling** `M = WᵀW` (`ExponentialCone::scaling` → + `ExpScaling`) — the closed form `Y(YᵀS)⁻¹Yᵀ + t·z_cp z_cpᵀ` with the BFGS + `t` (DA 32). The driver needs only `M` (not `W`/`W⁻¹`): the secants + pre-multiplied by `Wᵀ` are the exact, `W`-free identities `M·s = z`, + `M·x̃ = s̃`, which the tests confirm; `M` is SPD and reduces to `μ∇²F` near + the path. +- **Non-symmetric driver** (`hsde_nonsym::solve_conic_hsde_nonsym`) — the + same homogeneous embedding + two-solve τ scheme as `hsde.rs`, with the + cone `(z,z)` block `−M⁻¹` (dense 3×3, genuine off-diagonals reserved in a + local `NsKkt`), `comp_term = −M⁻¹·rc`, `rc = −z + σμ·s̃`, and a + backtracking step on cone membership. **For the orthant it reduces exactly + to the symmetric Mehrotra step** (the correctness anchor). Validated on + `min z : (1,1,z)∈K_exp` → `z = e`; `log-sum-exp` (2 exp + 1 orthant) → + `log 2`; and a geometric program `min x + 1/x` → `2`. +- **Third-order corrector** (DA 16/34) — `ExponentialCone::third_dir_apply` + computes `F'''(s)[u, v]` as a directional derivative of the Hessian + (validated against the exact identity `F'''(s)[s,v] = −2∇²F·v`); the driver + forms `η = −½ F'''(s)[ds_aff, ∇²F⁻¹ dz_aff]` and folds `−η` into `rc`. For + the orthant `η_i = ds_aff_i dz_aff_i/s_i` — exactly the Mehrotra + second-order term, so the orthant corrector *is* standard Mehrotra. Two + safeguards keep it robust: a step-collapse fallback to pure centering, and + gating the corrector off within `~1e3·tol` of convergence (its + finite-difference perturbation otherwise stalls the endgame). The FD step is + scaled `∝ 1/‖u‖` so the third derivative stays accurate for a tiny affine + step. +- **Public-API routing** — `ConeSpec::Exponential`; `solve_socp_ipm` detects + any exp spec and routes to `hsde_nonsym` (`solve_nonsym`), with bound + expansion into a trailing orthant block and bound-dual splitting exactly as + the symmetric path. SOC mixed with exp is not yet supported (returns + `NumericalFailure`). End-to-end routing test + (`routes_exponential_through_public_entry`) passes. +- **Python access** — `pounce.qp.solve_socp(..., cones=[("exp", 3), ...])` + reaches the driver via `pounce-py`'s cone parser (`"exp"`/`"exponential"`, + fixed dimension 3 validated; the SOC+exp mix raises a clear `ValueError` + up front rather than returning an opaque status). Verified from Python on + the GP (`→ 2`) and log-sum-exp (`→ log 2`) problems + (`python/tests/test_socp.py`). +- **QP solve report** — the convex/QP CLI path (`run_convex_qp`) now emits the + `pounce.solve-report/v1` JSON report (`--json-output`) like the NLP path, + with real final KKT residuals via `QpSolution::kkt_residuals` → + `QpResiduals` (in `pounce-convex`, tested with active bounds and a binding + inequality), so the benchmark harness can compare QP/exp-cone solves to NLP + solves uniformly. At `--json-detail full` the report also carries the + **per-iteration convergence trace** (`iterations` array, same `IterRecord` + schema as the NLP path): an opt-in `QpOptions::collect_iterates` makes the + convex IPM record `obj / inf_pr / inf_du / μ / α` per iteration into + `QpSolution::iterates` (off by default — no overhead), which `run_convex_qp` + maps into the report. +- **Bug fixed:** `in_dual_cone` had `ψ* = v − u·log(−u/w)` instead of the + correct `v − u + u·log(−u/w)` (it mislabeled dual-infeasible points as + interior); cross-checked against DA p. 346 and regression-tested. + +- **NLP cross-checks** (`crates/pounce-cli/tests/exp_cone_vs_nlp.rs`) — the + geometric program (`= 2`), log-sum-exp (`= log 2`), and entropy + maximization (`= −log n`) are each solved *twice*: as an exp-cone conic + program (this driver) and as a smooth NLP (the independent IPOPT-style + filter-IPM in `pounce-algorithm`). The two optima agree to ~1e-7 — strong + evidence of correctness, since the conic and NLP paths share no code. +- **Endgame acceptance:** near the cone boundary `ψ → 0` makes `∇²F` blow up, + so the scaling/factorization can break down a hair short of `tol`. When that + happens with KKT residuals already within `~1e3·tol`, the driver accepts the + current iterate (IPOPT's "solved to acceptable level") instead of reporting a + spurious `NumericalFailure`. + +**H6 (power cone) — landed.** The non-symmetric machinery was generalized +(`cones/nonsym.rs`): `conjugate_grad`, the dual-aware scaling +(`NonsymScaling`), and `third_dir_apply` are now generic over any 3-D +`BarrierCone` (which gained an `interior_reference` returning a point in +`K ∩ K*`). The exp and power cones supply only their barrier oracles. The +`PowerCone { alpha }` (`cones/power.rs`) implements `K_α = {|x| ≤ y^α z^{1−α}}` +with the degree-3 barrier `−log(y^{2α}z^{2−2α} − x²) − (1−α)log y − α log z` +(FD- and identity-validated). The driver dispatches over a `NonsymCone` +enum (Exp/Power) that implements `BarrierCone`, so the loop, corrector, and +step length are cone-agnostic; the generic machinery is validated on both +cones via the secants `M·s=z`, `M·x̃=s̃`. Wired through `ConeSpec::Power(α)` → +`solve_socp_ipm` → `solve_nonsym`, and Python `solve_socp(cones=[("pow", α)])` +(exponent validated to `(0,1)`). Known-optimum tests +(`max x s.t. (x, 2, 0.5) ∈ K_α` → `2^α 0.5^{1−α}`) pass for several α in Rust +and Python. + +**SOC mixing — landed.** The non-symmetric driver now also accepts +second-order-cone blocks (`NsBlock::SecondOrder`): they are self-scaled, so +they reuse `SecondOrderCone`'s NT machinery — a dense `W² = diag(d)+uuᵀ` +block, the Jordan `comp_residual`/corrector, the arrow `rhs_comp_term`, and +the closed-form `max_step` — alongside the dual-aware exp/power blocks in one +KKT. A SOC may be freely mixed with an exp/power cone (`solve_socp_ipm` routes +any exp/power/SOC mix to `solve_nonsym`; Python `solve_socp` likewise). Tested: +SOC-only and `min t + z s.t. (t,3,4)∈SOC ∧ (1,1,z)∈K_exp` → `t=5, z=e` in Rust +and Python. + +**Warm-start — landed (primal hook).** `solve_conic_hsde_nonsym_warm` seeds +the primal `x` from a previous (nearby) solution while keeping the cones +centered, lowering the initial primal residual. Honest scope: the HSDE +embedding's iteration count is start-dependent and not guaranteed to drop, so +this is a primal hook, **not** a promised speedup — the property tested is +*start-independence* (warm from the optimum, a bad point, or an ignored +mismatched vector all reach the same optimum). Higher-level routing +(`solve_socp_ipm_warm` for the non-symmetric path, Python) and factor reuse +remain optional follow-ups, gated on a demonstrated need. + +### H7 status — PSD cone landed (small dense SDPs) + +The semidefinite cone is **self-scaled**, so unlike exp/power it lives on the +*symmetric* driver (`hsde.rs` / `solve_socp_ipm`), not the non-symmetric one. + +- **Oracles** (`cones/psd.rs`) — `svec`/`smat` (the `√2`-off-diagonal isometry + so `⟨X,Y⟩_F = svec·svec`), the `−log det` barrier + gradient `−X⁻¹` + + Hessian action, membership / fraction-to-boundary via eigenvalues, and the + Nesterov–Todd scaling `W = S^{1/2}(S^{1/2}ZS^{1/2})^{-1/2}S^{1/2}`, validated + against `W Z W = S`. Eigendecompositions reuse + `pounce_linalg::symmetric_eigen`. +- **`Cone` impl** — the matrix-Jordan machinery: `kkt_block` → the dense + symmetric Kronecker `H = W ⊗ₛ W` (`ConeBlock::DenseLower`), validated to + satisfy `H·svec(z) = svec(s)`; `comp_residual` uses the Jordan product + `(SZ+ZS)/2`; `rhs_comp_term` = `Arw(z)⁻¹ r` via a Lyapunov solve + `ZD+DZ = 2·smat(r)`; `recover_ds = −Arw(z)⁻¹ r − H·dz`, all cross-checked. +- **Driver integration** — `ConeSpec::Psd(n)` / `ConeKind::Psd`; `KktStructure` + gained a fully-dense `(z,z)` block path (a third `block_shapes` class + alongside the orthant's diagonal and the SOC's diag+rank-1 aux-var trick). + Validated end to end on `max λ s.t. M − λI ⪰ 0 ⇒ λ_min(M)` for a diagonal + and a non-diagonal `M` (the latter exercising the off-diagonal scaling). + +- **Python** — exposed via `pounce.qp.solve_socp(cones=[("psd", n)])` (the + value is the matrix size `n`; the slack block is `svec(X)`). The + PSD-with-exp/power mix raises a clear `ValueError`. +- **Sparsity (block-diagonal)** — `decompose_psd` splits a block-diagonal + `Psd(n)` cone into independent PSD cones over the connected components of + its sparsity graph (one dense `O(m²)` KKT block → several small ones, + exploited by the sparse factorization). Solution-equivalent: the primal / + objective are unchanged and the dropped (structurally-zero) cross rows have + empty `G` rows, so their dual is `0`. +- **Sparsity (chordal range-space)** — `chordal_decompose` (built on + `cones/chordal.rs`: chordal extension + maximal cliques) handles the + *general* connected-sparse case via Agler's theorem: `s ⪰ 0` ⟺ + `s = Σ_k Tᵀ S_k T`, introducing clique blocks `S_k ⪰ 0` and one consistency + equality per clique-covered entry. Runs after the block-diagonal split; + the dual is reconstructed through both layers (PSD entry duals from the + consistency-equality multipliers). Equivalence-tested against the dense + solve on a path-pattern SDP (`x`, objective). +- **CBF SDP input** — the CBF reader parses affine PSD constraints + (`PSDCON` + `HCOORD`/`DCOORD`): `D_c + Σ_k x_k H_{c,k} ⪰ 0` maps directly + onto `s = svec(D) − Σ x_k svec(H_k) ∈ Psd` (√2-scaled). Validated on a + synthetic SDP (`max λ s.t. M − λI ⪰ 0`). + +Remaining for PSD: primal `PSDVAR` matrix variables in the CBF reader (the +`OBJFCOORD`/`FCOORD` form) — affine `PSDCON` is done; and PSD cannot be mixed +with exp/power cones in one problem (different drivers; the mix fails +cleanly). The chordal elimination uses the natural variable order — a +fill-reducing ordering (AMD) would shrink the cliques further on large +instances. + +Remaining (overall): only — if a need emerges — embedded factor-reuse for the +non-symmetric path. The CBLIB exp- and power-cone tiers, the cross-check, +and the benchmarks-harness integration all landed (see below). + +### CBLIB benchmark tier — landed (exp + power cones) + +**Status: landed.** The reader, the CBF→pounce mapping, the independent NLP +cross-check, and the benchmarks-harness integration are implemented and +green for both the exponential-cone GPs and the 3-D power cone. + +- **CBF reader** (`pounce_cli::cbf`) — parses the Conic Benchmark Format + (`VER`/`OBJSENSE`/`POWCONES`/`VAR`/`CON`/`OBJACOORD`/`OBJBCOORD`/`ACOORD`/`BCOORD`) + with the cone kinds `F`/`L=`/`L+`/`L-`/`EXP`/`Q` and the 3-D power cone + (`@k:POW` resolving its exponent `α = α₀/(α₀+α₁)` against the `POWCONES` + table). Unsupported kinds (PSD `DCOORD`, rotated SOC `QR`, dual power + cones) are rejected with a clear error rather than mis-parsed. Unit-tested + on the section grammar, the exp-dim and cone-sum checks, the `POWCONES` + α-resolution + permutation, and unsupported-cone / bad-`@k` rejection. +- **`CbfModel::to_conic`** — maps an instance to a pounce conic program + (`QpProblem` + `Vec`): VAR cones → slack `s = −Gx ∈ K`, CON + cones → `s = Ax+b ∈ K`, `L=` → equality `Ax = −b`. The non-symmetric + triples are permuted into pounce cone order: exp **reversed** (CBF + bound-first `(a,b,c)` → pounce bound-third `(c,b,a)`), power **rotated** + (CBF `x₀^β₀ x₁^β₁ ≥ |x₂|` → pounce `(x,y,z) = (x₂,x₀,x₁)`, `α = β₀`). +- **Conic solve on real instances** (`tests/cblib_cbf.rs`) — three vendored + CBLIB GPs (`demb761`, `beck751`, `fang88`) plus a hand-authored synthetic + power-cone instance (`pow3_synthetic.cbf` — the real `2013_fir*` are + ~120 MB), each under `crates/pounce-cli/tests/data/cblib/`, parse, map, + and reach a verified `Optimal`. The power instance hits its closed-form + optimum `x₂ = 2^½·½^½ = 1`. +- **Independent NLP cross-check** (`tests/cblib_vs_nlp.rs`) — exactly the + `exp_cone_vs_nlp` strategy: each instance is also built as a smooth NLP + (exp triple → `u₀ − u₁·exp(u₂/u₁) ≥ 0`; power cone → the epigraph + `u₀^α u₁^{1−α} ∓ x_bnd ≥ 0`; both with exact gradient + Hessian, `L=`/`L-` + rows linear) and solved by the filter-IPM, **cold-started independently** + of the conic solution. The two solvers — sharing no code — agree to ~1e-8 + relative: `demb761 → 22.31086`, `beck751 → 7.50095`, `fang88 → −10.38004`, + `pow3 → 1.0`. (CBLIB ships no reference solution files, so the cross-check + *is* the reference.) +- **Benchmarks-harness integration** — the `pounce_cblib` binary solves a + `.cbf` and emits a `pounce.solve-report/v1` JSON (status / iters / time / + objective, per-iteration trace at `--json-detail full`; input descriptor + kind `cbf-file`). `benchmarks/cblib/run_cblib.py` runs it over the + vendored instances (offline) — or a `--dir` of a local CBLIB checkout — + and projects each report into the composite suite schema at + `cblib/pounce.json`. + +Extensions left for when needed: the large power-cone instances +(`2013_fir*`, ~120 MB — fetch into a `--dir` rather than vendoring), +constraint-side exp/SOC cones in the NLP cross-check form (the conic +mapping already handles them), and the rotated SOC (`QR`) cone kind. + +#### Original plan (kept as the implementation record) + +The literal benchmark instances from the source papers live in CBLIB +(`https://cblib.zib.de/download/all/.cbf.gz`, reachable) and are the +gold-standard broad validation: + +- **Geometric programs** (small, exp cones, pure-continuous): `demb761/762/763`, + `beck751/752/753`, `fang88`, `jha88`, `car`, `rijc786/787`, `mra01/02`. +- **Logistic regression** (pure-continuous exp): `LogExpCR-n{20,100,500}-m{400…2000}`. +- **Power cone**: `2013_fir*`. +- (`batch*`/`rsyn*` are MINLPs — solve the *continuous relaxation* if used.) + +**CBF → pounce conversion** (verified against a full dump of `demb761`): +the `.cbf` has `VAR` (cones over variables) and `CON` (cones over `Ax+b`), +plus sparse `OBJACOORD` (obj `c`), `OBJBCOORD` (obj constant `c₀`), `ACOORD` +(`A`), `BCOORD` (`b`). +- VAR `EXP 3` → variable triple in `K_exp`; **CBF order `(a,b,c)` permutes to + pounce `(c,b,a)`** (CBF `x1 ≥ x2 e^{x3/x2}` vs pounce `z ≥ y e^{x/y}`). + Realize as `s = x_triple ∈ K` via `G = −I`, `h = 0`. +- VAR `POW` → `K_α` (read the exponent); VAR `Q`/`QR` → SOC; `F` → free. +- CON `L=` → equality `Ax = −b`; `L-` → `Ax ≤ −b`; `L+` → `Ax ≥ −b` + (nonneg slack `s = −(Ax+b)`); CON cone blocks (EXP/POW/Q) → cone rows. + +**Validation strategy (no published reference objectives — they 404):** use +the same cross-check as `exp_cone_vs_nlp` — parse each `.cbf` into *both* a +conic program (this driver) and a smooth NLP (`pounce-nlp`, with the exp/pow +epigraph constraints and their analytic Jacobians) and assert the two +independent solvers agree on the objective. Report status / iters / time / +KKT residuals per instance (feeding the JSON solve report into the existing +`benchmarks/` harness). Build the CBF reader as its own carefully-tested unit +first (round-trip on `demb761`) before wiring the harness. + +## Sources (local copies — read and transcribed) + +- **Skajaa, A. & Ye, Y. (2015).** *A homogeneous interior-point algorithm for + nonsymmetric convex conic optimization.* Mathematical Programming Ser. A + **150**(2), 391–422. DOI [10.1007/s10107-014-0773-1](https://doi.org/10.1007/s10107-014-0773-1). + Local copy: `~/Desktop/hsde-2.pdf`. Provides the homogeneous model and the + primal-only Hessian scaling with a separate centering corrector — the `μH` + scaling the prototype used (and the Runge–Kutta corrector DA improve on). +- **Dahl, J. & Andersen, E. D. (2021).** *A primal-dual interior-point + algorithm for nonsymmetric exponential-cone optimization.* Mathematical + Programming Ser. A **194**(1–2), 341–370. DOI + [10.1007/s10107-021-01631-4](https://doi.org/10.1007/s10107-021-01631-4). + Local copy: `~/Desktop/hsde-reference.pdf`. **Source of item #1**: the + Tunçel/BFGS dual-aware primal–dual scaling (this is MOSEK's exp-cone + algorithm), the third-order corrector, and the exp-cone barrier derivatives + (Appendix A) — the `(DA n)` equations cited above. +- Underlying scaling theory: **Tunçel, L. (2001)**, *Generalization of + primal–dual interior-point methods to convex optimization problems in conic + form*, Found. Comput. Math. **1**(3), 229–254; **Myklebust, T. & Tunçel, L. + (2014)**, *Interior-point algorithms for convex optimization based on + primal–dual metrics*, arXiv:1411.2129 — the secant / multiple-secant BFGS + scalings DA build on. diff --git a/dev-notes/lp-qp-routing.md b/dev-notes/lp-qp-routing.md index ab10bda6..c790b466 100644 --- a/dev-notes/lp-qp-routing.md +++ b/dev-notes/lp-qp-routing.md @@ -22,7 +22,7 @@ correct (LP ⊂ convex QP ⊂ NLP) but leaves performance on the table: in `ipopt.opt`. Mirrors Gurobi/CPLEX UX; preserves a single Pyomo `SolverFactory('pounce')` entry. 2. **One `pounce-convex` crate** for the IPM-based convex algorithms - (IPM-LP, IPM-QP, and a future simplex). Resists workspace sprawl; + (IPM-LP, IPM-QP, and the conic extensions). Resists workspace sprawl; related algorithms share warm-start logic, presolve adapters, and the predictor-corrector machinery. 3. **Active-set QP stays in its own `pounce-qp` crate.** A sparse @@ -50,17 +50,17 @@ It does three things: capture `n_nl_cons`, `n_nl_objs`, and the `n_nl_vars_*` triplet currently skipped at `nl_reader.rs:591`. Walks the parsed `Expr` AST (`nl_reader.rs:45-65`) to confirm linearity and detect - quadratic objectives. Produces: + quadratic objectives and constraints. Produces: ```rust - enum ProblemClass { Lp, ConvexQp, NonconvexQp, Nlp } + enum ProblemClass { Lp, ConvexQp, ConvexQcqp, NonconvexQp, Nlp } ``` 2. **Resolves the solver choice** by combining `ProblemClass` with the `solver_selection` option: - `auto` (default): most specialized solver matching the class - `nlp`: always IPM-NLP (current behavior) - - `lp-ipm`, `lp-simplex`, `qp-ipm`, `qp-active-set`: force; error - if the problem doesn't fit (e.g., `simplex` on a problem with a - quadratic objective). + - `lp-ipm`, `qp-ipm`, `qp-active-set`: force; error if the problem + doesn't fit (e.g., `qp-ipm` on a problem with a non-quadratic + objective). 3. **Dispatches.** Each solver implements (or is wrapped behind) the existing `TNLP` trait (`crates/pounce-nlp/src/tnlp.rs:157`); the trait is already algorithm-agnostic and object-safe, so dispatch is @@ -72,7 +72,7 @@ It does three things: ``` crates/ pounce-algorithm/ # existing — IPM-NLP, unchanged - pounce-convex/ # NEW — IPM-LP, IPM-QP, simplex + pounce-convex/ # NEW — IPM-LP, IPM-QP, conic (SOCP/exp/pow/SDP) pounce-qp/ # existing (on active-set-sqp-warm-start branch) # — sparse Schur-complement parametric active-set QP pounce-nlp/ # existing — TNLP trait, unchanged @@ -82,12 +82,12 @@ crates/ pounce-presolve/ # existing — extended with LP-specific reductions ``` -`pounce-convex` exposes per-algorithm entry points for the IPM family -and (eventually) simplex: +`pounce-convex` exposes per-algorithm entry points for the IPM family: ```rust pub fn solve_lp_ipm(tnlp: Rc>, opts: &OptionsList) -> Status; pub fn solve_qp_ipm(tnlp: Rc>, opts: &OptionsList) -> Status; -pub fn solve_simplex(tnlp: Rc>, opts: &OptionsList) -> Status; +// SOCP / exp / pow / SDP reuse solve_qp_ipm's cone-generic scaffolding +// (see src/cones/), selected by the cone types present — not a new fn. ``` `pounce-qp` already exposes its own active-set entry point; dispatch @@ -101,12 +101,21 @@ All IPM solvers reuse `pounce-linsol` for the augmented-system factorization (`SparseSymLinearSolverInterface` — same trait feral and MA57 implement today). Mehrotra predictor-corrector and Gondzio higher-order correctors live inside `pounce-convex` because the same -iteration scaffolding serves both IPM-LP and IPM-QP. Simplex grows its -own LU-with-updates module (eventually a separate `pounce-lu` crate -when justified). `pounce-qp` keeps its own Schur-complement KKT +iteration scaffolding serves both IPM-LP and IPM-QP (and the conic +extensions). `pounce-qp` keeps its own Schur-complement KKT machinery — different from the IPM augmented system — so it does not share the IPM scaffolding. +Unlike the NLP path, the convex entry points exploit the constant-matrix +structure: for an LP/QP the Hessian `P` and constraint matrix `A` (and +`c`, `b`) do *not* depend on `x`, so they are extracted **once** at +setup via a single `eval_h` / `eval_jac_g` call and cached for the rest +of the solve. The `TNLP` contract is built for nonlinear problems and +suggests per-iteration re-evaluation; the convex solver must *not* be a +thin per-iteration `TNLP` driver like the NLP path, or it forfeits the +specialization that justifies it (and the Phase 2 "specialized path +wins" benchmark claim). + ### Active-set vs IPM-QP: why both | Property | IPM-QP (`pounce-convex`) | Active-set (`pounce-qp`) | @@ -120,9 +129,53 @@ share the IPM scaffolding. | Best for | one-shot convex QPs, LPs | QP sequences, SQP inner solver, | | | | MPC, MIP node QPs | -Dispatch picks between them via `solver_selection`; `auto` defaults to -IPM-QP for one-shot convex QPs and routes parametric / warm-startable -calls (when that signal is exposed by the caller) to `pounce-qp`. +Dispatch picks between them via `solver_selection`. Under `auto`, +convex LP/QP always goes to IPM-LP/IPM-QP — **the active-set path is +opt-in**, never auto-selected from the NL path. The reason: an `.nl` +file describes a single instance, and neither the format nor +`solver_selection` carries a "this is one of a parametric sequence, +warm-start it" signal for the classifier to act on. So `pounce-qp` is +reached only (a) explicitly via `solver_selection = qp-active-set`, or +(b) programmatically via the Python/C warm-start API, where the caller +holds state across solves and *is* the warm-start signal. A future +extension could let a caller mark a problem as warm-startable through a +`solver.options` hint, at which point `auto` could route it to +`pounce-qp`; until that hint exists, auto-routing to active-set is not +possible and is not claimed. + +### Relationship to active-set SQP + +Two *orthogonal* solver-selection axes are in play; conflating them +causes confusion: + +1. **`solver_selection`** (this note) — picks a solver by **problem + class**: LP / convex QP / convex QCQP / NLP. This is the dispatch + layer described above. +2. **`algorithm`** — picks the **NLP algorithm strategy**: the + Wächter-Biegler filter-IPM (default) vs. an active-set SQP. Both + solve *general NLP*; they differ in warm-start behavior. Active-set + SQP is a new `AlgorithmStrategy` end-to-end (see the design note + [`research/active-set-sqp-warm-start.md`](research/active-set-sqp-warm-start.md)), + opt-in and parallel to the IPM, leaving the default loop untouched. + +Active-set **SQP** is therefore an *NLP* solver — it sits beside IPM-NLP +at the top of the stack, **not** in the convex LP/QP layer. + +The two notes connect through one crate: **`pounce-qp` does double +duty.** Its sparse parametric active-set QP solver is both + +- the **`qp-active-set` dispatch target** for a standalone convex QP + (this note), and +- the **inner QP subproblem solver** inside the active-set SQP NLP + algorithm (the SQP note). + +Build it once, use it both ways — which is why both notes point at the +same `crates/pounce-qp/` on `claude/active-set-sqp-warm-start-BnjLA`. +Both target the same warm-start sweet spot (MPC, SQP inner solve, B&B +node QPs, parametric homotopy), where IPM warm-starts badly because the +barrier pushes iterates off the active boundary. This is the parallel +track called out in the phasing: it is *not* phase-ordered against +`pounce-convex` and ships on its own schedule. ### What modeling languages see @@ -149,31 +202,67 @@ The NL format header (Gay 2005 §3) lines currently skipped at needed: - Line 2: `n_vars n_cons n_objs ranges eqns` (already parsed) -- Line 4: `n_nl_cons n_nl_objs` — if both zero, problem is at-most - quadratic (could be LP or QP; need AST walk to decide) +- Line 4: `n_nl_cons n_nl_objs` — count of constraints/objectives with + a *nonlinear part*. Zero means purely linear; see the LP/QP caveat + below. - Line 5: `n_nl_net n_lin_net` — network structure (future routing target) - Line 6: `n_nl_vars_in_both n_nl_vars_in_cons n_nl_vars_in_obj` -If `n_nl_cons == 0` and `n_nl_objs == 0` → class is LP or QP. -If furthermore the objective AST contains only linear terms → LP. -If the objective AST has degree-2 `Mul` or `Pow` nodes only → QP -(check positive-semidefiniteness for convex/nonconvex split via the -Hessian-pattern computation already in `pounce-nlp`). +The NL format has no dedicated quadratic section: each row's linear +part lives in the `G`/`J` (gradient/Jacobian) coefficient segments, +while *any* higher-order term — including the quadratic terms of a QP — +is written into the nonlinear expression tree (`O`/`C` segments) as +`Mul`/`Pow` nodes. Consequently a QP objective registers as nonlinear, +so the header alone does **not** distinguish LP from QP: + +- `n_nl_cons == 0` and `n_nl_objs == 0` → class is **LP** (all + structure is in the linear `G`/`J` segments; no AST walk needed). +- Otherwise walk the nonlinear AST of every row (objective *and* + constraints) that carries a nonlinear part. If any nonlinear term is + not a degree-2 polynomial (transcendental, higher-degree `Pow`, etc.) + → **NLP**. If all nonlinear terms are degree-2 polynomials, extract + the Hessians and split on convexity (PSD test via numerical + factorization / attempted Cholesky — *not* the Hessian *pattern* from + `pounce-nlp`): + - quadratic objective, **linear** constraints, objective Hessian PSD + → **ConvexQp** (→ IPM-QP); + - quadratic objective and/or **quadratic** constraints, all convex + (objective Hessian PSD and each ≤-inequality's constraint Hessian + PSD) → **ConvexQcqp** (→ SOCP / conic solver, Phase 4+). A convex + QCQP is SOCP-representable via the epigraph / rotated-second-order- + cone reformulation, so it routes to the same conic IPM as native + SOCP rather than to the dense NLP path; + - any indefinite Hessian (objective or a constraint) → **NonconvexQp** + (falls through to NLP-IPM for a local min). +- **Conservative fallback (correctness guard).** Whenever the walk + cannot *prove* the stronger class — parse failure, an inconclusive / + near-singular PSD test, or a quadratic constraint whose sense is + incompatible with its curvature — fall back to the more general class, + ultimately **NLP**. Misclassifying an indefinite or non-quadratic + problem *into* a convex solver would return a spurious KKT point as if + globally optimal; falling back to NLP is always sound. The PSD test + therefore uses a tolerance, and "inconclusive within tolerance" routes + to NLP, never to the convex path. +- Until Phase 4 (SOCP) lands, **ConvexQcqp** falls through to NLP-IPM; + the distinct class is the dispatch seam the conic solver later + intercepts (same pattern as `NonconvexQp`). + +This mirrors how QP-capable AMPL solvers detect QPs (ASL's `nqpcheck` +walks the nonlinear tree to recover `Q`); the header is a fast reject +for the LP case only. ### Option plumbing Single new option on `OptionsList`: - Key: `solver_selection` -- Values: `auto` (default), `nlp`, `lp-ipm`, `lp-simplex`, `qp-ipm`, - `qp-active-set` +- Values: `auto` (default), `nlp`, `lp-ipm`, `qp-ipm`, `qp-active-set` - Validation: `auto` always works; explicit values error if the loaded problem doesn't match the class (with a message naming the detected class). -- Routing: `lp-ipm` / `qp-ipm` / `lp-simplex` resolve into - `pounce-convex` entry points; `qp-active-set` resolves into the - existing `pounce-qp` crate. +- Routing: `lp-ipm` / `qp-ipm` resolve into `pounce-convex` entry + points; `qp-active-set` resolves into the existing `pounce-qp` crate. Follows the precedent of `linear_solver`, which selects `Ma57`/`Feral` via the `LinearBackendFactory` at @@ -185,19 +274,127 @@ via the `LinearBackendFactory` at object-safe (`crates/pounce-nlp/src/tnlp.rs:157-249`). - `.sol` writer (`crates/pounce-cli/src/nl_writer.rs`) is already problem-type-agnostic; takes `(x, lambda, status)`. No change. -- `pounce-restoration`, `pounce-l1penalty`, `pounce-sensitivity`, - `pounce-mu` stay coupled to IPM-NLP only — convex solvers don't - need most of them. +- `pounce-restoration`, `pounce-l1penalty`, `pounce-sensitivity` stay + coupled to IPM-NLP only — the convex solvers don't use them (no + filter restoration, no penalty reformulation; sensitivity stays + NLP-coupled for now, though it's the natural seam for differentiable + convex layers later). +- A barrier parameter μ is *not* optional, though: every IPM has one. + The convex IPM supplies its own **Mehrotra adaptive σ·μ centering** + (in `pounce-convex`, Phase 3), which is distinct from the NLP + `mu_strategy` (Monotone / Adaptive) in `pounce-mu`. Open question for + Phase 2/3: reuse `pounce-mu`'s strategy abstraction if it fits, or + keep the convex μ logic local to `pounce-convex`. Either way it is a + required component, not a skipped one. - `pyomo-pounce` doesn't change at all; users get LP/QP routing transparently via the CLI dispatch. +### Presolve integration + +Presolve is a 2–10× factor on the Mittelmann/Maros-Mészáros sets, so +*wall-clock* competitiveness with HiGHS/Clarabel depends on it — Phase 3 +delivers an *algorithmically* competitive iteration (low iteration +counts), and Phase 3.5 (presolve) is what turns that into competitive +end-to-end wall-clock. Presolve is *not* optional for that bar, even +though it is not blocking for *correctness*. Two parts: the integration +seam (favorable, mostly inherited) and the reduction work (largely +net-new for LP/QP). + +**Integration seam — inherited for free.** `pounce-presolve` is already +a *composable TNLP wrapper* (TNLP-in → reduced-TNLP-out, with a +postsolve path that reinstates dropped rows and forwards multipliers; +see `crates/pounce-presolve/src/lib.rs` Phases 0–5). Because the convex +solvers also consume `TNLP`, `pounce-convex` sits *behind* +`PresolveTnlp` exactly as the IPM does today — no new plumbing. This is +the part that is genuinely "not blocking." + +**IPM-aware reduction policy — the seam differs from a simplex +presolve.** Gondzio (1997) shows an IPM cares about Cholesky/LDLᵀ +*fill-in*, not a basis: reductions that help simplex (aggressive +variable substitution) can *hurt* an IPM by densifying the factor. +Since `pounce-convex` factors through `pounce-linsol` LDLᵀ, substitution +must be gated on fill growth (Mészáros & Suhl 2003 bound model-size +increase before each elimination). This is a *policy*, not just a +reduction set. + +**Reduction catalog to implement.** Grounded in the literature review +(citations below): + +- *Core LP reductions (Andersen & Andersen 1995):* empty / singleton / + forcing / dominated rows; singleton / duplicate columns; bound + tightening. Most already exist in `pounce-presolve` for the NLP path + and carry over. +- *Modern strengthening (Achterberg et al. 2020):* coefficient + strengthening, dual reductions, parallel/dominated row–column + detection. The modern bar; add incrementally. +- *QP/Hessian-consistent reductions (Gould & Toint 2004) — net-new:* + variable substitution and duplicate-column detection must account for + the Hessian `Q` (elimination fills `Q` with cross-terms), and the + **postsolve must recover the dual consistently with the quadratic + term**. The existing NLP-shaped presolve has no notion of a `P` + block, so this is the genuinely new work for the convex-QP path. + +**Postsolve / restoration stack — the missing architectural piece.** +Every reduction must carry its undo and recover *primal and dual* for +the original problem (Andersen & Andersen 1995; PaPILO's +transaction/reduction-stack design). The current crate does this for +its NLP reductions; LP/QP variable substitution and bound shifts need +their own dual-recovery transforms. + +**Equilibration front-end.** Ruiz (2001) row–column norm balancing +(optionally + Pock–Chambolle), as used by OSQP/Clarabel, conditions the +KKT system before the IPM solve. Adjacent to presolve proper; bundle it +with the dispatch into `pounce-convex`. + +**Build in pure Rust; learn from PaPILO, don't wrap it.** POUNCE's +default build is pure Rust by design (no Fortran/C/C++, no system BLAS — +see README and `docs/src/introduction.md`), so wrapping PaPILO +(header-only C++) is out: it would break the pure-Rust guarantee that +`pounce-feral` exists to uphold. PaPILO (Gleixner, Gottwald & Hoen +2023; INFORMS JOC; arXiv:2206.10709) is still the best *reference +architecture* — its **transaction-based reduction stack** (each +reduction is a transaction with an undo, conflict-checked so reductions +can be applied in parallel) is exactly the postsolve design +`pounce-presolve` needs, and it is Apache-2.0 so studying the source is +unencumbered. The plan is therefore to extend `pounce-presolve` +in-house, porting PaPILO's *ideas* (transaction model, the LP/QP +reduction set) rather than its code. Parallelism uses **rayon** (the +idiomatic Rust data-parallel crate; not yet a workspace dependency) for +the same recursive/data-parallel routines PaPILO parallelizes with +Intel TBB — probing, dominated-column detection, constraint +sparsification — keeping the transaction model as the conflict-avoidance +mechanism. + +**Key references** + +- E. D. Andersen & K. D. Andersen, *Presolving in linear programming*, + Math. Prog. 71:221–245 (1995). — reduction catalog + restoration. +- J. Gondzio, *Presolve analysis of linear programs prior to applying + an interior point method*, INFORMS JOC 9(1):73–91 (1997); Addendum + 13(2):169 (2001). — IPM-specific (fill-in) presolve. +- C. Mészáros & U. Suhl, *Advanced preprocessing techniques for linear + and quadratic programming*, OR Spectrum 25:575–595 (2003). — + fill-/row-growth control during elimination. +- N. Gould & P. Toint, *Preprocessing for quadratic programming*, + Math. Prog. Ser. B 100:95–132 (2004). — QP/Hessian-aware reductions + and dual recovery. +- T. Achterberg, R. Bixby, Z. Gu, E. Rothberg & D. Weninger, *Presolve + Reductions in Mixed Integer Programming*, INFORMS JOC 32(2):473–506 + (2020). — modern taxonomy (Gurobi). +- A. Gleixner, L. Gottwald & A. Hoen, *PaPILO: A Parallel Presolving + Library for Integer and Linear Optimization with Multiprecision + Support*, INFORMS JOC (2023); arXiv:2206.10709. — Apache-2.0 + reference implementation (LP/MIP/QP). +- D. Ruiz, *A scaling algorithm to equilibrate both rows and columns + norms in matrices*, RAL-TR-2001-034 (2001). — equilibration. + ## Implementation phasing Each phase is independently shippable. The headline shift from the original plan is that `pounce-convex` is *the* in-house home for the entire IPM/conic family — LP, QP, SOCP, SDP, exponential cone, power -cone — built incrementally on a single Mehrotra + HSDE scaffolding -sharing `pounce-linsol`. Active-set QP stays in `pounce-qp` on its own +cone — built incrementally on a single Mehrotra scaffolding (with the +HSDE embedding added at the SOCP phase) sharing `pounce-linsol`. Active-set QP stays in `pounce-qp` on its own track. Other algorithm families (ADMM, AL+semismooth Newton, banded/Riccati IPM, simplex) are explicitly *out of scope* — see the "Out of scope and why" section below. @@ -207,25 +404,100 @@ banded/Riccati IPM, simplex) are explicitly *out of scope* — see the `nlp` (auto → nlp for now). Ship to verify no regression. *No new algorithm.* -**Phase 2 — IPM-QP in `pounce-convex`.** Bare IPM-QP (no Mehrotra -yet); route LP and QP problems to it under `auto`. Compare iteration -counts and wall-clock against the existing IPM-NLP path on the +**Phase 2 — IPM-QP in `pounce-convex` (+ Ruiz equilibration).** Bare +IPM-QP (no Mehrotra yet); route LP and QP problems to it under `auto`. +**Build the iteration over the `Cone` abstraction (`src/cones/`) from +the start, with only `nonneg` implemented** — this is what makes +Phases 4–6 cone *extensions* rather than a rewrite; a QP-specific solve +retrofitted for cones later would make the Phase 4 "cheap incremental +win" claim false. Bring in **Ruiz equilibration** here — it is a +conditioning prerequisite for the IPM KKT solve, effectively part of the +solver rather than deferrable presolve (see "Presolve integration"). +Compare +iteration counts and wall-clock against the existing IPM-NLP path on the `quadratic`, `bounded-quadratic`, `eq-quadratic` builtins. This is the minimum that justifies the `pounce-convex` crate. -**Phase 3 — Mehrotra predictor-corrector + HSDE.** Add the -predictor-corrector iteration and homogeneous self-dual embedding for -infeasibility detection and a self-starting iterate. Should reduce -iteration counts ~30-50% on convex QPs. Validate on Mittelmann LP +**Phase 3 — Mehrotra predictor-corrector.** ✅ **Landed.** Add the +predictor-corrector iteration (affine predictor, adaptive centering +σ = (μ_aff/μ)³, second-order corrector, single factorization shared by +both solves). Reduces iteration counts ~30–50% on convex QPs vs the NLP +filter-IPM — verified in `crates/pounce-cli/tests/qp_vs_nlp_iterations.rs` +(≈41% fewer at n=50). + +*The HSDE split.* The original plan bundled the homogeneous self-dual +embedding into this phase for two benefits: (a) infeasibility/ +unboundedness detection and (b) a self-starting iterate. These are now +separated: + +- **(a) Infeasibility/unboundedness detection — landed without HSDE.** + Implemented via *verified Farkas-certificate detection* layered on the + Mehrotra iterate (`detect_infeasibility` in `pounce-convex/src/ipm.rs`): + a primal-infeasibility certificate (`Aᵀy + Gᵀz ≈ 0`, `bᵀy + hᵀz < 0`, + `z ≥ 0`) or an unbounded recession direction (`Pd ≈ 0`, `Ad ≈ 0`, + `Gd ≤ 0`, `cᵀd < 0`), each *checked* against a tolerance so a positive + result is a proof — no false positives, only an `IterationLimit` + fallback when nothing is certifiable. This delivers HSDE's headline + user-facing benefit (clean `Infeasible`/`Unbounded` status, surfaced + to the CLI as AMPL `solve_result_num` 200/300) without rewriting the + iteration. Tests: `pounce-convex/tests/infeasibility.rs`. +- **(b) Self-starting iterate via the embedding — deferred to Phase 4.** + The full homogeneous self-dual embedding is a from-scratch rewrite of + the iteration (adds the τ, κ homogenizing variables and reworks the + KKT system). It is most justified as the **conic-IPM scaffolding** + Clarabel/ECOS are built on, so it lands with SOCP (Phase 4), where it + generalizes to cones — rather than rewriting the working QP iteration + now for a benefit the certificate approach already largely provides. + When built, it must be the **quadratic-objective HSDE variant** (as in + Clarabel; Goulart & Chen) that carries the `P` term inside the + embedding — *not* the textbook LP/conic HSDE, which assumes a linear + objective. Validate on Mittelmann LP subset and Maros-Mészáros QP set. After this phase `pounce-convex` is algorithmically competitive with Clarabel and HiGHS for the LP/QP -problem class. - -**Phase 4 — SOCP via second-order cone.** Add the second-order cone as -a constraint type. Nesterov-Todd scaling on the SOC block; rotated-SOC -as a derived form. Validate on Mittelmann SOCP set. This is a cheap -incremental win once Mehrotra is in place — the symmetric-cone IPM -machinery extends from LP/QP unchanged. +problem class. This is *algorithmic* competitiveness (iteration count +and convergence); *wall-clock* competitiveness on the full benchmark +sets additionally needs presolve (Phase 3.5). + +**Phase 3.5 — Presolve (reduction catalog + postsolve stack).** Now +that the iteration is algorithmically competitive, presolve is the +multiplier that closes the benchmark gap to HiGHS/Clarabel (a 2–10× +factor on the standard sets). Land the LP/QP reduction catalog, the +IPM-aware reduction policy, and the pure-Rust transaction-based +postsolve stack (PaPILO ideas, rayon for parallelism — not a wrap), per +the "Presolve integration" section. Sequenced *after* Phase 3 on +purpose: debugging the postsolve dual-recovery against a solver you +already trust avoids chasing two unknowns at once. Benchmark-driven — +add the reductions that actually move the Mittelmann / Maros-Mészáros +numbers. Equilibration (Phase 2) is the prerequisite already in place; +this phase adds the size-reducing transformations on top. + +*Status (implemented in `pounce-convex/src/presolve.rs`).* The +transaction-stack architecture with reversible primal+dual postsolve is +in place, plus an explicit variable-bound form (`lb`/`ub` on +`QpProblem`, bound duals `z_lb`/`z_ub`) and these reductions: empty +rows/columns, fixed-variable (singleton equality), free / linear-only +columns, free column singleton substitution, duplicate-row removal +(rayon-parallel hashing), and activity-bound redundancy + infeasibility +detection. Presolve is wired into the CLI dispatch, so `.nl` LP/QP +inputs run through it end-to-end. Each reduction has round-trip / KKT +tests and an example. Deferred (harder dual postsolve — an active +reduced bound's multiplier must be re-attributed to its source row): +bound *tightening*, forcing constraints, dominated columns; and the +MIP-leaning coefficient strengthening / probing. Benchmark-scale tuning +against the Mittelmann / Maros-Mészáros sets remains. + +**Phase 4 — SOCP via second-order cone (+ HSDE embedding).** Add the +second-order cone as a constraint type. Nesterov-Todd scaling on the SOC +block; rotated-SOC as a derived form. Validate on Mittelmann SOCP set. +This is a cheap incremental win once Mehrotra is in place — the +symmetric-cone IPM machinery extends from LP/QP unchanged. **This is +also where the homogeneous self-dual embedding lands** (deferred from +Phase 3): the embedding is the standard conic-IPM scaffolding +(Clarabel/ECOS) and generalizes cleanly to cones, so building it here — +rather than retrofitting the QP iteration — gives the self-starting +iterate and intrinsic infeasibility handling for the whole conic family +at once. (Phase 3 already provides verified-certificate infeasibility +detection for LP/QP, so this is an upgrade, not a prerequisite.) **Phase 5 — Exponential and power cones (non-symmetric).** Add the three-dimensional exponential cone, three-dimensional power cone, and @@ -258,16 +530,19 @@ and ships when its own phases 5a–d are complete. | Phase | Effort | Cumulative | |------|--------|-----------| | 1 — Dispatch | 2–4 weeks | 1 month | -| 2 — Bare IPM-QP | 3–6 months | 4–7 months | -| 3 — Mehrotra + HSDE | 2–3 months | 6–10 months | -| 4 — SOCP | 1–2 months | 7–12 months | -| 5 — Exp/power cones | 2–4 months | 9–16 months | -| 6 — SDP + chordal | 6+ months | 15+ months (optional) | +| 2 — Bare IPM-QP (+ equilibration) | 3–6 months | 4–7 months | +| 3 — Mehrotra (+ cert. infeasibility) | 2–3 months | 6–10 months | +| 3.5 — Presolve | 2–4 months | 8–14 months | +| 4 — SOCP (+ HSDE embedding) | 1–2 months | 9–16 months | +| 5 — Exp/power cones | 2–4 months | 11–20 months | +| 6 — SDP + chordal | 6+ months | 17+ months (optional) | Phases 1–3 are the minimum to justify the dispatch architecture and -deliver a credible LP/QP solver. Phases 4–5 are the natural extension -that closes most of the convex-conic-IPM gap to Clarabel. Phase 6 is -gated on demand. +deliver a *correct* LP/QP solver; Phase 3.5 (presolve) is what makes it +*benchmark-competitive* with HiGHS/Clarabel — required for that bar, +though not for correctness. Phases 4–5 are the natural extension that +closes most of the convex-conic-IPM gap to Clarabel. Phase 6 is gated +on demand. ## Out of scope and why @@ -332,20 +607,47 @@ sensitivity analysis on degenerate LPs). It needs LU-with-updates, which is a substantial engineering effort separate from the LDLᵀ-based IPM/conic scaffolding. -*Escape hatch:* IPM-LP from Phase 2/3 covers the medium-to-large LP -case and benchmarks competitively with HiGHS-IPM on the Mittelmann -sets. For small LPs and warm-start LP sequences, defer simplex until -a specific application forces it; alternative is to wrap HiGHS as a -backend. +*Escape hatch:* IPM-LP from Phases 2/3 plus presolve (Phase 3.5) covers +the medium-to-large LP case and benchmarks competitively with HiGHS-IPM +on the Mittelmann sets. For small LPs and warm-start LP sequences, defer +simplex until a specific application forces it; alternative is to wrap +HiGHS as a backend. ### Nonconvex QP / global optimization -Inherently combinatorial (branch-and-bound + SDP relaxation). Out of -scope for the entire POUNCE direction — neither the NLP-IPM nor the -convex-IPM addresses global optimization. - -*Escape hatch:* none. Use BARON / Gurobi-nonconvex for problems with -indefinite Hessians where local minima are insufficient. +Inherently combinatorial (spatial branch-and-bound + convex +relaxation). Out of scope *for now* — neither the NLP-IPM nor the +convex-IPM finds global optima today, and the B&B shell is substantial +new engineering. But it is deliberately left *reachable*: the +lower-bounding subproblem at each B&B node is itself a convex +relaxation (Shor/SDP, RLT/LP, or convex-QP), which is precisely the +conic family this note already plans to build. So the per-node solver +is free; only the B&B shell is new. + +Architectural choices that keep global QP in scope for later, without +redesign: + +1. **`NonconvexQp` stays a first-class `ProblemClass`**, never folded + into `Nlp`. It falls through to NLP-IPM (local min) today, but the + distinct class is the dispatch seam a future `qp-global` target + intercepts. +2. **Reserve option space** — a future `solver_selection = qp-global` + value, or (cleaner) an orthogonal `require_global` flag, so the + dispatch `match` grows by one arm rather than being reworked. +3. **Branching-rule-agnostic B&B shell.** The future `pounce-mip` B&B + shell (see "Mixed-integer" in the outlook) should parameterize the + branching rule and relaxation builder so that *spatial* branching + (continuous vars, for global QP) and *integer* branching (MIP) share + one tree / incumbent / pruning / node-queue core. +4. **Preserve the classifier's Hessian factorization.** The PSD test in + the classifier already computes the eigenstructure of `P`; a global + solver reuses it for the DC split (`P = P⁺ − P⁻`) and relaxation + construction. Expose it rather than recomputing. +5. **Factor-reuse / warm-start across nodes** (outlook items 1–2) is + what makes any B&B tractable — the same argument as MIP. + +*Escape hatch (until then):* use BARON / Gurobi-nonconvex for problems +with indefinite Hessians where local minima are insufficient. ### Decision principle @@ -378,15 +680,19 @@ both are weak (ADMM, AL), wrap or defer. When only one is strong - `crates/pounce-algorithm/src/options.rs` (or equivalent) — register `solver_selection` - `Cargo.toml` (workspace) — add `pounce-convex` as a member -- `crates/pounce-presolve/` — LP-specific reductions over time - (singleton rows/cols, dual-bound tightening); not blocking +- `crates/pounce-presolve/` — LP/QP reductions, IPM-aware reduction + policy, and a pure-Rust transaction-based postsolve stack (PaPILO + ideas, rayon for parallelism — not a wrap); see the "Presolve + integration" section for the scoped catalog and references. Not + blocking for correctness, but required for the Phase 3 benchmark bar. ### Add - `crates/pounce-cli/src/dispatch.rs` — `classify_problem(&NlProblem) -> ProblemClass` plus the `match`-based router - `crates/pounce-convex/` — new crate scaffolded with `solve_lp_ipm` - and `solve_qp_ipm` entry points; `src/ipm.rs` (the shared Mehrotra + - HSDE scaffolding) plus `src/cones/` (per-cone barrier, gradient, + and `solve_qp_ipm` entry points; `src/ipm.rs` (the shared Mehrotra + scaffolding; HSDE embedding added at the SOCP phase) plus `src/cones/` + (per-cone barrier, gradient, Hessian, scaling-update — one module per cone: `nonneg.rs`, `soc.rs`, `psd.rs`, `exp.rs`, `pow.rs`, `gpow.rs`). The first implementation target is `cones/nonneg.rs` (covers LP) plus the IPM scaffolding; QP @@ -400,12 +706,26 @@ both are weak (ADMM, AL), wrap or defer. When only one is strong ## Verification +The functional-correctness checks below cover *what* each phase must +prove. The performance-engineering methodology that backs the +"specialized path wins" claims — vectorization (SIMD), parallelism, the +reproducibility-vs-performance decision, and the CI performance/numerical +gates — lives in the companion note +[`performance-engineering.md`](performance-engineering.md). + Phase 1 (routing scaffolding, no behavior change): - `cargo test -p pounce-cli` covers new dispatcher with unit tests on `classify_problem`: feed it parsed `NlProblem` structs for known - LP / convex QP / nonconvex QP / NLP cases (builtins + Mittelmann - fixtures already on disk) and assert the right `ProblemClass`. + LP / convex QP / convex QCQP / nonconvex QP / NLP cases, plus boundary + cases that must fall back to NLP (inconclusive PSD test, parse + failure), and assert the right `ProblemClass`. These use **small + committed `.nl` fixtures** (one per class) so the unit tests are + hermetic — they must run in CI and a fresh clone, not depend on the + gitignored Mittelmann/CUTEst caches that only exist after a local + `make fetch`/`make translate`. The full benchmark sets stay for the + wall-clock validation in Phases 2–3.5, where relying on the local + cache is fine. - `make benchmark-mittelmann` produces identical results to current behavior — `auto` routes everything to NLP-IPM until `pounce-convex` lands. @@ -422,6 +742,48 @@ Phase 2 (LP/QP actually dispatched): paths for any individual benchmark — `compare_runs` was built for exactly this kind of side-by-side analysis. +Phase 3 (Mehrotra + certificate infeasibility): ✅ landed + +- Iteration-count regression: assert the predictor-corrector cuts + iterations vs the bare Phase-2 IPM — done in + `pounce-cli/tests/qp_vs_nlp_iterations.rs` (QP path uses fewer + interior-point iterations than the NLP path; ≈41% at n=50). Extending + this to the full Mittelmann LP / Maros-Mészáros sets is the remaining + benchmark-scale check. +- Infeasibility / unboundedness: known-infeasible and known-unbounded + LP/QP fixtures assert the correct status instead of stalling — done in + `pounce-convex/tests/infeasibility.rs` (verified Farkas / recession + certificates) and end-to-end in + `pounce-cli/tests/qp_dispatch_end_to_end.rs`. + +Phase 3.5 (presolve) — the highest correctness risk is postsolve dual +recovery, so it gets the most coverage: + +- Round-trip primal *and* dual: for each Mittelmann / Maros-Mészáros + instance, solve with presolve on and off and assert the recovered + `x` *and* the duals (`λ`, bound multipliers) match to 1e-6 after + postsolve. Primal-only matching hides the most common postsolve bug. +- Per-reduction unit tests: each reduction (singleton / doubleton / + forcing / dominated row; singleton / duplicate column; bound + tightening) gets a fixture where postsolve must reconstruct the + eliminated primal *and* dual entries exactly. +- Detection: presolve-only infeasibility / unboundedness fixtures + (e.g. contradictory singleton bounds) assert the correct status + without invoking the IPM at all. +- QP-specific: a fixture where a variable substitution fills the + Hessian, asserting `P` is transformed consistently and the dual is + recovered with the quadratic term (the net-new Gould–Toint path). + +Phases 4–6 (conic): + +- Objective-value cross-check against Clarabel / MOSEK on the matching + cone benchmark set (SOCP / GP-entropy / SDP) to 1e-6. +- Regression guard: adding a cone must not change LP/QP results — re-run + the Phase-2/3 suite and assert stable iteration counts on the pure + LP/QP instances. Convex-QCQP fixtures route to the SOCP path and are + cross-checked against the NLP-IPM local solution (same optimum, since + the QCQP is convex). + Python / C APIs: - `pyomo-pounce` smoke test in CI passes unchanged (proves no @@ -484,7 +846,7 @@ land. Listed roughly in the order POUNCE should adopt them. Heinkenschloss optimal-control benchmarks; relevant for the NLP path, not for LP/QP routing. -### What "competitive" means in 2025 +### What "competitive" means Reading Mittelmann's site sets expectations: @@ -600,7 +962,7 @@ crates/ pounce-hsl/ # MA57 backend ┌─ consumers ─────────────────────────────────────┐ pounce-algorithm/ # IPM-NLP (today) - pounce-convex/ # IPM-LP/QP, simplex (planned) + pounce-convex/ # IPM-LP/QP + conic (planned) pounce-qp/ # active-set QP (in flight) pounce-socp/ # SOCP / conic IPM (future) pounce-mcp/ # complementarity (future) diff --git a/dev-notes/multi-solver-tech-debt.md b/dev-notes/multi-solver-tech-debt.md new file mode 100644 index 00000000..e80452df --- /dev/null +++ b/dev-notes/multi-solver-tech-debt.md @@ -0,0 +1,143 @@ +# Multi-solver maintenance: technical-debt audit + +_Written when reconciling PR #70, which took POUNCE from one solver to three._ + +## Why this note exists + +Until the 0.4.0 line, POUNCE was effectively **one solver**: the Ipopt-derived +filter-line-search interior-point method for general NLPs (`pounce-algorithm`). +PR #70 adds two more solver families: + +- **`pounce-convex`** — a convex/conic interior-point solver (LP, convex QP, + SOCP, PSD, exp/power cones) over a homogeneous self-dual embedding (HSDE), + with SOS polynomial optimization layered on the PSD cone. +- **`pounce-global`** — a spatial branch-and-bound global optimizer for + factorable nonconvex NLPs. + +Going from one solver to three is a capability win, but it permanently changes +the maintenance surface: several things that used to have exactly one +implementation now have N, and a few abstractions were introduced to span them. +This note records the debt so it stays visible and is paid down deliberately +rather than discovered painfully. + +## What is NOT debt (so we don't "fix" the wrong thing) + +- **The two interior-point implementations are not duplicated linear algebra.** + Both `pounce-algorithm` (NLP filter-IPM) and `pounce-convex` (conic HSDE-IPM) + depend on `pounce-linsol` + `pounce-linalg` and share that sparse-symmetric + factorization/KKT substrate. Only the *outer loops* differ — filter line + search vs. HSDE — which is correct: they are genuinely different algorithms, + not two copies of one. Merging them would be the mistake. +- **Separate typed entry points per solver are partly intrinsic.** A cone + program is *data* (matrices + cone list); a certified global optimum needs a + *symbolic* objective to relax. Neither fits `minimize(fun, x0, …)`. Some API + divergence is the nature of the problem, not sloppiness. The debt is the + *absence of a router on top* (see area 2), not the existence of typed entries. + +## The four debt areas + +### 1. Debugger trait fan-out + +**State.** The interactive debugger was generalized over a `DebugState` trait +(`crates/pounce-common/src/debug.rs`) so one REPL (`debug_repl.rs`) drives all +iteration-loop solvers via `&mut dyn DebugState`. A *second*, parallel hierarchy +— `TreeDebugState` / `TreeDebugHook` (`crates/pounce-cli/src/tree_debug.rs`) — +exists for the branch-and-bound tree, bridged to the IPM REPL by a shared +command queue for `into` step-into. NLP-only commands (rank, sweep, resolve) +reach the concrete `DebugCtx` through `as_nlp()` / `as_nlp_mut()` downcasts. + +**Debt.** +- Every new debugger command must decide its behavior on **all three** backends, + or silently degrade on the ones it doesn't handle. Downcast-and-branch + (`as_nlp`) is the smell: it compiles even when a command is a no-op on conic / + tree states, so coverage gaps are invisible. +- Two trait hierarchies (`DebugState` + `TreeDebugState`) plus a bridge is real + surface area; a fourth solver would likely add a third. +- The `--debug-json` **metric vocabulary** is a cross-cutting contract + (`iter, mu, objective, inf_pr, inf_du, nlp_error, complementarity`) consumed by + the MCP proxy and its tests. It already needed a consistency pass once + (`727d088`). Each backend maps its native quantities onto this NLP-centric set + (e.g. convex reports `nlp_error = max(pinf, dinf, μ)`; the name no longer means + "NLP error"). More backends → more semantic stretching of fixed field names. + +**Recommendation.** +- Maintain a **capability matrix** (command × backend) in `docs/src/debugger.md`, + and make "unsupported on this backend" an explicit, uniform REPL/JSON response + rather than a silent no-op. +- Keep a **single source of truth** for the JSON metric set and assert in a test + that every `DebugState` impl populates (or explicitly NaNs) each field, so a + new backend can't quietly drift the protocol. +- Re-evaluate whether `TreeDebugState` can fold into `DebugState` (or a shared + supertrait) once a second tree-like solver is on the horizon. + +### 2. Python routing facade (designed, not built) + +**State.** `dev-notes/lp-qp-routing.md` (this PR's headline design doc) specifies +a `ProblemClass`-driven router, and `crates/pounce-cli/src/dispatch.rs` already +classifies and routes on the **CLI** (`solver_selection=auto`). But the **Python** +surface exposes parallel, hand-picked entry points: `minimize` (NLP), +`solve_qp`, `solve_socp`, `sos_minimize`, `minimize_global` — with no unifying +dispatch. + +**Debt.** +- Users must know solver theory to pick the right entry point; the CLI can + auto-route from a parsed `.nl`, but Python callers get no equivalent. +- Two divergent dispatch stories (CLI classifier vs. Python explicit) will drift + in behavior and documentation. +- `minimize` deliberately *cannot* route (it only sees an opaque callable) — so a + Python router can't just live behind `minimize`; it needs structured input. + That design question is unresolved and compounds with each new solver. + +**Recommendation.** Decide explicitly: either (a) build a Python router that +takes structured problems and dispatches by `ProblemClass` (mirroring +`dispatch.rs`), or (b) commit to explicit entry points and document the choice +prominently (a "Choosing a Solver" page already exists — make it the front door). +Track the routing facade as the designed-but-unbuilt piece it is, so it isn't +mistaken for shipped. + +### 3. Release / publish surface + +**State.** The workspace grew **16 → 18 published crates** across **three** +registries (PyPI `pounce-solver`, PyPI `pyomo-pounce`, crates.io). Per +`CLAUDE.md`, the crates.io publish has historically been manual and "easy to +forget." This PR adds `pounce-convex` and `pounce-global` to the topological +publish order (`publish-crates.sh`, `dev-notes/cargo-release.md`); both are **new +crate names**, so they hit the crates.io new-crate rate limit on first publish. +(Note: main recently added `.github/workflows/release-crates.yml`, which begins +automating the crates.io publish on `v*` tags — partially mitigating the manual +step.) + +**Debt.** +- More crates = more topological-order maintenance and more first-publish + rate-limit exposure on each new-crate release. +- Three registries must reach the same `X.Y.Z`; a long-lived feature branch + (like this one) silently accrues version skew against a fast-moving release + line — exactly the conflict this reconciliation had to clean up. + +**Recommendation.** Finish automating the crates.io publish via the new +`release-crates.yml` so the manual step disappears; keep the publish list and the +layered dependency order in `cargo-release.md` as the single source the script +derives from; consider a CI check that the three registries' target versions +agree before tagging. + +### 4. Docs / CHANGELOG drift + +**State.** The PR's major features (convex/conic, SOS, global) were **absent from +its own CHANGELOG** until this reconciliation backfilled them. The book +(`docs/src/SUMMARY.md`) and the solver-landscape material must now present three +solvers coherently rather than one. + +**Debt.** With multiple solvers shipping independently, "the feature exists but +isn't documented anywhere a user looks" becomes the default failure mode, and the +gap compounds across releases. + +**Recommendation.** Adopt a lightweight "**one feature → CHANGELOG entry + book +section**" definition-of-done, and name an owner for the cross-solver +landscape/choosing-a-solver docs so they're updated as a unit when a solver +lands or changes class coverage. + +## Suggested follow-ups + +Each area should become a tracked issue linking back to this note. None blocks +the PR #70 merge — they are the deliberate paydown plan for the maintenance cost +of becoming a multi-solver project. diff --git a/dev-notes/performance-engineering.md b/dev-notes/performance-engineering.md new file mode 100644 index 00000000..4b214add --- /dev/null +++ b/dev-notes/performance-engineering.md @@ -0,0 +1,257 @@ +# Performance engineering — design note + +**Status: design only.** No code changes yet. This note is deliberately +*cross-cutting*: it applies to `pounce-feral`, the existing IPM-NLP, the +planned `pounce-convex` LP/QP/conic solvers, and every future +`pounce-linsol` consumer. It exists because +[`lp-qp-routing.md`](lp-qp-routing.md) specifies performance *targets* +(competitive with HiGHS/Clarabel) and *functional correctness* +(objective/primal/dual to 1e-6) but not the engineering methodology for +*achieving and maintaining* high performance — vectorization, +parallelism, profiling — nor any performance *gate* in CI. Today +`.github/workflows/ci.yml` gates `fmt`, `clippy -D correctness -D +suspicious`, `build`, `test`, and wheel smoke, but **no performance +regression can fail the build**, and there is no SIMD/parallel strategy +written down. This note fills both gaps. + +## 1. The reproducibility-vs-performance fork — decide this first + +Everything downstream depends on it. + +**Current stance.** `crates/pounce-linalg/src/blas1.rs` deliberately +uses plain scalar loops with *no SIMD intrinsics and no `mul_add`*, to +stay **bit-equivalent with the netlib reference Fortran BLAS** that +upstream Ipopt builds against. This is a real asset for the **NLP port**: +bit-equivalence lets us validate `pounce-algorithm` against Ipopt +iteration-for-iteration. + +**Why it does not bind `pounce-convex`.** The convex LP/QP/conic solver +is *greenfield* — there is no upstream Ipopt convex solver to match +bit-for-bit. So the bit-equivalence constraint that justifies scalar +BLAS in the NLP path has no analogue here; the convex solver is free to +vectorize, *if* we decide what level of determinism we actually require. + +**Three determinism tiers** (pick a target per crate, not globally): + +1. **Bit-identical to upstream Ipopt** — scalar reference BLAS, no FMA. + *Keep for `pounce-algorithm` / `pounce-linalg` only*, where it is a + validation asset. Do **not** impose it on the convex solver. +2. **Run-to-run reproducible (cross-platform aspirational)** — a fixed + binary on fixed inputs gives bit-identical output every run: deterministic + reduction order, FMA used consistently (not conditionally), + deterministic parallel reductions (fixed chunking). Allows SIMD. Does + *not* promise equality with reference BLAS. Two sub-levels: + - **2a — same machine, run-to-run identical.** Cheap: mainly "use + fixed chunk sizes, don't let parallel reductions split adaptively." + - **2b — cross-platform / cross-SIMD-width identical.** Harder: + different lane widths (AVX2 4-wide vs AVX-512 8-wide vs NEON) force + different reduction trees, so 2b needs a canonical accumulation + scheme independent of hardware width, at some cost to speed. +3. **Best-effort fast** — SIMD + FMA + nondeterministic parallel + reductions; results vary in the last few ULPs run-to-run. Gated only + by the solution-tolerance check (§5). + +**Decision.** `pounce-convex` and feral's performance-critical paths +target **tier 2**: it unlocks SIMD/FMA/parallelism while keeping +debugging and CI sane (a failing solve reproduces). Specifically, **2a +(same-machine run-to-run identity) is the firm requirement** — enforced +by the reproducibility test in §5 — and **2b (cross-platform identity) +is aspirational**, pursued where it's cheap but not allowed to block +performance. **Tier 1** stays in `pounce-algorithm`/`pounce-linalg` for +the Ipopt-validation story. **Tier 3** is allowed only behind an opt-in +feature for users who want maximum throughput and accept ULP-level +nondeterminism. In all tiers, **correctness is gated on the solution +tolerance (§5), never on bit-identity** — an optimizer's answer is +"correct" if it satisfies the KKT/feasibility tolerances, regardless of +last-bit differences. + +A Rust-specific point makes tier 2 cheaper than it would be in C/Fortran: +**Rust does not auto-contract to FMA** (no `-ffp-contract=fast` +equivalent on by default). FMA happens only where code explicitly calls +`.mul_add()`, so FMA-determinism is controlled directly rather than +fought out of the optimizer. + +**How to hold tier 2 in practice.** The requirement reduces to a small +set of rules on every reduction (dot products, norms, matrix-vector, +the KKT residual sums): + +- **Fixed reduction order, independent of runtime.** Pick a lane count + and chunk size as *compile-time constants*, not from + `is_x86_feature_detected!` width or the current thread count. A sum is + always `k` partial accumulators combined in a fixed tree, padding the + tail deterministically. This is what makes the result independent of + scheduling and load (2a); making `k` independent of the hardware SIMD + width is the extra step for 2b. +- **No adaptive parallel splits in reductions.** Use rayon with an + explicit fixed `chunk_size` (e.g. `par_chunks(N)` then a deterministic + serial combine), never `fold`/`reduce` whose split points depend on + work-stealing. Map-only parallelism (independent per-cone updates, + assembly) needs no special care — only the *combine* must be fixed. +- **FMA is all-or-nothing per kernel.** Decide once whether a kernel + uses `.mul_add()` and never branch on it; a kernel that uses FMA on + one path and `a*b + c` on another is not reproducible. Since Rust + never contracts implicitly, "never call `.mul_add()`" is itself a + valid, simple tier-2 policy if a kernel doesn't need the extra + accuracy. +- **Single accumulation scheme across the SIMD/scalar tail.** The + vectorized body and the scalar remainder must accumulate into the same + tree (e.g. reduce the SIMD lanes into the running scalar accumulators + in a fixed order), so an input whose length isn't a multiple of the + lane count still reproduces. +- **Don't depend on `-ffast-math`-style flags.** Keep the default + codegen; never enable fast-math/reassociation, which would let LLVM + reorder sums behind our back and silently break 2a. + +These rules cost little — they mostly constrain *how* a kernel is +written, not whether it vectorizes — and the §5 reproducibility test is +what catches a violation. + +## 2. Vectorization (SIMD) + +**Landscape (2025).** + +- `std::simd` (portable SIMD) — fastest portable abstraction, but + **nightly-only**, pins the toolchain. Off the table while POUNCE + targets stable. +- `wide` — stable, near-drop-in, slightly slower, but **build-time + feature detection only** (no runtime CPU dispatch / `multiversion`). +- **`pulp`** — stable, portable SIMD *with runtime CPU dispatch*; this + is what **faer** uses. Best fit for POUNCE's pure-Rust + stable + constraints. +- `multiversion` — runtime CPU dispatch around autovectorized scalar + code; good where hand-vectorization isn't worth it. + +**Recommendation.** Use **`pulp`** for hand-vectorized hot kernels +(stable, runtime dispatch, proven in faer), and `multiversion` + +autovectorization for the simpler loops. This keeps a single binary that +dispatches AVX2/AVX-512/NEON at runtime — important for distribution +(one wheel, many CPUs) and consistent with the pure-Rust guarantee. + +**Hot kernels to target** (profile first, §4): + +- augmented-system / KKT assembly and the diagonal barrier updates; +- cone scaling updates (Nesterov–Todd scaling on SOC/PSD blocks); +- the large vector ops in the IPM step (`axpy`/`dot`/`nrm2` over the + full variable vector) — but in `pounce-convex`'s own tier-2 copies, + not by SIMD-izing the tier-1 `pounce-linalg` reference BLAS. + +**faer as reference (and possible backend).** [faer](https://github.com/sarah-quinones/faer-rs) +is pure Rust, explicitly SIMD-optimized (x86-64 + Aarch64 NEON via +pulp), rayon-parallel, with sparse LLT/LDLT/Bunch-Kaufman. Because it is +*pure Rust*, it does not violate the no-C/C++ constraint that rules out +wrapping PaPILO — so faer is both the architectural reference for feral's +vectorization *and* a credible alternative backend behind +`SparseSymLinearSolverInterface` if feral's own kernels lag. Worth an +explicit build-vs-adopt evaluation for the factorization (§3). + +## 3. Parallelization + +**The factorization is the bottleneck — address it first.** In an IPM, +the per-iteration sparse symmetric factorization dominates wall-clock at +scale. Parallelism elsewhere is secondary. Options: + +- make feral's LDLᵀ supernodal/multifrontal with task parallelism, or +- evaluate faer's sparse Cholesky/LDLT (pure Rust, rayon-parallel) as a + `pounce-linsol` backend. + +Either way, this is the highest-leverage parallel work and is *not* +LP/QP-specific — it benefits the NLP path equally. + +**rayon elsewhere** (the idiomatic Rust data-parallel crate; not yet a +workspace dependency): + +- presolve routines (already planned in the routing note: probing, + dominated-column detection, constraint sparsification); +- independent per-cone work (barrier / gradient / Hessian / scaling + updates across cone blocks are embarrassingly parallel); +- matrix assembly and multi-RHS back-solves. + +**Per-call parallelism control (faer-style).** Expose parallelism as a +per-solve option, not a global that grabs every core. This matters for +(a) embedded/MPC where the caller controls the thread budget, and +(b) future B&B over `pounce-convex`, where the *outer* search is already +parallel and nested rayon pools must not oversubscribe. + +## 4. Profiling & tooling + +- **Sampling profiles:** `samply` or `cargo flamegraph` for "where does + wall-clock go" on real benchmark instances. +- **Deterministic counts:** `iai-callgrind` (Cachegrind/Callgrind) for + instruction/cache-miss counts that are stable in noisy CI (§6). +- **Discipline in hot loops:** no allocation (reuse scratch buffers + across IPM iterations — the matrices are constant for LP/QP, per the + routing note's "constant P/A extraction" point), cache-friendly + CSC/CSR layouts, `#[inline]` on the small kernels. + +## 5. Correctness checks (the invariant every perf change must preserve) + +- **Solution-tolerance gate.** Across the benchmark suites + (Mittelmann LP, Maros-Mészáros QP), every problem must still solve to + the agreed tolerance (objective + primal + dual to 1e-6). This is the + invariant a vectorization/parallelization change is allowed to touch + *nothing* in — it is the definition of "still correct." +- **Cross-solver oracle.** Objective values cross-checked against + Clarabel/HiGHS (LP/QP) and Ipopt (NLP), as the routing note's + verification section already specifies. +- **Reproducibility test (tier 2a).** Same binary + same input ⇒ + bit-identical output, asserted in CI; catches an accidental + nondeterministic reduction sneaking into a tier-2 path. (2b + cross-platform identity is aspirational and not asserted.) +- **`clippy -D correctness`** stays as the existing static gate. + +## 6. Gate checking (CI) — currently absent + +`ci.yml` has no performance gate; a regression ships silently today. +Propose a **two-tier** scheme: + +- **PR gate — instruction counts (deterministic).** Hot-kernel + microbenchmarks under **`iai-callgrind`**, which counts instructions + via Cachegrind and is *stable inside GitHub Actions VMs*. Wall-clock + criterion benchmarks are too noisy to gate a PR on a cloud runner — + use iai-callgrind for the pass/fail gate, with a small tolerance band + to absorb codegen jitter. +- **Nightly / pre-release gate — wall-clock SGM.** Run the full + Mittelmann/Maros-Mészáros suites and track the **shifted geometric + mean (SGM)** of solve time across versions; fail if SGM regresses past + a threshold. The `benchmarks/mittelmann/` harness already produces + per-version reports; add the SGM computation and a regression + threshold on top of it. `critcmp` / a continuous-benchmarking service + can track the baseline. +- **Numerical-tolerance gate** (§5) runs in the *same* job as the + wall-clock suite, so a "faster" change that breaks the 1e-6 tolerance + fails even if it improves SGM. + +`benchmarks/large_scale/` already contains a `sparse_qp` problem, a +ready hook for convex-QP perf benchmarking once `pounce-convex` lands. + +## 7. Mapping onto the LP/QP phases + +- **Phase 2** (bare IPM-QP + equilibration): stand up the tier-2 + determinism decision and the iai-callgrind PR gate on the first hot + kernels; reuse-vs-vectorize feral here. +- **Phase 3** (Mehrotra + certificate infeasibility): vectorize the cone + scaling/step kernels with pulp; add the wall-clock SGM nightly gate. + (The HSDE embedding moved to Phase 4 — see the routing note.) +- **Phase 3.5** (presolve): rayon parallelism per the routing note. +- **Phases 4–6** (conic): per-cone parallelism; the cone kernels are the + new hot paths each phase adds. +- **Factorization parallelism / faer evaluation** is cross-cutting and + can land independently — it speeds up the NLP path too. + +## References + +- S. El Kazdadi et al., *faer: A linear algebra library for the Rust + programming language*, JOSS (2024). + — pure-Rust SIMD (pulp) + + rayon, sparse LLT/LDLT; reference and possible backend. +- S. Davidoff, *The state of SIMD in Rust in 2025*. + + — std::simd vs wide vs pulp/macerator vs multiversion. +- `pulp`, `std::simd`, `wide`, `multiversion` crate docs. +- `iai-callgrind` (formerly iai) — deterministic instruction-count + benchmarking for CI. +- `criterion` + `critcmp` — wall-clock benchmarking and cross-run + comparison. +- J. Demmel & H. D. Nguyen, *ReproBLAS / reproducible summation* — on FP + non-associativity, FMA, and reproducible reductions (the basis for the + tier-2 determinism argument). diff --git a/dev-notes/pr70-hardening.md b/dev-notes/pr70-hardening.md new file mode 100644 index 00000000..cf151504 --- /dev/null +++ b/dev-notes/pr70-hardening.md @@ -0,0 +1,601 @@ +# PR #70 Hardening — Loop-Driven Verification Tracker + +This file is the **state** for the PR #70 hardening loop. Plan: +`~/.claude/plans/woolly-launching-parnas.md`. + +## Loop prompt (`/loop`) + +> Work the **first unchecked** item below. Do only that one item end-to-end, +> update its section (Findings + checkbox), commit, then stop. Do not start the +> next item. + +## Per-iteration protocol + +1. **Select** the first `- [ ]` item; re-confirm scope from the plan. +2. **Implement** the named tests, reusing the oracle patterns below. +3. **Run** the item's command. Triage: test bug → fix test; real defect → fix if + small & obviously correct, else record under Findings with a minimal repro + + severity. Never paper over a wrong-answer defect. +4. **Record** Findings (tests added, pass/fail, defects, follow-ups). Flip + `[ ]`→`[x]` only when Done criteria hold. +5. **Commit** one per item: `test(pr70): ` (with the required + `Co-Authored-By` trailer; never `--no-verify`). Stop. + +## Reusable oracle patterns (in-repo) + +- **vs-NLP cross-check**: `crates/pounce-cli/tests/{cblib_vs_nlp,exp_cone_vs_nlp,qp_vs_nlp_iterations}.rs` +- **Known optima**: `crates/pounce-qp/tests/mm_published_optima.rs`, `crates/pounce-convex/tests/qp_known_optima.rs` +- **Routing unit**: `crates/pounce-cli/tests/dispatch_routing.rs` + `#[cfg(test)]` in `dispatch.rs`; fixtures `crates/pounce-cli/tests/fixtures/*.nl` +- **External validation**: `benchmarks/scripts/compare_pounce_clarabel.py` +- **`--json-output` schema**: `solution.status`, `statistics.{final_objective,iteration_count,total_wallclock_time_secs}` + +## Baseline (captured at bootstrap) + +- `cargo test --workspace`: **GREEN** — true exit 0, **1649 passed, 0 failed** + (confirmed on a clean re-run, not piped through `tail`). +- Clarabel comparison (Item B input) — **full suite**, outputs in + `benchmarks/clarabel_compare.md` + `clarabel_compare_{lp,qp}.json`: + - **LP**: 467 problems, 419 both-solved, **412/419 agree** (reldiff < 1e-4). + 3 pounce-only, 28 clarabel-only. POUNCE non-solves incl. InternalError + (greenbea, ch, nemsemm1, nemsemm2), several TimeOut/MaxIter. + - **QP**: 138 problems, 114 both-solved, **110/114 agree**. 3 pounce-only, + 19 clarabel-only. `VALUES` failed with `ParseError:JSONDecodeError` on the + pounce side — likely a JSON-report/harness bug, flag in B or G. + - **Objective disagreements to triage in Item B** (both solved, reldiff ≥ 1e-4): + - Near-zero-objective artifacts (both ≈ 0, published optimum 0 — almost + certainly fine): LP `model11`; QP `S268`/`HS268`. + - **Genuine, investigate**: QP `YAO` (pounce 197.70 vs clarabel 91.02, + reldiff 0.54); LP `capri` (2625.0 vs 2690.0, reldiff 0.024). + - Borderline (≈1–4e-4, likely tolerance): LP `lpl2`, `pltexpa3_16`, + `pltexpa4_6`, `large001`, `fxm3_16`; QP `UBH1`. + - POUNCE correct live; stored `benchmarks/lp/pounce.json` is STALE + (adlittle/stocfor1 wrong) — regenerate in B. + +--- + +## [x] A1 — Routing classification (HIGHEST RISK) +- Scope: `classify_problem` must never under-classify nonconvex as convex. + Cover: indefinite Hessian → `NonconvexQp`; near-PSD boundary at `±PSD_TOL` + (1e-9) resolves conservatively (inconclusive → NLP); maximize-of-convex + (concave) → nonconvex; zero Hessian → `Lp`; pure linear; genuinely convex + QP/QCQP still convex (no false fallback). +- Files: `crates/pounce-cli/src/dispatch.rs` (PSD test ~L576+, `#[cfg(test)]` mod). +- Run: `cargo test -p pounce-cli dispatch` +- Done: new cases green; any misclassification recorded as a Finding. +- Findings: + - **Tests added** (5, all green; 29/29 in `dispatch::tests`): + - `psd_rejects_small_but_real_negative_curvature` — diag(2, −1e-3) reads + indefinite (the safety-critical direction: a real negative eigenvalue, + even small, is NOT rounded to PSD). + - `psd_threshold_is_psd_tol` — pins the cutoff: −1e-10 (|λ|tol) → indefinite. + - `classify_concave_minimize_is_nonconvex` — `minimize −x0²` → `NonconvexQp` + (auto → NLP), complementing the existing maximize-of-PSD case. + - `classify_qcqp_with_indefinite_constraint_falls_back_to_nlp` — convex obj + + indefinite quadratic constraint → `Nlp` (conservative QCQP guard; was + untested — only the all-convex QCQP case existed). + - `classify_cancelling_quadratic_objective_is_lp` — `x0²−x0²` → `Lp` + (collapsing quadratic, empty Hessian, not a spurious QP). + - **Pre-existing coverage confirmed adequate**: indefinite→NonconvexQp, + maximize-of-convex→nonconvex, maximize-of-concave→convex, pure LP, convex + QP, convex QCQP, transcendental obj/con→NLP, cubic/transcendental rejection. + - **Finding (informational, NOT a defect): the ±PSD_TOL band rounds toward + convex.** The PSD test is `min_eig >= -PSD_TOL` (PSD_TOL=1e-9), so a Hessian + with smallest eigenvalue in `[-1e-9, 0)` classifies **convex**, not NLP. The + module doc (L36–38, L45–48) says it routes inconclusive cases "to the safe + side, never to the convex path" — the wording overstates the actual `>= -tol` + behavior. This is the *correct* engineering choice, not a bug: PSD includes + semidefinite Hessians (zero eigenvalues — e.g. an LP-as-QP or a rank-deficient + QP), whose smallest eigenvalue routinely computes as a tiny negative under + Jacobi roundoff; requiring strict positivity would misroute legitimate convex + QPs to NLP and regress `psd_accepts_psd_with_zero_eigenvalue`. The 1e-9 band is + orders of magnitude below the solve error a convex IPM would incur on that much + curvature. **Severity: none** (recommend only tightening the doc wording to + match `>= -PSD_TOL`). No misclassification found. + +## [x] A2 — Forced `solver_selection` mismatch must error, not mis-solve +- Scope: `qp-ipm`/`lp-ipm`/`qp-active-set` forced on a non-matching/nonconvex + `.nl` returns a clear error (nonzero exit / error status), never a wrong + "optimal." `auto` on the same routes safely (NLP/global). +- Files: `crates/pounce-cli/tests/qp_dispatch_end_to_end.rs`, + `crates/pounce-cli/tests/dispatch_routing.rs`, new fixture + `crates/pounce-cli/tests/fixtures/nonconvex_qp.nl`. +- Run: `cargo test -p pounce-cli` +- Done: mismatch cases assert error; green. +- Findings: + - **New fixture** `nonconvex_qp.nl`: `min x0·x1 s.t. x0+x1=2, 0≤xᵢ≤4` + (indefinite Hessian; classifies `nonconvex QP`). Box bounds keep the NLP + fallback bounded (local optimum 0 at a corner) so `auto` exits 0 cleanly. + - **Tests added (6, all green; full `pounce-cli` suite 0 failures):** + - `forced_qp_ipm_on_nonconvex_qp_errors` — the headline case: convex QP IPM + forced on a nonconvex QP exits 2, names the class + solver, and **does NOT + print "Optimal Solution Found"** (the confident-wrong-answer failure mode + is asserted absent). + - `forced_qp_active_set_on_nonconvex_qp_errors` — same for the active-set QP. + - `forced_lp_ipm_on_convex_qp_errors` — LP IPM forced on a convex QP errors + (QP ≠ LP). + - `auto_routes_nonconvex_qp_to_nlp_safely` — `auto` on the nonconvex QP + routes to pounce-nlp (NOT pounce-convex), solves, exit 0. + - `forced_qp_solvers_on_nlp_error` (dispatch_routing) — qp-ipm & qp-active-set + forced on a general NLP (rosenbrock) both exit 2 with a naming message. + - **Behavior confirmed manually** before writing tests: every mismatch exits 2 + with `problem class does not match forced solver (expected )`; + the error is raised at routing (before any solve), so no wrong objective is + ever produced. No defect found. + +## [x] B — Objective validation vs known optima + Clarabel +- Scope: netlib LP + Maros–Mészáros QP objectives from pounce match Clarabel / + published optima within tol (rel < 1e-4); disagreements triaged. **Regenerate + the stale `benchmarks/lp/pounce.json`** from live pounce. Conic/CBLIB covered + via `cblib_vs_nlp`. +- Files: `benchmarks/scripts/compare_pounce_clarabel.py` (add `--check` mode + + nonzero exit on disagreement), `benchmarks/lp/pounce.json` (regenerate), + optionally `benchmarks/qp/pounce.json`. +- Run: `python3 benchmarks/scripts/compare_pounce_clarabel.py --class both` +- Done: all problems agree within tol or each disagreement is explained; + `pounce.json` no longer stale. +- Findings: + + **Harness added.** `compare_pounce_clarabel.py` gained two flags: + - `--from-json` — re-evaluate the committed `clarabel_compare_{lp,qp}.json` + records without re-running both solvers (regression gate / CI). + - `--check` — exit nonzero on any *genuine* objective disagreement. A + disagreement counts only when BOTH solvers report a **certified** solve + (pounce `SolveSucceeded` AND clarabel `Solved`; `AlmostSolved` / + `SolvedToAcceptableLevel` are excluded as uncertified) yet objectives differ + beyond the numpy-isclose band `|a−b| > atol + rtol·max(|a|,|b|)`, + rtol=atol=1e-3. Helpers `isclose` / `check_disagreements`, + `POUNCE_STRICT={SolveSucceeded}`, `CLARABEL_STRICT={Solved}`. + + **Coverage (live, 60s/solver):** LP 467 problems, both-certified-solved 413; + QP 138, both-certified-solved 112. Under the strict gate exactly **one** + hard-fail across both suites: `capri` (LP). `make`-driven default routing on + the whole LP suite uses the same pounce-convex IPM the live `lp-ipm` run + exercised (confirmed: `pounce capri.nl` with no flags → `auto` → convex LP IPM + → identical 2625.01), so the live LP records *are* the default-routing results. + + **HIGH-SEVERITY DEFECT — `capri` silent wrong answer (MERGE-BLOCKER).** + - Repro (identical generated `.nl`, only `solver_selection` differs): + - `solver_selection=nlp` → obj **2690.012861**, 192 it — CORRECT + (matches Clarabel `Solved` 2690.0129, the documented netlib optimum, and + the previous stored value). + - `solver_selection=lp-ipm` → obj **2625.011804**, 25 it, status + `SolveSucceeded` — **WRONG by 2.4%**, reported as optimal. + - Same `.nl` on both paths ⇒ this is the **pounce-convex LP IPM**, NOT a + conversion bug. + - **Hit by DEFAULT routing**: `pounce capri.nl` (no flags) classifies LP and + routes to the convex IPM, printing `Optimal Solution Found. obj=2625.01`. A + user gets a confident wrong optimum with zero opt-in — this is not gated + behind an expert flag. Severity: **HIGH, blocks merge** until the convex + LP/QP IPM either solves `capri` correctly or fails honestly (non-optimal + status) on it. `--check` (and `--check --from-json`) exits 1 naming `capri`, + so this is now a standing regression gate. + + **RESOLVED (fix landed).** Root cause was **not** in the IPM — it was a + postsolve primal-recovery ordering bug in `presolve.rs`. capri's presolve + emits a `FreeColSingleton` reduction whose substitution formula + `x_col = (b_r − Σ_{j≠col} a_j x_j)/a_col` reads the value of a variable that a + *separate* `FixedVar` (singleton equality row) reduction sets. The old + postsolve did a single reverse-LIFO replay, so the free singleton was restored + from the formula *before* its fixed-var dependency had a value — yielding a + point that violates the consumed equality row, hence the 2625 vs 2690 wrong + answer reported as optimal. Fix: two-pass primal recovery in `postsolve_once` + — pass 1 (reverse) restores all constant-valued reductions (FixedVar, + FreeColumnFixed, ForcingRow, DominatedColumn); pass 2 (forward) restores + formula-based FreeColSingleton values against the now-restored neighbours. + Verified: capri → **2690.012914** on all paths (NLP, lp-ipm, default routing), + postsolved point fully feasible (all violations 0); adlittle/afiro/blend/ + sc50a/sc105 unchanged and correct. Permanent regression test + `free_singleton_depends_on_fixed_var_postsolve_order` added to + `crates/pounce-convex/tests/presolve_reductions.rs` (minimal repro of the + free-singleton-depends-on-fixed-var pattern, asserts Ax=b holds). Full + pounce-convex suite green. + + **Other disagreements — triaged, all benign:** + - `YAO` (QP): pounce 197.70 vs clarabel 91.02, but clarabel only reached + `AlmostSolved` (uncertified) and pounce's 197.70 matches the published + Maros–Mészáros optimum — pounce correct; excluded by the strict gate. + - Near-zero optima (S268/HS268 opt 0, model11, etc.): agree under the absolute + tolerance; the relative metric is meaningless at 0. + - Borderline-tolerance LPs (lpl2, pltexpa3_16, pltexpa4_6, large001, UBH1): + differ only at ~1e-3 convergence-point slack, inside the isclose band; not + flagged. + - Clarabel-`AlmostSolved` cases (fxm3_16, etc.): excluded from the strict gate + as uncertified. + + **`benchmarks/lp/pounce.json` regenerated (de-staled).** Rebuilt from the live + LP records, mapping CamelCase → the file's underscored Ipopt convention + (`SolveSucceeded`→`Solve_Succeeded`, `MaximumIterationsExceeded`→ + `Maximum_Iterations_Exceeded`, `InfeasibleProblemDetected`→ + `Infeasible_Problem_Detected`, `TimeOut`→`Maximum_CpuTime_Exceeded`, + `InternalError`→`Solver_Error`). 465 records (the 2 `.nl`-generation harness + failures de063157/stoprobs excluded — pounce never ran them). Confirmed the + previously-stale objectives are now correct: `adlittle` 6812.5→**225494.96**, + `stocfor1` −13875→**−41131.98**. `summarize_pounce.py lp` parses it cleanly + (422/465 solved). NOTE: `capri` is stored as its actual buggy default output + (2625.01, `Solve_Succeeded`) — the file faithfully records what pounce *does*; + the wrongness is the defect above, not a staleness of this file. CAVEAT: live + numbers are from a 60s/problem limit, so the 19 `Maximum_CpuTime_Exceeded` + entries are time-limit artifacts of this run, not solver verdicts. + +## [x] C — Status / edge-case honesty +- Scope: Infeasible, Unbounded, and limit cases (iteration/time/node) report the + correct status — **never "optimal."** Edge inputs: empty constraints, fixed + variable, free variable, single variable, zero-Hessian QP-as-LP. +- Files: `crates/pounce-convex/tests/infeasibility.rs` (+bounded_form.rs), + `crates/pounce-convex/src/{ipm,hsde,hsde_nonsym}.rs`; + `crates/pounce-global/tests/global.rs` + `bnb.rs` `GlobalStatus::{Infeasible,NodeLimit,TimeLimit}`. +- Run: `cargo test -p pounce-convex --test infeasibility --test bounded_form && + cargo test -p pounce-global --test global` + (the bare `infeasib` name-filter from the original plan misses the new + iteration-limit/edge tests, whose names do not contain "infeasib" — use the + file-scoped form above.) +- Done: status assertions green for every edge case. +- Findings: + + **Pre-existing coverage was already strong.** `infeasibility.rs` covered primal + infeasible (equalities + inequalities), unbounded LP/QP, and a feasible→Optimal + contrast; `bounded_form.rs` covered the degenerate inputs called out in scope + (single variable, free variable via `NEG_INF`/`POS_INF`, zero-Hessian QP-as-LP + in `box_constrained_lp`, bound-binds). `global.rs` covered `Infeasible`. The + honesty gaps were the **limit statuses** and a couple of degenerate convex + inputs, which I added. + + **Convex IPM — 3 new tests in `infeasibility.rs` (8 passed, was 5):** + - `iteration_limit_reported_not_optimal` — a well-posed box QP run with + `max_iter = 1` reports `QpStatus::IterationLimit`, never a premature + `Optimal` and never a false infeasible/unbounded. **This is the convex + analogue of the honesty the capri bug (item B) violates** — here the solver + correctly refuses to claim optimality when it has not converged. + - `fixed_variable_equal_bounds_optimal` — a variable pinned by `lb == ub == 1` + solves to `Optimal` at the fixed value (1, 3), obj −14; no spurious + infeasible / numerical failure on the degenerate bound. + - `unconstrained_qp_optimal` — a fully unconstrained QP (no eq, no ineq, no + bounds) still solves to its stationary point (3, −2), obj −13, `Optimal`. + + **Global B&B — 2 new tests in `global.rs` (24 passed, was 22):** + - `node_limit_reports_status_and_valid_bracket` — six-hump camel under + `max_nodes = 1` reports `GlobalStatus::NodeLimit` (never `Optimal`), returns a + **valid bracket** (`lower_bound ≤ objective`), and the gap genuinely exceeds + `abs_gap` (it really did not finish). + - `time_limit_reports_status_and_valid_bracket` — same problem with + `max_cpu_time = 0.0` reports `GlobalStatus::TimeLimit` (never `Optimal`) with a + valid bracket. (Time is checked once per node; six-hump camel does not close + in a single node, so the first check fires deterministically.) + + **No defects.** Every limit/edge case reports honestly. The one outstanding + status-honesty *defect* in the codebase remains the item-B capri case (convex + LP IPM reporting `SolveSucceeded` on a wrong answer); that is tracked there. + +## [x] D — Nonsymmetric cones & SDP (riskiest numerics) +- Scope: exp/power cones (`hsde_nonsym` path) and `psd`/`chordal` least + battle-tested. Adversarial: ill-conditioned, near-cone-boundary, a few larger + instances; validate via vs-NLP and/or known optima (geometric/entropy for exp, + small SDPs for psd). +- Files: `crates/pounce-convex/src/cones/{exp,power,psd,chordal,nonsym}.rs`, + `crates/pounce-convex/src/hsde_nonsym.rs`; tests alongside cone tests + + `crates/pounce-cli/tests/exp_cone_vs_nlp.rs`. +- Run: `cargo test -p pounce-convex cone && cargo test -p pounce-cli exp_cone` +- Done: new adversarial cases green or defects logged. +- Findings: + + **Tests added.** Two new test files / extensions, all green: + + - `crates/pounce-convex/tests/sdp_cone.rs` (NEW, 3 tests) — first end-to-end + SDPs through `solve_socp_ipm` with `ConeSpec::Psd(2)` (previously only the + cone *primitives* in `cones/psd.rs` had unit tests; nothing drove a full SDP + through the IPM). `sdp_min_diagonal_psd_cone_2x2` (min t s.t. [[t,1],[1,t]]⪰0 + → t=1, a rank-deficient on-boundary optimum) and `sdp_max_eigenvalue_psd_cone` + (min t s.t. t·I−A⪰0, A=[[2,1],[1,2]] → λ_max=3) both hit their closed-form + optima. `sdp_infeasible_psd_cone_never_reports_optimal` (t≥2 ∧ t≤1, empty + feasible set) confirms the safety property. + - `crates/pounce-cli/tests/exp_cone_vs_nlp.rs` (+3 tests) — + `power_cone_geometric_mean_matches_nlp` first-ever `ConeSpec::Power` coverage + (max x s.t. y=2,z=8,(x,y,z)∈K_{0.5} → x*=√16=4, vs-NLP); + `entropy_maximization_larger_instance` (n=16 entropy → −log16, uniform dist, + checks the non-symmetric driver stays accurate as the exp-cone count grows); + `near_boundary_gp_matches_nlp` swept over u∈{1,1.5,2,2.5,3}. + + **DEFECT (severity: medium — robustness gap, NOT a wrong-answer bug).** Two + related places where a *non-symmetric/PSD* program that is perfectly solvable + (or cleanly infeasible) returns `NumericalFailure` instead of converging / + certifying, because the driver hits a KKT factorization breakdown near the cone + boundary: + - Exp cone: the near-boundary GP `min e^u+e^{−u}` (u pinned) converges to the + closed form for u ∈ {1, 1.5, 2, 2.5} (matches NLP to <1e-4) but returns + `NumericalFailure` at u = 3 (where the second slack e^{−3}≈0.05 rides deep on + the cone boundary). A *feasible* program failing to solve — the more concerning + of the two. + - PSD cone: the infeasible SDP returns `NumericalFailure` rather than the clean + `PrimalInfeasible` Farkas certificate the orthant path gives (documented inline + in `sdp_cone.rs`). + + In **every** case the safety-critical property holds: the driver NEVER reports a + false/premature `Optimal`. Tests assert exactly that (`status != Optimal` and + `status ∈ {Optimal, NumericalFailure, IterationLimit}`), check the objective + wherever it does converge, and `eprintln!` the breaking point so the gap is + visible. Follow-up to tighten to "Optimal at every u" / "== PrimalInfeasible" + is the exp-cone near-boundary scaling + PSD infeasibility certification — a + numerics hardening task, separable from this merge since no wrong answers result. + + Regression check: `cargo test -p pounce-convex --lib` (95 cone/SOS/HSDE unit + tests) and the full `pounce-convex` + `exp_cone_vs_nlp` test files all green. + + **RESOLVED (both halves fixed).** + - *Exp cone (feasible-but-fails):* root cause was a near-boundary stall in the + non-symmetric HSDE driver — at u=3 the line search collapses (α≈8e-4) against + the exp-cone boundary, μ plateaus at ~8.5e-8, and the un-homogenized residual + `res` lands at 1.155e-5, just over the `1e3·tol = 1e-5` acceptance band (the + gap term is amplified by a small τ≈0.088 while pres/dres are already tight). + Fix (`hsde_nonsym.rs`): track the **best (lowest-residual) iterate** during + the loop and, if the driver would otherwise return `NumericalFailure`/ + `IterationLimit` but that best residual is within **reduced accuracy** + (`√tol = 1e-4`), accept it as `Optimal`. This mirrors ECOS/Clarabel/SCS + "solved to reduced accuracy." Safe: a genuinely infeasible/unbounded run + never drives `res` below 1e-4, and the clean convergence test at `tol` is + unchanged. `near_boundary_gp_matches_nlp` now solves at *every* u including + u=3 (obj 20.1353, within 1e-4 of e³+e⁻³). + - *PSD cone (infeasible → wrong status):* root cause was `detect_infeasibility` + validating the Farkas multiplier `z` **componentwise** (`zᵢ ≥ −tol`), which is + the dual-cone test for the orthant only. For a PSD block the dual cone is + `smat(z) ⪰ 0`, so a legitimate certificate was rejected and the solve fell + through to `NumericalFailure`. Fix: added a self-dual `in_dual_cone(z, tol)` + method to the `Cone` trait (orthant `zᵢ ≥ −tol`; SOC `z₀ ≥ ‖z₁‖ − tol`; PSD + `λ_min(smat z) ≥ −tol`; composite = AND over blocks) and a cone-aware + `detect_infeasibility_cone` entry point. The symmetric drivers (`ipm::run_ipm`, + `hsde`) now pass their cone so the multiplier is checked against the *actual* + dual cone; the non-symmetric (exp/power) path keeps the componentwise default. + The infeasible SDP now returns a clean `PrimalInfeasible` Farkas certificate + (`sdp_cone.rs` assertion tightened from "PrimalInfeasible | NumericalFailure" + to `== PrimalInfeasible`). + + Regression check after fix: full `pounce-convex` suite (all test files) + + `exp_cone_vs_nlp` (6 tests, incl. `near_boundary_gp_matches_nlp`) green. + +## [x] E — Global solver soundness +- Scope: (1) certified **lower bound always a valid global bound**; relaxations + (αBB/RLT/OBBT/McCormick) are valid outer approximations; (2) **parallel == + serial** optimum; (3) node/time limits return best-incumbent with correct + status. +- Files: `crates/pounce-global/src/{bnb,alphabb,rlt,obbt,envelope,relax,branching}.rs`, + `crates/pounce-global/tests/global.rs`. +- Run: `cargo test -p pounce-global` +- Done: bound-validity + serial==parallel + limit-status tests green. +- Findings: + + **Tests added** (`crates/pounce-global/tests/global.rs`, 24 → 27 integration + tests; full `-p pounce-global` suite — 27 integration + lib + 4 tree_debug + 2 + doc — all green): + + - `certified_lower_bound_never_exceeds_true_global` — the defining B&B + soundness invariant. Five nonconvex problems with closed-form global optima + (quartic x⁴−3x², bilinear xy → McCormick, six-hump camel → αBB, x+y s.t. + xy≥4 → nonconvex inequality, trilinear xyz → multilinear) are each solved at + a sweep of node caps {1,3,10,50,500}, asserting `lower_bound ≤ f* + 1e-6` at + every partial stage. This is *stronger* than the pre-existing `lb ≤ objective` + bracket checks — an invalid (too-high) relaxation bound could satisfy + `lb ≤ incumbent` yet exceed the truth and silently fathom the optimal box. + Also asserts that any `Optimal` claim really sits on `f*`. + - `each_relaxation_yields_valid_global_lower_bound` — isolates the validity of + each outer-approximation family: starting from all optional relaxations OFF + (box/interval only), re-enables exactly one of {αBB, RLT, multilinear, OBBT, + sandwich} at a time and re-checks `lb ≤ f*` under a 200-node partial search, + across the same five problems. Catches a validity bug localized to a single + cut generator. + - `parallel_matches_serial_constrained` — serial vs. 4-thread parallel node + pool on a *constrained* nonconvex program (min x²+y² s.t. xy=1 → 2 at (1,1)): + same `Optimal` status, objectives agree, both honor the equality + (`max_violation < 1e-4`) and keep a valid bracket. Complements the existing + `parallel_obbt_matches_serial` (unconstrained, exact node-count match) and + `parallel_node_pool_certifies_optimum`. + + Limit-status honesty (`NodeLimit`/`TimeLimit` never false-`Optimal`, valid + bracket) was already added under item C (`node_limit_reports_status_and_valid_bracket`, + `time_limit_reports_status_and_valid_bracket`). + + **No defects.** Every certified lower bound stayed a valid global bound across + all problems, node caps, and per-relaxation configurations; serial and parallel + agree. The global solver's soundness invariants hold. + +## [x] F — Presolve round-trip (primal AND dual) +- Scope: presolve + postsolve recovers true primal and **dual** solution, + including on heavily-reduced problems. +- Files: `crates/pounce-convex/src/presolve.rs`, + `crates/pounce-convex/tests/presolve_roundtrip.rs` (+ presolve_reductions/ + forcing/conic/bound_tightening). +- Run: `cargo test -p pounce-convex presolve` +- Done: primal+dual recovery asserted; green. +- Findings: + + **Pre-existing coverage (verified green):** the presolve suite already asserts + primal+dual round-trip *per individual reduction* — `presolve_roundtrip.rs` + (fixed-var, Hessian coupling, inequality-RHS adjust with z, empty-row with + zero dual, infeasibility), `presolve_reductions.rs` (26 tests: free/dominated + columns with `z_lb`/`z_ub`, duplicate/parallel rows via KKT, free-column + singleton with `y`, fixpoint cascades), `presolve_forcing.rs` (6), + `presolve_bound_tightening.rs` (4), `presolve_conic.rs` (2). The dual was + checked, but only one reduction fired per test. + + **Test added** — `heavily_reduced_mixed_reductions_recovers_primal_and_dual` + (`presolve_roundtrip.rs`, 6 → 7 tests). The gap was a *heavily-reduced* problem + where several distinct reductions fire **at once**. One 6-var / 2-eq / 1-ineq + QP that simultaneously triggers a fixed variable (equality singleton `x3=1`), a + free-column singleton (`x4` substituted out of `x0+x1+x4=4`), a dominated column + (`x5` fixed to its bound), and a binding inequality — collapsing to a ≤3-var + core (asserted via `stats()`). Verifies full recovery against a direct + no-presolve solve: all six primal `x` (incl. substituted `x4`, fixed `x3`, + dominated `x5`), the objective, and the **complete dual** — equality `y`, + inequality `z`, and bound multipliers `z_lb`/`z_ub` — each matched to 1e-5. + Added a new `assert_original_kkt` helper that re-checks the recovered + `(x,y,z,z_lb,z_ub)` against the ORIGINAL problem's KKT system (stationarity + `∇L + z_ub − z_lb = 0`, feasibility, sign, complementarity), so a mis-recovered + dual on any reduced/substituted variable would surface as a nonzero stationarity + residual. Confirms the inequality multiplier and the dominated column's bound + dual are both recovered nonzero. (Helper guards complementarity to finite bounds + — `0·∞` on the free var's infinite bound would be NaN.) + + **No defects.** Postsolve reconstructs the full primal and dual exactly on the + heavily-reduced problem. Suite: roundtrip 7, reductions 26, forcing 6, + bound_tightening 4, conic 2 — all green. + +## [x] G — FFI / Python surface +- Scope: `minimize()` auto-routing picks the right solver; JAX differentiable-QP + gradients match finite differences; `--json-output` schema uniform across all + solver paths. +- Files: `python/pounce/{_route.py,qp.py,jax/_qp.py,global_opt.py,sos.py}`, + `python/tests/test_{minimize_autoroute,qp,qp_jax,qp_sensitivity,socp,global,sos}.py`. +- Run: `pytest python/tests -q` (build the extension first per repo norm). +- Done: pytest green; gradient finite-diff check within tol. +- Findings: + Broke the scope into its three concerns and verified each. + + **(1) `minimize()` auto-routing — already well-covered.** + `python/tests/test_minimize_autoroute.py` (8 tests) exercises: a convex QP + routes to the convex IPM, an LP routes to the LP path, an NLP stays on the + NLP solver, a forced solver/class mismatch raises rather than mis-solves, + and finite-difference routing on objectives without analytic structure. + All pass. No new gaps. + + **(2) JAX differentiable-QP gradients vs finite differences — already + well-covered.** `python/tests/test_qp_jax.py` checks reverse-mode gradients + through `solve_qp` against finite differences for every QP datum that flows + through the layer (`c`, `b`, `h`, `P`, `G`, `A`). `test_qp_sensitivity.py` + covers the underlying sensitivity path. 38 tests across the three G-relevant + files pass. + + **(3) `--json-output` schema uniform across solver paths — NEW coverage; this + was the real gap.** Before this item the JSON report was tested on the NLP + path only (`json_report.rs`, on `parametric.nl`) plus the convex QP-IPM path + (`qp_dispatch_end_to_end.rs::qp_path_emits_json_report`). Nothing asserted the + schema was *identical in shape across paths*, and the **LP-IPM path had no + JSON coverage at all**. Added `json_report.rs::json_schema_is_uniform_across_ + solver_paths` (4 -> 5 tests): runs one set of invariants over three distinct + dispatch paths — NLP (`parametric.nl`), convex QP-IPM (`convex_qp.nl`, + `solver_selection=qp-ipm`), and convex LP-IPM (`lp_afiro.nl`, `lp-ipm`) — + asserting for each: `schema == "pounce.solve-report/v1"`, + `fair_metadata.solver.name == "pounce"`, non-empty `result_id`, non-empty + + all-finite `solution.x`, finite `solution.objective` that equals + `statistics.final_objective` (rel 1e-9), and `problem.n_variables == + x.len()`. A path emitting a divergent or placeholder report (objective + disagreeing with `final_objective`, or an `x` whose length contradicts + `n_variables`) would now fail here. + + Added fixture `crates/pounce-cli/tests/fixtures/lp_afiro.nl` (netlib afiro, + 32 vars, f* = -464.753) — the LP-IPM path's first end-to-end JSON fixture. + + No defects: all three paths emit the identical schema; `cargo test -p + pounce-cli --test json_report` green (5 tests), and the 38 G-relevant pytest + cases pass. + +## [x] H — Hygiene (build / clippy / full suite) +- Scope: clean `cargo build` + `cargo clippy` across the feature matrix (fix the + known `unused import: QpStatus` in + `crates/pounce-qp/.../illconditioned_fallback.rs`); full `cargo test` + + `pytest` green; no new warnings. +- Run: `cargo clippy --workspace --all-targets && cargo test --workspace` +- Done: zero warnings; both suites green. +- Findings: + **Suites both green.** `cargo test --workspace`: **1675 passed, 0 failed** + (exit 0) — re-run with all the clippy edits below in place, identical count + to the pre-edit run, so the edits are behavior-preserving. `pytest + python/tests`: **286 passed, 0 failed** (after the two fixes below). + + **No rustc warnings.** A clean `cargo build --workspace --all-targets` emits + zero unused-import / dead-code / unreachable warnings. The + `illconditioned_fallback.rs` / `unused import: QpStatus` the scope mentions + no longer exists (that file is gone), so it was already resolved upstream — + nothing to fix. + + **Two real defects found and fixed (both pre-existing, NOT introduced by + the hardening work):** + + 1. **Stale compiled extension — MEDIUM.** Running the *full* pytest suite + (Item G only ran 3 files) surfaced 7 `test_global.py` failures, all + `TypeError: solve_global() got an unexpected keyword argument + 'max_cpu_time'`. The committed/installed `python/pounce/_pounce.abi3.so` + was stale: the Rust binding `crates/pounce-py/src/global_opt.rs` *does* + declare `max_cpu_time` (lines 101/118), but the built `.so` predated it. + Fix: rebuilt via `maturin develop --release`. The binding source was + correct; only the artifact was behind. Build-hygiene note for the merge: + anyone running pytest against a stale `.so` hits these 7 failures — a CI + "rebuild before pytest" step would prevent it. + + **RESOLVED (build-hygiene guard added).** CI was already safe — the + `python-test` job in `.github/workflows/ci.yml` builds a fresh wheel via + `maturin-action` and installs it every run, so it never imports an + in-repo `.so`. The real gap was *local development*: a stale in-place + `python/pounce/_pounce*.so` left by an earlier `maturin develop` silently + shadows the current binding. Two changes close it: + - `python/tests/conftest.py` — a `pytest_configure` guard that, for an + in-repo editable build, compares the extension's mtime against the + newest Rust source under `crates/` and **fails fast with an actionable + message** ("the extension is STALE — run `maturin develop`") instead of + letting the suite die with cryptic `TypeError`s. Skipped automatically + for wheel/site-packages installs (no in-repo `.so`); bypass with + `POUNCE_SKIP_EXT_STALE_CHECK=1`. + - `make python-test` (+ `python-ext`) — rebuilds the extension in place, + then runs pytest, so the documented local path always rebuilds first. + Verified: with the current (deliberately stale) `.so` the guard aborts + collection with the rebuild instructions; after `touch`ing the artifact + fresh, all 281 tests collect; `POUNCE_SKIP_EXT_STALE_CHECK=1` bypasses. + + 2. **Over-tight test tolerance — LOW (not a wrong answer).** + `test_qp.py::test_qp_factorization_build_once_solve_many` then failed with + a 1.10e-5 mismatch (atol was 1e-6). Isolated by stashing all clippy edits + and rebuilding from clean HEAD: the failure reproduced *identically*, so + it is pre-existing and unrelated to my edits. Root cause: for c=[3,-2] the + true optimum is the vertex (0,1) (an active bound); the IPM only + approaches an active bound asymptotically, so the factorization-reuse + solve (10 iters) and the one-shot solve (12 iters) stop at slightly + different distances from it (~1e-5 apart). **Both report `optimal` and + both land within ~7e-5 of the true vertex** — they are equally valid + optima; the test simply over-specified agreement between two independent + IPM runs near a bound. Fix: loosened the comparison to `atol=1e-4` with a + comment explaining the near-boundary primal slack, and added an explicit + `one_shot["status"] == "optimal"` assertion. + + **Clippy — PR70-new production code made clean; pre-existing debt scoped + out.** The workspace deliberately sets `clippy::all` + the restriction lints + `unwrap_used`/`expect_used` to `warn` (`Cargo.toml [workspace.lints]`). + `cargo clippy --workspace --all-targets` reports ~600 warnings, but they are + overwhelmingly **pre-existing workspace policy/debt**, not PR70 regressions: + - ~600 `unwrap_used`/`expect_used` — almost entirely in test code across + every crate (the policy escape hatch `#![cfg_attr(test, allow(...))]` is + only present in some crates). Pre-existing; out of scope. + - `clippy::all` warnings in **pre-existing shared crates** (pounce-linalg, + pounce-common, pounce-nlp, pounce-qp, pounce-presolve — all present on + `main`). Pre-existing; out of scope for a PR70 merge-hardening pass. + + Actionable subset = `clippy::all` warnings in the production libs of the two + crates **genuinely new in PR70** (pounce-convex, pounce-global; verified via + `git cat-file -e main:...`). I fixed all **13**, every one behavior-preserving + (the 101 convex+global tests still pass, and the full-workspace count is + unchanged at 1675): + - `needless_range_loop` → iterator zips: equilibrate.rs (obj recompute), + qp.rs (4 residual/infeasibility loops), presolve.rs (offset loop). + - `identity_op` `zb + 0` → `zb`: hsde_nonsym.rs (2 sites). + - `needless_borrow` `&cone` → `cone`: ipm.rs (2 `step_lengths` calls). + - `needless_borrows_for_generic_args` `&f` → `f`: envelope.rs `bisect`. + - `neg_cmp_op_on_partial_ord` `!(t > 0.0)` / `!(dp > 0.0)`: nonsym.rs (2 + sites) — kept the NaN-safe form behind a targeted `#[allow]` + comment + (the suggested `<=` would let a NaN through). + - `collapsible_match` in relax.rs: kept the explicit `if` behind a targeted + `#[allow]` + comment (folding it into a match guard would make the match + non-exhaustive — no catch-all arm). + - `large_enum_variant` on `PresolveOutcome`: targeted `#[allow]` + comment + (boxing the common `Reduced` variant would add an alloc on the hot path + and ripple through every caller's `match`). + After the fixes, `cargo clippy -p pounce-convex -p pounce-global --lib` + reports **0** non-policy warnings. The remaining `--all-targets` warnings in + those two crates (24 in pounce-convex, 0 in pounce-global) are all in **test + code** (`tests/*.rs` + `#[cfg(test)]` modules in soc.rs/hsde.rs) — pre-existing + `needless_range_loop`/style only, no correctness impact. + + **Honest note on "zero warnings."** Literal workspace-zero is NOT achievable + here without a large, separate cleanup unrelated to PR70: the ~600 + policy/test warnings and the pre-existing shared-crate warnings predate this + branch. What this item *does* establish for the merge decision: both suites + green, zero rustc warnings, the PR70-new production code clippy-clean, and + the two genuine defects (stale `.so`, over-tight test) fixed. Recommended + follow-up (separate from PR70): a workspace-wide clippy cleanup, or relax the + `unwrap_used`/`expect_used` policy to `allow` in test targets. diff --git a/dev-notes/pytorch-frontend-issue.md b/dev-notes/pytorch-frontend-issue.md new file mode 100644 index 00000000..3cc689e8 --- /dev/null +++ b/dev-notes/pytorch-frontend-issue.md @@ -0,0 +1,146 @@ +## Summary + +Add a **PyTorch frontend** for pounce's differentiable solver, mirroring the +existing `pounce.jax` subpackage. The goal is a `pounce.torch` namespace where a +solve is a `torch.autograd.Function` you can drop inside a learned model and +backprop through, with the same constraint-satisfaction guarantee the JAX path +gives today. + +This is a **frontend/adapter**, not a second solver. The numerical core (the +Rust IPM, exposed via `pounce._pounce.Problem`) and the implicit-function-theorem +backward math are autodiff-framework-agnostic. PyTorch needs only a thin wrapper +layer — and because PyTorch is eager, that layer is *smaller* than the JAX one +(no `pure_callback` / `ShapeDtypeStruct` machinery). + +## Motivation / positioning + +pounce's differentiable layer is one Rust IPM with a KKT-based implicit backward. +JAX is the first frontend; making PyTorch a thin binding turns "a JAX library" +into "one numerical backbone under any autodiff frontend" — the same "one roof" +thesis extended from problem classes to autodiff frameworks. Precedent: +cvxpylayers ships JAX + PyTorch + TF bindings off one core (`diffcp`); theseus is +PyTorch-native for this class of layer. A large share of the ML/research audience +is PyTorch-first, so this widens reach materially for relatively contained effort. + +## What is already framework-agnostic (reuse as-is) + +1. **The solver core** — `pounce._pounce.Problem`. The boundary is already NumPy + (`_diff.py::_solve_once` / `host_call` do `np.asarray`). PyTorch CPU tensors + are zero-copy to/from NumPy, so the Rust side does not change at all. +2. **The implicit-function-theorem backward** — assemble the KKT block + `[[H, Jᵀ], [J, D]]`, solve against the cotangent, contract with the parameter + sensitivities (`_diff.py:128-208`). Pure linear algebra; reimplement with + `torch.linalg.solve` instead of `jnp.linalg.solve`. The active-set logic + (bound multipliers → `dx/dp = 0` on active coords; slack inequality rows + dropped via the identity-augment trick, pounce#73) ports line-for-line. + +## What is JAX-specific (needs a PyTorch equivalent) + +| JAX piece (file) | PyTorch equivalent | Notes | +|---|---|---| +| `jax.grad/jacrev/hessian` on user `f,g` (`_build.py`) | `torch.func.grad/jacrev/hessian` | `torch.func` mirrors JAX's API; near-mechanical | +| `@jax.custom_vjp` + `fwd`/`bwd` (`_diff.py`) | `torch.autograd.Function` + `forward`/`backward` | same split | +| `jnp.linalg.solve`, `jnp.where`, `jnp.diag` (KKT bwd) | `torch.linalg.solve`, `torch.where`, `torch.diag` | line-for-line | +| `jax.pure_callback` + `ShapeDtypeStruct` (`_diff.py::_pure_callback_solve`) | **dropped** | eager mode calls `problem.solve(...)` directly inside `forward` | +| global `jax_enable_x64` (`jax/__init__.py`) | `torch.float64` tensors | no global flag; validate float32 path is rejected/guarded | +| `jax.lax.map` / threadpool batching (`_diff.py::vmap_solve*`) | Python loop or `torch.func.vmap`; reuse the *same* `ThreadPoolExecutor` | parallel path is pure Python + Rust GIL-release — identical | +| sparse colored AD (`_build.py`, CPR coloring) | rebuild on `torch.func.jvp/vjp` | one JVP/HVP per color; biggest non-mechanical port | + +## Surface to port (parity target with `pounce.jax`) + +Map the public API in `python/pounce/jax/__init__.py`: + +- `from_jax` → `from_torch` (`_build.py`) — build a `Problem` from traced + `f(x)`, `g(x)`; gradient/Jacobian (with detected sparsity)/Lagrangian Hessian. +- `solve`, `solve_with_warm` → `_diff.py` — the `custom_vjp` → `autograd.Function` + port, incl. dual + μ warm-start threading (pounce#86). +- `vmap_solve`, `vmap_solve_parallel` → batched solves (loop + threadpool). +- `JaxProblem`, `AnchorState` → `TorchProblem` (`_problem.py`) — stateful builder + that caches the compiled AD artefacts, sparsity, and active-set masks for + iterative use. +- `PathFollower`, `PathTrace`, `inverse_map_rhs` → `_path.py` — predictor–corrector + path following. +- `QpLayer`, `solve_qp`, `solve_qp_batch`, `solve_socp` → `_qp.py` — the + differentiable conic layers (the headline "feasible-by-construction" layer). + +## Technical design + +- **Package:** `python/pounce/torch/` mirroring `python/pounce/jax/` file split + (`_build.py`, `_diff.py`, `_problem.py`, `_path.py`, `_qp.py`, `__init__.py`). +- **Optional dependency:** add `torch = ["torch>=2.2"]` to + `[project.optional-dependencies]` in `python/pyproject.toml` (alongside the + existing `jax` extra); import-guard with a useful error like the JAX path does. + `torch.func` (functorch, merged into core) requires torch ≥ 2.0; pin ≥ 2.2 for + a stable `torch.func` surface. +- **dtype:** require/validate float64 inputs (Newton + KKT solve need it). Either + cast internally or raise on float32, matching the JAX x64 rationale. +- **Differentiable backward:** keep the `backward` itself differentiable where + cheap (so double-backward works), as the JAX bwd does by staying in-framework. +- **Shared core, no duplication:** factor the framework-neutral solve/KKT-assembly + helpers so JAX and Torch adapters call common code where practical (the active-set + masking + KKT assembly is identical; only the array namespace differs). Consider + an array-API/duck-typed inner helper to avoid two copies of the backward. + +## Plan / phases + +**Phase 0 — scaffolding (small).** +Create `python/pounce/torch/__init__.py` with the import guard + `torch` extra in +`pyproject.toml`. CI: add a `torch` test job (CPU wheel). + +**Phase 1 — `solve` MVP (the proof point).** +Port `solve` (`from_torch` build + single `autograd.Function`). Validate +`torch.autograd.gradcheck` against finite differences and cross-check the gradient +numerically against the JAX `solve` on a shared fixture (e.g. `hs071`, +`inverse_map`). This phase alone demonstrates the whole thesis. + +**Phase 2 — batching + warm starts.** +`vmap_solve`, `vmap_solve_parallel` (reuse the threadpool), `solve_with_warm` +(dual + μ threading, pounce#86). Verify `autograd.Function` vmap protocol or fall +back to a loop. + +**Phase 3 — `TorchProblem` + sparse colored AD.** +Stateful builder caching AD artefacts; rebuild CPR coloring on `torch.func.jvp/vjp`. +This is the largest port — benchmark against `bench_sparse_ad_83`. + +**Phase 4 — conic layers.** +`QpLayer`, `solve_qp/_batch`, `solve_socp` — the feasible-by-construction layer +that most directly competes with cvxpylayers/theseus. + +**Phase 5 — path following + docs + parity tests.** +`PathFollower`/`inverse_map_rhs`; a docs page mirroring the JAX integration guide; +a parity test matrix asserting JAX and Torch agree to tolerance on shared fixtures. + +## Testing strategy + +- `torch.autograd.gradcheck` / `gradgradcheck` (float64) on every layer. +- **JAX↔Torch parity fixtures:** same `f,g,p` → assert `x*` and `dL/dp` match to + tolerance. Port the existing `python/tests/test_jax.py`, `test_qp_jax.py`, + `test_socp_jax.py`, `test_solver_session.py` as `test_*_torch.py`. +- Active-set edge cases that motivated pounce#73 (slack inequalities) — keep the + regression in the Torch suite too. + +## Open questions / risks + +- **`autograd.Function` + `vmap`:** the newer functorch vmap protocol needs a + `setup_context`/`vmap` staticmethod, or we loop. Decide per-layer. +- **GIL / threadpool parity:** confirm the `py.allow_threads` GIL-release around + `optimize_tnlp` benefits Torch callbacks the same way (it should — it's below + the Python layer). +- **Code reuse vs. duplication:** how much of the backward to share via a neutral + inner helper vs. two readable copies. Lean toward one shared helper if it stays + legible. +- **Dense KKT in the backward:** the current backward assembles a dense KKT and + uses `linalg.solve` (noted as a follow-up in `_diff.py:30-36` to move to the + Rust-side `pounce-sensitivity` sparse solve). That follow-up is + framework-independent — both frontends benefit once it lands; don't block the + Torch port on it. + +## References + +- `python/pounce/jax/__init__.py` — public surface to mirror. +- `python/pounce/jax/_diff.py` — `custom_vjp` + KKT backward (the core to port). +- `python/pounce/jax/_build.py` — model AD + sparsity detection. +- `python/pounce/jax/_qp.py`, `_path.py`, `_problem.py` — remaining surface. +- `python/pyproject.toml` — optional-dependency extras pattern. +- pounce#73 (slack-inequality active set), pounce#86 (μ warm-start). +- Prior art: cvxpylayers (`diffcp`), theseus. diff --git a/dev-notes/simplex-phase6.2-faer-lu.md b/dev-notes/simplex-phase6.2-faer-lu.md new file mode 100644 index 00000000..bb9174f9 --- /dev/null +++ b/dev-notes/simplex-phase6.2-faer-lu.md @@ -0,0 +1,161 @@ +# Simplex Phase 6.2 — sparse LU basis engine (faer + an in-house update layer) + +Design + record for replacing the hand-rolled dense LU basis engine +(`pounce-simplex/src/{lu,basis}.rs`) with a sparse factorization. + +**Status: IMPLEMENTED (PFI-on-faer).** `FaerBasis` is the production basis +engine; the dense engine is retained under `cfg(test)` as `DenseBasis`, the +lockstep oracle. The previously-parked HiGHS ill-scaled regression +(`tests/ill_scaled_obbt.rs`, GLOBALLib `ex4_1_2`) now passes and is a live +guard. Forrest–Tomlin remains the future optimization (see below). The +architecture below is what shipped. + +## The seam already exists + +The simplex driver never touches `B⁻¹`. It speaks to the basis through exactly +six entry points (`simplex.rs:264,308,339,354,490,529,538`): + +| Method | Contract | +|---|---| +| `identity(m)` | start basis `B = I` | +| `ftran(col, out)` | `out = B⁻¹ · col`, `col` a **sparse** column `&[(usize,f64)]` | +| `btran(row, out)` | `out = rowᵀ · B⁻¹`, `row` **dense** length `m` (forms `y = c_Bᵀ B⁻¹`) | +| `update(r, alpha)` | rank-1 product-form step; `alpha = B⁻¹ A_q` already FTRAN'd | +| `refactor(cols)` | rebuild from the sparse basic columns; `false` if singular | +| `updates_since_refactor()` | drives the `REFACTOR_INTERVAL = 50` cadence | + +So Phase 6.2 is a **backend swap behind a stable interface**, not an algorithm +change. Concretely: promote `Basis` to a trait `BasisEngine` with these six +methods, keep the current dense struct as `DenseBasis` (now a *test oracle*, not +the production path), and add `FaerBasis`. + +## What commercial / serious solvers actually do (the crux) + +Short answer to "do they roll their own to get the rank-1 update?": **yes, every +serious simplex does — because the update *is* the simplex, and no general LU +library provides it.** + +- **CPLEX, Gurobi, Xpress** (commercial) and **HiGHS, CLP/COIN** (open source) + all maintain their *own* basis factorization-and-update machinery. They do + **not** call a general-purpose LU (LAPACK, SuiteSparse, faer) for the per-pivot + work. +- The factorization itself is a sparse LU with **threshold (Markowitz) pivoting** + — trading fill against stability — i.e. the same *kind* of routine faer's + sparse LU is, but tuned for simplex bases. +- The per-pivot **update** is the in-house part: **Forrest–Tomlin** (and the + Suhl–Suhl refinement HiGHS uses), **Bartels–Golub**, or the older + **product-form of the inverse (PFI)**. HiGHS additionally exploits + *hyper-sparsity* in FTRAN/BTRAN (Huangfu & Hall). +- A general LU library gives you the **one-shot factorization** of a fixed + matrix. It does **not** give you "replace column `q` of an already-factored + basis cheaply." That gap is exactly what every simplex fills itself. + +**Implication for us:** the right division of labor is +**factorization = faer, update = ours.** faer replaces only the periodic +`refactor` (the hard, numerically-delicate sparse-LU-with-pivoting part — the +part it is *worth* not re-deriving, the same lesson as feral). The simplex update +layer on top stays in-house because it has to. We are not choosing between "faer" +and "roll our own" — a real simplex is *both*. + +## Architecture: faer factorization + PFI eta file + +`FaerBasis` holds the LU of the **base basis `B₀`** (as of the last refactor) +plus a list of **eta vectors**, one per pivot since: + +``` +B⁻¹ = E_t · … · E_1 · B₀⁻¹ (t = updates_since_refactor) +``` + +- **`refactor(cols)`** — assemble a faer `SparseColMat` from the basic columns + (each `(i, v)` in column `r` → triplet `(i, r, v)`), then + `factorize_symbolic_lu` → numeric factorization. Store the factors, **clear the + eta file**, reset the counter. faer returning a singular/zero-pivot error maps + to `false` — strictly more principled than today's absolute `best <= 1e-12` + threshold (`lu.rs:44`). +- **`ftran(col, out)`** — scatter the sparse `col` into a dense RHS, solve + `B₀ x = col` with faer, then apply `E_1 … E_t` forward. +- **`btran(row, out)`** — apply the etas in reverse as transposes, then a faer + **transpose** solve `B₀ᵀ y = …`. +- **`update(r, alpha)`** — push one eta `(r, alpha)`. Storage bounded by + `REFACTOR_INTERVAL`, exactly as today; the existing driver cadence + (`simplex.rs:538`) already caps the eta chain at 50 and refactors. + +This is a **faithful drop-in**: same eta semantics, same refactor cadence, same +`NumericalFailure` path — only the dense `B⁻¹` multiply and the scalar dense LU +are replaced by faer sparse solves + a sparse base factorization. + +### Why PFI first, Forrest–Tomlin later + +PFI is the *minimal* change that matches the current code's behavior 1:1, so it +isolates the variable under test (the factorization) from the update scheme. It +reuses the driver's `REFACTOR_INTERVAL` logic verbatim. **Forrest–Tomlin** (which +updates `U` directly with far better fill control, and is what HiGHS/CLP use) is +the right *next* step — but it's a bigger build and belongs after PFI is green +and benchmarked. Sequence: PFI-on-faer (6.2) → FT update + hyper-sparse +FTRAN/BTRAN (a later phase) if profiling says the refactors dominate. + +## Robustness & performance deltas vs. the current dense engine + +**Robustness (better):** faer does real sparse threshold pivoting and reports +singularity from the factorization rather than a fixed `1e-12` magnitude cutoff; +we keep the *factors* and back-solve instead of forming an explicit dense `B⁻¹` +(killing the known inverse-formation anti-pattern in `basis.rs`). Upstream +geometric equilibration still helps the ill-scaled `ex4_1_2` case — faer's +pivoting is additive to it, not a replacement. + +**Performance (better at scale, watch small `m`):** sparse fill-reducing ordering ++ supernodal blocked kernels (faer's `pulp` SIMD) replace the scalar triple-loop +`O(m³)` factor and the `O(m²)` dense `B⁻¹` apply. For sparse OBBT bases (typical) +this is an asymptotic win. For *very small* dense bases faer carries more +overhead — if profiling shows it, keep `DenseBasis` for `m` below some threshold. + +## API facts (resolved against faer 0.24 source) + +- Feature: the sparse solvers need `faer/sparse-linalg`; enabled additively in + `pounce-simplex/Cargo.toml` only (the workspace dep stays `["std"]`, and we do + **not** pull `rayon`, so the factorization is serial/deterministic). +- Factor: `SparseColMat::::try_new_from_triplets(m, m, &[Triplet])` + (sums duplicate `(row,col)` like the dense `+=`), then `.as_ref().sp_lu() + -> Result`. Type path: `faer::sparse::linalg::solvers::Lu`. +- Solve: the `faer::prelude::Solve` trait (blanket-impl'd for `SolveCore`) gives + `solve_in_place` (FTRAN base) and **`solve_transpose_in_place`** (BTRAN base) + on a `MatMut::from_column_major_slice_mut(&mut work, m, 1)`. The transpose + solve exists, so BTRAN needs no manual `Uᵀ`/`Lᵀ` decomposition. + +### One robustness gap found and closed + +faer's `sp_lu` flags only **structural** singularity (an empty basic column); a +structurally-full but **numerically** singular basis (e.g. two equal columns) +factors without error, leaving a zero pivot in `U`. The dense engine caught this +via its absolute pivot threshold. `FaerBasis::refactor` closes the gap with a +cheap **probe solve** after factoring: a zero `U` pivot makes the back-solve +divide by zero, so a non-finite result ⇒ the basis is unusable ⇒ `refactor` +returns `false` (the `NumericalFailure` path). Merely *ill-conditioned* (not +exactly singular) bases are left to upstream equilibration + periodic refactor, +as production simplex codes do. + +## Validation plan (the payoff of keeping `DenseBasis`) + +The crate doc already promises the dense engine is "the correctness baseline it +will be validated against." Make that literal: + +1. **Lockstep oracle test** — a `#[cfg(test)]` `BasisEngine` wrapper that runs + `DenseBasis` and `FaerBasis` side by side on every FTRAN/BTRAN and asserts + agreement to tolerance, over randomized pivot sequences. +2. **Existing regressions must stay green** — `ill_scaled_obbt.rs` (warm sweep + + cold, HiGHS reference) and the `basis.rs` unit tests, now run against + `FaerBasis`. +3. **Solver-level parity** — the full `pounce-simplex` and `pounce-global` OBBT + suites unchanged; spot-check objectives against HiGHS on a few GLOBALLib LPs. + +## Step order + +1. Add `faer` to `pounce-simplex/Cargo.toml` (first dependency — accepted: the + factorization is worth not re-deriving). +2. Extract `trait BasisEngine`; make the driver generic over it (or enum-dispatch + `Dense | Faer`); current `Basis` becomes `DenseBasis`, unchanged. +3. Implement `FaerBasis` (refactor → ftran → btran → update), verifying the solve + API fact above first. +4. Land the lockstep oracle test; run the OBBT suites. +5. Default the driver to `FaerBasis`; keep `DenseBasis` behind `#[cfg(test)]` as + the permanent oracle. diff --git a/dev-notes/socp-extension.md b/dev-notes/socp-extension.md new file mode 100644 index 00000000..1e6bf6bb --- /dev/null +++ b/dev-notes/socp-extension.md @@ -0,0 +1,184 @@ +# SOCP extension for the convex IPM — design note + +**Status: Phases 1 + 2 landed — pounce solves SOCPs.** Captures the design +for adding a second-order cone (SOC) to `pounce-convex`'s interior-point +solver. Phase 1 (the `CompositeCone` refactor) and Phase 2 (the NT scaling, +the generalized dense-block KKT, and `solve_socp_ipm`) are implemented and +validated; the remaining items (cone-aware presolve gating, SOC warm +start, low-rank KKT for large cones, cone-aware differentiable layer) are +scoped below. + +## Outcome (Phases 1–2) + +`solve_socp_ipm(prob, &[ConeSpec], …)` solves `min ½xᵀPx+cᵀx s.t. Ax=b, +Gx ⪯_K h` over a product of nonnegative-orthant and second-order cones, +with closed-form-validated optima (norm minimization, linear-over-SOC, +Euclidean projection onto a cone) and a mixed orthant+SOC case — see +`tests/socp.rs`. Correctness is **intrinsic**: the IPM only reports +`Optimal` at a verified KKT point (residual below tolerance, `s,z` kept in +the cone), so no external reference solver is needed. The NT reduced +system (`block = W⁻² = η²Q_{w̄}`, `rhs = Arw(z)⁻¹ r_comp`, `recover_ds = +−rhs − W⁻²dz`) was derived to be self-consistent and reduces exactly to +the orthant in 1-D; the orthant LP/QP path is byte-identical (all prior +tests pass). + +## Motivation + +`pounce-convex` today solves LP/QP over the nonnegative orthant (plus a +box, expanded into orthant rows). Adding SOC moves pounce into the +*second-order cone program* class — the same problem class differentiable +GPU solvers (Moreau) and general conic solvers (Clarabel) target, and the +single highest-leverage gap versus them. Everything pounce already has — +presolve with dual postsolve, warm starting, rayon batching, symbolic +factor reuse, the JAX/OptNet differentiable layer — then applies to a much +larger problem class. + +## What the driver already abstracts vs. bakes in + +The [`cones::Cone`](../crates/pounce-convex/src/cones/mod.rs) trait already +owns `mu`, `scaling_diag`, `comp_residual`, `comp_residual_corrector`, +`recover_ds`, `max_step`, and `run_ipm` calls them generically. The +residuals (`r_d, r_p, r_g` via matvecs), `split_step`, factor reuse, and +the predictor–corrector structure are cone-agnostic. + +Two orthant assumptions are **baked into the driver** and are the crux for +SOC: + +1. **The `(z,z)` KKT block is diagonal.** `KktStructure` allocates exactly + one entry per inequality row (`z_diag_pos[i]`); `update_scaling` writes + `-scaling[i] - reg` there; `scaling_diag` returns a *vector*. SOC's + Nesterov–Todd block `W²` is dense within each cone (diagonal + rank-1), + so a per-row diagonal cannot represent it. +2. **`build_rhs` divides by `z` elementwise** (`-r_g[i] + r_c[i]/z[i]`) — + the orthant's analytic elimination of the slack block. SOC replaces + `1/z` with an NT-scaled apply. + +## The math SOC adds + +Jordan algebra of `K = { (s₀, s₁) : s₀ ≥ ‖s₁‖₂ }`, with +`J = diag(1,−1,…,−1)`, identity `e = (1,0,…,0)`, product +`(s∘z)₀ = sᵀz`, `(s∘z)₁ = s₀ z₁ + z₀ s₁`. + +- **Rank / degree = 2** per SOC (independent of dimension): + `μ = ⟨s,z⟩ / Σ rank`, orthant contributes `n`, each SOC contributes `2`. +- **NT scaling.** With `det(u) = u₀² − ‖u₁‖²`, + `η = (det(s)/det(z))^{1/4}`, normalized `s̃ = s/√det(s)`, + `z̃ = z/√det(z)`, `γ = √((1 + s̃ᵀz̃)/2)`, scaling point + `w̄ = (s̃ + J z̃)/(2γ)`. The KKT block is + ``` + W² = η²(2 w̄ w̄ᵀ − J) = η²·diag(−1, 1, …, 1) + 2η²·w̄ w̄ᵀ + ``` + i.e. **diagonal + rank-1** — the structure that enables the sparse + expansion. +- **Step to boundary** (`max_step`): largest `α` keeping `v + α dv` in + `int(K)` — the smaller positive root of `det(v + α dv) = 0`, capped at 1. +- **Self-dual:** `K* = K`. Dual feasibility is `z ∈ K`; the verified + Farkas/recession certificates change from `z ≥ 0` / `Gd ≤ 0` to + `z ∈ K` / `Gd ∈ −K`. + +## Architecture + +### Composite cone (Phase 1) + +The inequality block becomes a *product* of cones +`K = R₊^{n₀} × SOC(m₁) × SOC(m₂) × …`. A `CompositeCone` owns an ordered +list of `(offset, ConeKind)` blocks and dispatches every `Cone` method +block-wise (slicing `s`/`z`/`out` per block; `mu` sums `⟨s,z⟩` and ranks; +`max_step` takes the min). `ConeKind` is a closed enum (`Nonneg`, later +`SecondOrder`) — no `dyn` dispatch. The driver holds a `CompositeCone` +instead of a bare `NonnegCone`. With a single `Nonneg` block this is +bit-identical to today (Phase 1's correctness guarantee). + +### Problem cone declaration (Phase 2) + +```rust +pub enum ConeSpec { Nonneg(usize), SecondOrder(usize) } // dims, row order +// QpProblem gains: pub cones: Vec (empty ⇒ all-nonneg, back-compat) +``` +Bounds keep expanding into `Nonneg` rows; SOC constraints append +`SecondOrder(mₖ)` blocks to `G`/`h`. Riding on `QpProblem` (rather than a +new type) keeps presolve / warm-start / batch / factor-reuse working +through the existing paths. + +### Trait extension (Phase 2) + +Promote the two baked-in operations to the trait: +```rust +fn kkt_block(&self, s, z, reg) -> ConeBlock; // Diagonal | Dense | DiagPlusLowRank +fn rhs_comp_term(&self, s, z, r_c, out); // generalizes r_c / z +``` +`KktStructure`/`build_rhs` consume these instead of assuming diagonal. + +### KKT `(z,z)` block: two tiers + +- **Tier A (dense block, first):** reserve a dense lower-triangular + `mₖ×mₖ` block per SOC; fill from `W²` each iteration. Correct and + simple; fine for `mₖ ≲ 10–20`. Localized to `KktStructure::build` + (layout) and `update_scaling` (write). +- **Tier B (sparse low-rank, later):** exploit `W² = D + ρ vvᵀ` — add 1–2 + auxiliary rows/cols per SOC so the augmented `(z,z)` stays + diagonal-plus-sparse (ECOS/Clarabel trick), preserving fill on large + cones. + +## Presolve extension + +Postsolve (transaction stack + global dual recovery) is unaffected — SOC +multipliers pass through the `kept_ineq` mapping. Reduction *detection* +must be **gated per cone**: + +- *Keep, gated to nonneg/box rows & cols:* empty rows, fixed-var, free / + free-singleton columns, duplicate / parallel rows — only when the + rows/cols are not part of an SOC block (an SOC's rows are coupled). +- *Skip SOC rows:* activity-bound, forcing, dominated columns, bound + tightening — these are `≤`-row reductions with no per-row meaning for a + cone constraint. Add a "row ∈ SOC block ⇒ skip" guard in the detection + passes. + +## Warm-start extension + +The adaptive recentering generalizes by replacing the positivity floor on +`s`/`z` with a floor on the **distance to the cone boundary** +`λ_min = s₀ − ‖s₁‖`, projecting the warm point back to `int(K)`. Same +structure, cone-aware primitive. Cold start seeds SOC blocks at the cone +identity `e = (1,0,…,0)`, not `1`. + +## Differentiable-layer extension (last) + +The OptNet backward currently linearizes complementarity as +`diag(λ)`/`diag(slack)` — pure orthant. SOC needs the Jordan-product / NT +differential (arrow blocks instead of `diag`). The forward already returns +`(x, z)` regardless of cone; only the backward KKT differential is +cone-specific. **Ship SOC forward/solve first; keep the differentiable +layer LP/QP-only**, then add cone-aware implicit diff as a distinct +follow-up (derive + finite-difference-validate per cone, as for the matrix +gradients). + +## Phased plan + +| Phase | Scope | Risk | +|---|---|---| +| **1** | `CompositeCone` + `ConeKind`; driver routed through it; `NonnegCone` behind it. **No behavior change.** | low — pure refactor, existing tests guard it | +| 2 | `ConeSpec` on `QpProblem`; trait gains `kkt_block`/`rhs_comp_term`; `SecondOrderCone` NT scaling; **Tier-A dense KKT block**; cold start at `e`; cone `max_step`/`mu`; solve standard-form SOCPs | **medium-high** — NT reduced-system algebra; validate vs known optima + a reference solver | +| 3 | Cone-aware infeasibility certificates; per-cone presolve gating | low–medium | +| 4 | Warm-start recentering on `λ_min`; SOCP input plumbing (CLI/`.nl`/Python wrapper) | low–medium | +| 5 | Tier-B sparse low-rank KKT expansion (large cones) | medium — fill/perf, not correctness | +| 6 | Cone-aware differentiable layer (JAX) | medium-high — new dual-diff derivation | + +The single highest-risk artifact is the NT reduced-system algebra in +Phase 2 (`kkt_block` + `rhs_comp_term` + `recover_ds` must be mutually +consistent). Validate it the way everything else in this crate is: +known-optima tests plus a randomized KKT-residual check against a trusted +SOCP solver. + +## Phase 1 — what lands now, and what is deliberately deferred + +**Lands:** `CompositeCone`/`ConeKind` and the driver routed through a +single-`Nonneg` composite. This is a pure internal refactor: no public API +change, no behavior change, fully guarded by the existing convex test +suite. It creates the block-dispatch seam every later phase plugs into. + +**Deferred to the start of Phase 2** (to avoid dead scaffolding that could +rot): the `QpProblem.cones` field and the `kkt_block`/`rhs_comp_term` +trait methods. They only earn their keep once a non-diagonal cone exists, +and adding them against an only-diagonal implementation now would be +unused surface. Phase 2 introduces them together with `SecondOrderCone`. diff --git a/dev-notes/vision.md b/dev-notes/vision.md new file mode 100644 index 00000000..8943ac87 --- /dev/null +++ b/dev-notes/vision.md @@ -0,0 +1,173 @@ +# pounce — vision / positioning + +> Draft for discussion. The goal is a statement that says *where pounce sits* +> in the optimization-software landscape and *why it is different*, not a +> feature list. Three candidate framings below, then the supporting pillars +> and the one-liners they roll up into. + +--- + +## The one-sentence version (lead candidate) + +**pounce is one pip-installable optimization stack — LP through MINLP — built +to live inside modern ML and agent pipelines: differentiable where you need a +solver in the loop, constraint-guaranteeing where you need the answer to be +*feasible*, and legible to the LLMs and agents that increasingly drive the +modeling.** + +--- + +## Why now — the gap pounce fills + +The optimization-software world is split into camps that don't talk to each +other: + +- **Classical solvers** (Ipopt, the commercial MI(N)LP engines) are fast and + trustworthy but live behind C/Fortran ABIs and file formats. They were built + before autodiff frameworks and before LLMs, and they treat the solve as a + black box you call once and read the log of. +- **Differentiable-optimization layers** (cvxpylayers, theseus, the + implicit-diff toolkits) plug a solver into JAX/PyTorch, but each covers a + narrow problem class (usually convex QP/cone programs), ships its own + numerics, and stops at the boundary of what its backend can express. +- **Modeling layers** (Pyomo, JuMP, CVXPY) are great for humans authoring + models, but the solver underneath is still an opaque dependency you install + separately and debug by hand. + +pounce's bet is that these stop being separate concerns. One numerical +backbone should: + +1. **span the whole ladder** — LP, QP, SOCP, SDP, exp/power cones, general + NLP, and certified-global nonconvex — so a project never hits a wall where + the problem class outgrew the tool; +2. **be differentiable as a first-class mode** — the solver is a layer you can + put *inside* a learned model and backprop through, not just a thing you call + at the end; +3. **guarantee feasibility** — a differentiable layer whose forward pass is a + real interior-point solve returns a point that *satisfies the constraints*, + which a learned approximator can't promise; +4. **be legible to agents** — the same diagnostics a human reads are exposed + over MCP, so an LLM can author, run, and *debug* a model end to end. + +--- + +## The four pillars (what makes the claim true today) + +### 1. One roof: LP → MINLP, `pip install`, pure Rust +- `pip install pounce-solver` gets the whole family: the Ipopt-faithful NLP + core, the convex/conic IPM (`pounce-convex`), SOS/Lasserre global, and the + spatial branch-and-bound global solver (`pounce-global`). +- Pure Rust by default — no Fortran, no HSL, no system BLAS. One wheel, every + platform, reproducible. This is the thing that makes "one roof" not just a + slogan: there is genuinely one numerical backbone, not a meta-package + shelling out to six binaries. +- `auto` routing classifies a problem and sends it to the right solver, so the + "ladder" is invisible until you need to reason about it. +- The discrete top of the ladder is [discopt](https://github.com/jkitchin/discopt): + a MINLP modeling language + spatial branch-and-bound that uses pounce as its + primary NLP backend. Co-designed rather than plugged in — warm state, dual + bounds, infeasibility certificates, the shared AD/problem IR, and the debug + surface flow through the B&B tree instead of being rebuilt per node — so + pounce+discopt behave like *one MINLP engine*, not a B&B loop dispatching to a + generic solver. See `dev-notes/discopt-pounce-integration.md`. + +### 2. Differentiable optimization that guarantees constraints +- `pounce.jax`: `from_jax` builds a solver problem straight from traced + `f(x)`, `g(x)`; `solve` is wrapped in `jax.custom_vjp` so `jax.grad` flows + through a solve via the implicit-function theorem on the KKT system. +- `QpLayer` / `solve_qp` / `solve_socp`: differentiable conic layers whose + forward pass is a *real* IPM solve — the returned point is feasible by + construction, not a learned projection that's "close." This is the headline + for ML: a constraint layer you can trust. +- Built for the loop, not the one-shot: warm starts, factor reuse across a + path (`PathFollower`), batched/parallel solves, sparse colored AD so the + derivative cost scales with structure, not dimension. +- **Framework-agnostic by construction.** The differentiable layer is *one Rust + IPM with a KKT-based implicit backward* — the autodiff framework is just a + frontend over it. JAX is the first; PyTorch is a thin adapter, not a rewrite + (the solver core and the implicit-function-theorem math don't change — only + the array namespace and the `custom_vjp`↔`autograd.Function` wrapper do). This + is the "one roof" thesis extended from problem classes to autodiff + frameworks, and it's where cvxpylayers/theseus-style projects ship *separate* + per-framework numerics while pounce ships one backbone under both. Tracked in + [#109](https://github.com/jkitchin/pounce/issues/109). + +### 3. Native to ML pipelines (JAX today, PyTorch next) +- x64-correct, JIT-compatible, vmap-aware. The integration is designed around + how JAX actually composes (custom batching rule rather than lifting an impure + callback), not bolted on. +- A PyTorch frontend ([#109](https://github.com/jkitchin/pounce/issues/109)) + mirrors the same surface — and is *smaller* to build, because PyTorch's eager + mode drops the `pure_callback`/shape-declaration machinery JAX's traced model + forces. Reaching the PyTorch-first half of the ML/research audience is mostly + binding work, not new numerics. +- The target user is someone building a model where *part* of the forward pass + is "solve this optimization exactly" — inverse problems, control/MPC layers, + structured prediction, physics- or constraint-informed learning. + +### 4. Legible to agents and LLMs (the differentiator) +- An interactive solver **debugger**: break into a live solve, inspect the + iterate (primals, duals, KKT residuals, μ, inertia), sweep/multistart/replay. +- The same diagnostics exposed over **MCP** (`pounce-studio`), so an LLM agent + can analyze a model, run it, read the convergence trace, and explain *why* it + stalled — closing the loop from "agent writes a model" to "agent debugs the + solve." Few, if any, classical solvers were designed to be driven this way. +- Signed solve receipts (`pounce verify`) — verifiable provenance for an + answer, which matters when an agent (not a human) is the one trusting it. + +--- + +## For teaching & research (the legibility pillar, pointed at people) + +The same introspection that makes pounce legible to agents makes it a teaching +and research instrument no other solver can match. Plot it on two axes — +**introspectable internals** × **LLM-grounded explanation** — and the quadrant +pounce occupies is empty: classical solvers (Ipopt, SNOPT) print a log wall with +no live debugger and no LLM; commercial engines (Gurobi, BARON) are black boxes +by design; modeling layers (CVXPY, Pyomo) leave the solver opaque; and toy +teaching solvers aren't faithful to a production algorithm, so nothing transfers. +pounce is a **faithful production algorithm** (the Ipopt port — skills transfer) +that is **fully introspectable** and **explained by an LLM grounded in the real +trace and the literature**. + +- **Education** — a glass-box IPM students watch *run* (μ, inertia, filter, + restoration), a TA-over-MCP that reads *their* trace and explains the stall in + algorithm terms with a citation, a zero-setup classroom (`pip install`, pure + Rust, no licenses), and assignments graded on the *process* (the signed, + reproducible solve report), not just the final number. +- **Research** — the iteration trace as a reproducible dataset, a hackable + faithful baseline to perturb (swap a barrier rule and A/B it in one readable + Rust codebase), an LLM that drives the MCP surface to *run and write up* + experiments, and one diagnostic lens across NLP / conic / global / MINLP. + +This is publishable in its own right — an LLM-drivable interactive debugger for +interior-point methods as a pedagogical and research instrument. Full treatment +in `dev-notes/education-research.md`. + +--- + +## Taglines to choose from + +- *"From LP to MINLP, in your ML pipeline and your agent's hands."* +- *"The solver that's differentiable, feasible, and legible — under one pip + install."* +- *"One numerical backbone for the whole optimization ladder — built for the + era of differentiable programs and AI agents."* +- *"Optimization that ML can backprop through, agents can drive, and you can + trust to be feasible."* + +--- + +## What we are *not* claiming (keep it honest) + +- Not (yet) competing on raw speed with mature commercial MI(N)LP engines. +- "MINLP under one roof" is the *trajectory*: NLP + convex/conic + certified + global B&B are here; the integer side is the spatial-B&B path maturing toward + general MINLP. State it as direction, not a finished checkbox, until the + mixed-integer story is fully wired. +- Differentiable-everything is real for the convex/QP/NLP layers; be precise + about which classes have the `custom_vjp` path today. +- "Any autodiff frontend" is **JAX today, PyTorch tracked** ([#109](https://github.com/jkitchin/pounce/issues/109)), + not both-shipping. The architectural claim (one framework-agnostic core) is + true now; the PyTorch *binding* is roadmap. Don't imply a shipped PyTorch + package until the adapter lands. diff --git a/docs/src/SUMMARY.md b/docs/src/SUMMARY.md index 8c3868c2..91a369f3 100644 --- a/docs/src/SUMMARY.md +++ b/docs/src/SUMMARY.md @@ -6,11 +6,15 @@ - [Installation](installation.md) - [Quick Start](quick-start.md) +- [Choosing a Solver](choosing-a-solver.md) # Command-Line Interface - [Running Solves](cli.md) - [Solver Options](options.md) +- [LP / QP Solver Routing](lp-qp-routing.md) +- [Convex Solver: LP, QP, SOCP](convex-solver.md) +- [Global Optimization](global-optimization.md) - [Solution Output](solution-output.md) - [JSON Solve Report](json-output.md) - [Schema v1 Reference](schema/solve-report-v1.md) diff --git a/docs/src/acknowledgments.md b/docs/src/acknowledgments.md index 7b904429..ea58a944 100644 --- a/docs/src/acknowledgments.md +++ b/docs/src/acknowledgments.md @@ -1,16 +1,38 @@ # Acknowledgments -POUNCE is a Rust port of [Ipopt](https://github.com/coin-or/Ipopt), -the interior-point nonlinear programming solver by Andreas Wächter, -Lorenz T. Biegler, and the COIN-OR community. Its algorithm, console -output, and option semantics are modeled directly on that codebase, -which is released under the EPL-2.0. +POUNCE's nonlinear-programming core is a Rust port of +[Ipopt](https://github.com/coin-or/Ipopt), the interior-point nonlinear +programming solver by Andreas Wächter, Lorenz T. Biegler, and the COIN-OR +community. Its algorithm, console output, and option semantics are modeled +directly on that codebase, which is released under the EPL-2.0. It is a sibling of [ripopt](https://github.com/jkitchin/ripopt), an earlier memory-safe interior-point NLP optimizer in Rust by the same author (DOI [10.5281/zenodo.19542664](https://doi.org/10.5281/zenodo.19542664)). +## Convex solver inspiration + +The specialized convex conic solver (`pounce-convex`; see +[Convex Solver](convex-solver.md)) is a pure-Rust port of ideas — not a +wrapper — from two reference projects, gratefully acknowledged: + +- [**Clarabel**](https://github.com/oxfordcontrol/Clarabel.rs) by Paul + Goulart and Yuwen Chen (University of Oxford). POUNCE's + homogeneous-free conic interior-point design — a quadratic objective + handled directly over a product of symmetric cones, with + Nesterov–Todd scaling for the second-order cone and a + diagonal-plus-rank-1 sparse KKT representation — follows Clarabel's + approach. Clarabel is itself a pure-Rust solver; POUNCE shares the + spirit but is an independent implementation. +- [**PaPILO**](https://github.com/scipopt/papilo), the presolving + library of [**SCIP**](https://www.scipopt.org/) (the Zuse Institute + Berlin optimization suite). POUNCE's transaction-stack presolve with + full primal **and dual** postsolve — forcing constraints, dominated + columns, bound tightening with global dual recovery, parallel/duplicate + rows, iterated to a fixpoint — is modeled on PaPILO's catalog and + postsolve discipline. + ## Contributors - **David Bernal Neira** ([@bernalde](https://github.com/bernalde)) @@ -54,6 +76,25 @@ author (DOI Software* 30(2), 118–144 (2004). DOI [10.1145/992200.992202](https://doi.org/10.1145/992200.992202) — the optional `ma57` linear-solver backend. +- Goulart, P.J., Chen, Y. "Clarabel: An interior-point solver for + conic programs with quadratic objectives." (2024). + [arXiv:2405.12762](https://arxiv.org/abs/2405.12762) / + [Clarabel.rs](https://github.com/oxfordcontrol/Clarabel.rs) — the + conic interior-point design behind `pounce-convex`. +- Gleixner, A., Gottwald, L., Hoen, A. "PaPILO: A Parallel Presolving + Library for Integer and Linear Optimization with Multiprecision + Support." *INFORMS Journal on Computing* 35(6), 1329–1341 (2023). DOI + [10.1287/ijoc.2022.0171](https://doi.org/10.1287/ijoc.2022.0171) — + the presolve catalog and dual-postsolve model behind + `pounce-convex::presolve`. +- Domahidi, A., Chu, E., Boyd, S. "ECOS: An SOCP solver for embedded + systems." *European Control Conference* (2013), 3071–3076. DOI + [10.23919/ECC.2013.6669541](https://doi.org/10.23919/ECC.2013.6669541) + — the sparse second-order-cone KKT representation. +- Amos, B., Kolter, J.Z. "OptNet: Differentiable Optimization as a + Layer in Neural Networks." *ICML* (2017), 136–145. + [arXiv:1703.00443](https://arxiv.org/abs/1703.00443) — the implicit + differentiation behind the `pounce.jax` convex layers. - Wilkinson, M.D. et al. "The FAIR Guiding Principles for scientific data management and stewardship." *Scientific Data* 3, 160018 (2016). DOI diff --git a/docs/src/choosing-a-solver.md b/docs/src/choosing-a-solver.md new file mode 100644 index 00000000..784619bc --- /dev/null +++ b/docs/src/choosing-a-solver.md @@ -0,0 +1,190 @@ +# Choosing a Solver + +POUNCE is not a single solver but a small family of them sharing one +numerical backbone. This page is the map: what each solver is, when to +reach for it, and how they fit together. + +![POUNCE solver landscape](images/solver-landscape.svg) + +The one-sentence version: **convex and conic problems are solved to the global +optimum; nonconvex problems are solved locally by default, or to a certified +global optimum via the SOS (polynomial) and spatial branch-and-bound (general) +paths.** Every solver, whatever its flavor, ultimately factorizes a symmetric +KKT system through the shared `pounce-linsol` layer, which in turn drives a +pluggable backend (FERAL by default, HSL MA57 optionally). + +## The solvers at a glance + +| Solver | Problem class | Optimum | Crate | Entry points | +|---|---|---|---|---| +| **NLP filter-IPM** | general smooth NLP (nonconvex OK) | local (KKT) | `pounce-algorithm` + `pounce-nlp` | CLI default; Python `Problem`/`minimize`; `--solver nlp` | +| **NLP active-set SQP** | general smooth NLP | local | `pounce-algorithm` (subproblems via `pounce-qp`) | `algorithm=active-set-sqp` | +| **Convex IPM (LP/QP)** | LP, convex QP | **global** | `pounce-convex` | `solve_qp_ipm`; `pounce.qp.solve_qp`; `--solver lp-ipm`/`qp-ipm` | +| **Convex IPM (conic)** | SOCP, exponential, power, PSD (small) cones | **global** | `pounce-convex` | `solve_socp_ipm`; `pounce.qp.solve_socp`; `pounce .cbf` | +| **Active-set QP** | QP, convex *or* indefinite | local | `pounce-qp` | `ParametricActiveSetSolver`; `--solver qp-active-set` | +| **SOS / Lasserre** | polynomial (nonconvex) | **global** | `pounce-convex` | `sos_minimize`; `pounce.sos_minimize` | +| **Spatial branch-and-bound** | general factorable nonconvex NLP | **global** | `pounce-global` | `solve_global` | + +## When to choose each + +### General nonlinear program (the common case) → **NLP filter-IPM** + +If your model has nonlinear objective or constraints and you don't know +(or can't assume) convexity, this is the default and the most mature path. +It is POUNCE's port of Ipopt's filter line-search interior-point method: +robust on nonconvex problems, with a feasibility **restoration phase** for +hard starts and exact or limited-memory Hessians. It returns a local +KKT point — for a nonconvex problem there is no global guarantee. + +- CLI: `pounce model.nl` (or a built-in problem). +- Python: the cyipopt-style `Problem` class, or the scipy-style + `minimize` facade. +- Reach for **limited-memory** Hessians (`hessian_approximation=limited-memory`) + when second derivatives are unavailable or expensive. + +### A *sequence* of related NLPs, or a stable active set → **NLP active-set SQP** + +Selected with `algorithm=active-set-sqp`. It solves the NLP as a sequence +of quadratic subproblems (handed to `pounce-qp`), which warm-starts +extremely well when the active set is stable across solves — e.g. a +parametric sweep or a control loop. For a single cold solve of a general +NLP, prefer the filter-IPM. + +### Linear or convex quadratic program → **Convex IPM (LP/QP)** + +If `P ⪰ 0` (or `P = 0` for an LP), use the convex interior-point solver: +it returns the **global** optimum, detects primal/dual infeasibility, and +offers warm-starting, batched and multiple-RHS solving, a build-once / +solve-many `QpFactorization` handle, and post-optimal **sensitivity** +(`QpSensitivity` — the sIPOPT analog). The CLI's `auto` routing classifies +an `.nl` and sends LP/convex-QP problems here automatically. + +- Python: `pounce.qp.solve_qp` (and `solve_qp_batch`, `solve_qp_multi_rhs`). + +### Second-order, exponential, or power cones → **Convex IPM (conic)** + +The same convex solver handles conic programs: second-order cones, the +**exponential** and **power** cones that express geometric programming, +entropy / log-sum-exp, logistic models, and `p`-norm constraints, and the +**positive-semidefinite** cone for small dense SDPs. Also **global**. This +is the path to use when you can cast a nominally-nonconvex problem into a +convex cone — you trade modeling effort for a global guarantee. (The PSD +cone is self-scaled and runs on the symmetric driver; the exp/power cones +run on the non-symmetric HSDE driver, so the two families can't yet be +mixed in one problem.) + +- Python: `pounce.qp.solve_socp(..., cones=[("exp", 3), ("pow", 0.5), ...])`. +- CLI: a Conic Benchmark Format file, `pounce model.cbf` (see the CBLIB + benchmark tier). + +### Nonconvex problem, global optimum required → **SOS** or **spatial branch-and-bound** + +When the problem is genuinely nonconvex and a *local* optimum is not good +enough, two paths certify the **global** optimum: + +- **Polynomial** objective/constraints → **SOS / Lasserre** (`sos_minimize`, + or `pounce.sos_minimize`). A single semidefinite program certifies the global + minimum (the largest `γ` with `p − γ` in the Putinar cone), and the global + minimizers are recovered from the moment matrix — even multiple ones, via a + facial-reduction step. Best for modest degree and dimension; the SDP grows + with the relaxation order. +- **General factorable** problems (including `exp`/`ln`/trig), or polynomials + too large for the SDP → **spatial branch-and-bound** (`pounce-global`, + `solve_global`). It brackets the optimum between a McCormick relaxation lower + bound and a local-solve upper bound, subdividing until they meet — returning a + feasible point and a certified optimality gap. Continuous variables only (no + MINLP yet). + +See [Global Optimization](global-optimization.md) for both in depth. + +### Indefinite QP, or a QP inner-solver → **Active-set QP** + +`pounce-qp` is a sparse parametric active-set solver that accepts an +**indefinite** Hessian (via inertia control), with two-sided bounds and +factorization-reuse across a homotopy. It is the engine behind the +active-set SQP path, and is the right choice for MPC-style problems or any +setting where you re-solve a slowly-changing QP many times. Use the convex +IPM instead when `P ⪰ 0` and you want a single robust solve with +infeasibility certificates. + +## How to override the automatic routing + +The CLI classifies each `.nl` problem and picks a solver, but you can force +the choice: + +```sh +pounce model.nl --solver auto # default: classify, then route +pounce model.nl --solver nlp # filter-IPM (or active-set-sqp via algorithm=) +pounce model.nl --solver lp-ipm # convex LP interior-point +pounce model.nl --solver qp-ipm # convex QP interior-point +pounce model.nl --solver qp-active-set # active-set QP +pounce model.nl --solver global # spatial branch-and-bound (global) +``` + +(The CLI spelling of the option is `solver_selection=`, e.g. +`pounce model.nl solver_selection=global`.) The global solver needs a **finite +box**: variables left unbounded in the `.nl` are capped to a large default with +a warning, and the certified optimum is then global only within that box. + +See [LP / QP Solver Routing](lp-qp-routing.md) for how classification works +and when it falls back to the more general solver. + +## The shared backbone + +Every interior-point and active-set solver above assembles a symmetric KKT +system and factorizes it through **`pounce-linsol`**. That trait layer is +backend-agnostic: + +- **FERAL** (`pounce-feral`) — a pure-Rust sparse symmetric LDLᵀ + factorization. The default; no external dependencies. +- **HSL MA57** (`pounce-hsl`) — the well-known Harwell solver via + `libcoinhsl`, enabled with the `ma57` build feature for large or + ill-conditioned systems. + +Because the backend is pluggable, the same solver code runs on either +without change. + +## Cross-cutting layers + +These are not solvers you select, but stages and tools the solvers share: + +- **Presolve** (`pounce-presolve`) — an optional front-end that tightens + bounds (feasibility-based bound tightening), removes redundant rows, and + repairs LICQ degeneracies before the solve. +- **Restoration** (`pounce-restoration`) — the feasibility-recovery phase + the filter-IPM enters when a step cannot reduce both infeasibility and + the objective; `pounce-l1penalty` offers an ℓ₁-exact penalty + reformulation for degenerate / LICQ-violating problems. +- **Sensitivity** — `pounce-sensitivity` gives sIPOPT-style parametric + steps and reduced Hessians for the NLP; `QpSensitivity` does the same for + the convex QP. See [Sensitivity Analysis](sensitivity.md). +- **Cone library** (`pounce-convex`) — nonnegative, second-order, + exponential, power, and (for small dense problems) positive-semidefinite + cones, so small SDPs solve as a convex class. The PSD cone cannot yet be + mixed with the exponential/power cones in one problem (they use different + drivers). +- **Solve report** — every path can emit the machine-readable + `pounce.solve-report/v1` JSON (status, iterations, residuals, timing). + See [JSON Solve Report](json-output.md). + +## Global vs. local — the honest summary + +POUNCE settles a problem globally along three routes, and locally along one: + +- **Global by convexity** — LP, convex QP, SOCP, and the exponential / power / + PSD cone classes. Local *is* global, so a convex or conic reformulation buys + the guarantee outright. +- **Global by certificate (polynomials)** — the SOS / Lasserre optimizer + certifies the global minimum of a nonconvex polynomial from a single SDP. +- **Global by branch-and-bound (general nonconvex)** — `pounce-global` does + deterministic spatial branch-and-bound with McCormick relaxations, FBBT/OBBT + bound tightening, and local upper bounds, returning a certified optimality + gap. Continuous variables only for now (no MINLP); see + [Global Optimization](global-optimization.md). +- **Local for general NLP** — the filter-IPM and SQP paths converge to a KKT + point, which for a nonconvex problem carries no global guarantee. + +Two practical levers for a "global" answer: **modeling** (cast as much as you +can into the convex cone library) and, when that is not possible, the +**global solvers** above — SOS for polynomials, spatial branch-and-bound for +everything factorable. diff --git a/docs/src/convex-solver.md b/docs/src/convex-solver.md new file mode 100644 index 00000000..1fd8c9cd --- /dev/null +++ b/docs/src/convex-solver.md @@ -0,0 +1,184 @@ +# Convex Solver: LP, QP, and SOCP + +POUNCE ships a specialized **convex conic interior-point solver** +(`pounce-convex`) alongside the general NLP filter-IPM. It solves the +standard-form convex program + +```text +minimize ½ xᵀP x + cᵀx +subject to A x = b + G x ⪯_K h + lb ≤ x ≤ ub +``` + +where `P ⪰ 0` and the inequality block lies in a product cone `K` of +nonnegative orthants and second-order cones. `P = 0` is an LP; an +all-orthant `K` is an LP/QP; second-order blocks make it an **SOCP**. + +The method is a **Mehrotra predictor–corrector** primal–dual interior-point +algorithm with Nesterov–Todd scaling for the cones, sharing the pure-Rust +[`feral`](algorithm.md) sparse LDLᵀ backend with the NLP path. It reaches +optimality in materially fewer iterations than routing the same problem +through the general NLP solver (≈30–50% fewer on bound/inequality QPs). + +> **Inspiration.** The conic interior-point design follows +> [Clarabel](https://github.com/oxfordcontrol/Clarabel.rs) (Goulart & +> Chen) — handling a quadratic objective directly and a product of +> symmetric cones — and the presolve follows +> [PaPILO](https://github.com/scipopt/papilo) (the presolving library of +> [SCIP](https://www.scipopt.org/)). POUNCE does not wrap either (the +> pure-Rust guarantee) but ports their ideas; see +> [Acknowledgments](acknowledgments.md). + +This chapter covers the **Python API** (`pounce.qp` and the differentiable +`pounce.jax` layers). For automatic CLI/Pyomo routing of `.nl` LPs/QPs, see +[LP / QP Solver Routing](lp-qp-routing.md). Runnable, progressive notebooks +live in [`python/notebooks/`](https://github.com/jkitchin/pounce/tree/main/python/notebooks): +`13_convex_qp.ipynb`, `14_socp.ipynb`, `15_differentiable_convex.ipynb`. + +## Quadratic programs + +```python +import numpy as np +from pounce.qp import solve_qp + +# min ½·2‖x‖² − 3x₀ − 4x₁ s.t. x₀ + x₁ ≤ 1, 0 ≤ x ≤ 1 +r = solve_qp( + P=np.diag([2.0, 2.0]), + c=[-3.0, -4.0], + G=[[1.0, 1.0]], h=[1.0], + lb=[0, 0], ub=[1, 1], +) +r.status # 'optimal' +r.x # primal solution +r.y, r.z # equality / inequality multipliers +r.z_lb, r.z_ub # bound multipliers (≥ 0) +r.obj, r.iters +``` + +`P` (lower triangle used, assumed symmetric), `A`, and `G` accept dense +arrays or scipy-sparse matrices; any of them may be omitted. The result is +a `QpResult` dataclass with a `.success` property. The solver reports +**verified** infeasibility / unboundedness (`'primal_infeasible'` / +`'dual_infeasible'`) backed by a Farkas / recession certificate rather than +an iteration-limit guess. + +## Second-order cone programs + +A second-order (Lorentz) cone is `{ (t, x) : t ≥ ‖x‖₂ }`. Partition the +inequality rows of `Gx ⪯_K h` with `cones` — a list of `(kind, dim)` specs +(`"nonneg"` or `"soc"`; a bare int means a second-order cone). Each slack +block `s = h − Gx` must lie in its cone. + +```python +from pounce.qp import solve_socp + +# minimize ‖x − x*‖ ⇔ min t s.t. (t, x − x*) ∈ SOC +r = solve_socp( + c=[1.0, 0.0, 0.0], # minimize t + G=-np.eye(3), h=[0.0, -2.0, 1.0], # s = (t, x₀−2, x₁+1) ∈ SOC(3) + cones=[("soc", 3)], +) +r.x # ≈ [0, 2, -1]: t* = 0, x = x* +``` + +Mixed cones compose — e.g. `cones=[("nonneg", 1), ("soc", 2)]` puts the +first slack in `ℝ₊` and the next two in a 2-D second-order cone. Large +cones use a **sparse diagonal-plus-rank-1** KKT representation (one +auxiliary variable per cone, the ECOS/Clarabel "sparse SOC" trick) so the +factorization stays sparse. + +## Warm starting + +Feed a previous (or nearby) solution back to seed the interior-point +iteration — useful for parametric sweeps, receding-horizon MPC, and +branch-and-bound subproblems: + +```python +base = solve_qp(P=P, c=c, G=G, h=h, lb=lb, ub=ub) +nxt = solve_qp(P=P, c=c2, G=G, h=h, lb=lb, ub=ub, warm_start=base) +``` + +The warm start only affects the iteration count, never the solution (a +mismatch is ignored). The recentering is **adaptive** for the orthant +(sized to the warm point's KKT residual, so it exploits a nearby problem's +duals yet self-corrects when the active set moves) and re-centers the cone +duals for second-order blocks (a converged conic point sits on the cone +boundary, where the scaling is singular). + +## Batching and factorization reuse + +```python +from pounce.qp import solve_qp_batch, QpFactorization + +# Solve many independent QPs in parallel (rayon, across instances). +results = solve_qp_batch([dict(P=P, c=c_k, G=G, h=h) for c_k in cs]) + +# Build the KKT symbolic factor once, solve many same-structure problems. +fac = QpFactorization(P=P, c=c0, G=G, h=h, lb=lb, ub=ub) +for c_k in cs: + rk = fac.solve(P=P, c=c_k, G=G, h=h, lb=lb, ub=ub) # reuses the factor +``` + +`solve_qp_batch` parallelizes across instances (outer-parallel / +inner-serial) and `QpFactorization` reuses the AMD ordering and symbolic +factorization across solves that share a structure — the two compose with +warm starting. + +## Presolve (PaPILO-inspired) + +Before the interior-point solve, POUNCE can apply a **transaction-stack +presolve** with full primal **and dual** postsolve, modeled on +[PaPILO](https://github.com/scipopt/papilo). The catalog: + +- empty / **duplicate / parallel** (scalar-multiple) rows, +- fixed-variable elimination (singleton equalities), +- free columns and free-column singletons, +- activity-based redundancy and infeasibility detection, +- **forcing constraints** (a row at its activity extreme pins its variables), +- **dominated columns** (sign-definite columns optimal at a bound), +- **bound tightening** (domain propagation), with the active-bound + multiplier re-attributed to its source row in postsolve, + +iterated to a **fixpoint** so reductions cascade. Each reduction carries +the data to reverse itself, and the postsolve reconstructs a valid KKT +point of the *original* problem — the dual recovery is the contract, and is +verified by KKT-residual tests. A cone-aware variant (`presolve_conic`) +gates the `≤`-row reductions off second-order-cone blocks (which are +coupled) and recovers the reduced cone partition. + +Presolve is applied automatically on the CLI LP/QP route; it lives in +`pounce-convex::presolve` for Rust callers. See +[LP / QP Solver Routing](lp-qp-routing.md). + +## Differentiable convex layers (JAX) + +`pounce.jax` exposes the solve as a differentiable JAX op via the +implicit-function theorem on the KKT system at the optimum (Amos & Kolter, +*OptNet*, 2017). The forward calls the solver; the backward is a single +linear solve through the same KKT matrix. + +```python +import jax, jax.numpy as jnp +from pounce.jax import solve_qp, solve_socp, QpLayer + +# x*(c) for a parametric QP, differentiable w.r.t. all of P, c, G, h, A, b. +def loss(c): + x = solve_qp(P=P, c=c, G=G, h=h) + return jnp.sum((x - target) ** 2) + +grad_c = jax.grad(loss)(c0) # exact gradient via implicit diff +J = jax.jacrev(lambda c: solve_qp(P=P, c=c, G=G, h=h))(c0) +``` + +- Gradients are provided w.r.t. **every** parameter that enters through the + optimum: `c`, `b`, `h`, and the matrices `P`, `G`, `A` (the full OptNet + matrix derivatives; `∇P` is the symmetric gradient). +- `solve_socp` differentiates SOCPs too — the complementarity row uses the + cones' **arrow operators** in place of the orthant's diagonal. +- `QpLayer` captures a fixed `P`/`G`/`A` structure for use inside a larger + JAX model, with `jax.grad` / `jacrev` / `vmap` and a parallel `.batch`. +- A warm start may be passed through (non-differentiated — it cannot change + the solution or its gradients, only the iteration count). + +All gradients are validated against finite differences in the test suite. diff --git a/docs/src/debugger.md b/docs/src/debugger.md index 6ff80ef9..4662e3b0 100644 --- a/docs/src/debugger.md +++ b/docs/src/debugger.md @@ -19,6 +19,12 @@ It has two front ends sharing one command engine: No production NLP solver ships anything like this; if you have used `ipopt` you have had `print_level` and a log. This is a live debugger. +The same debugger spans **every** POUNCE solver: the NLP filter-IPM, the +convex / conic interior-point solver, and the spatial branch-and-bound +global optimizer — and you can **step from a branch-and-bound node into the +interior-point debugger for that node's relaxation**. See [Beyond the +interior-point loop](#beyond-the-interior-point-loop). + > The debugger has **zero effect on the solve when it is not attached**. > The checkpoint fire-sites short-circuit when no debugger is installed, > so the standard regression suite is bit-for-bit identical with and @@ -1295,12 +1301,118 @@ points (`numpy.savetxt("starts.txt", X0, delimiter=",")`). See --- +## Beyond the interior-point loop + +Everything above is the NLP filter-IPM. The same debugger — same command +engine, same REPL — drives the other solvers too. + +### Convex and conic solves + +The convex LP/QP interior-point solver and the HSDE conic drivers (SOCP, +the exponential / power cones, and small PSD cones) expose the **same** +checkpoints and commands as the NLP loop. The iterate blocks follow the QP +standard form — `x` (variables), `s` (cone slacks), `y` (equality +multipliers), `z` (inequality / cone multipliers) — and the HSDE drivers +additionally expose the homogenizing scalars `tau` / `kappa` as 1-element +blocks (`print tau`). `set ` and `goto` work as on the NLP path; +`set mu` is rejected, because the convex μ is *derived* from `⟨s, z⟩` +(edit `s`/`z` to move it). + +```sh +pounce model.nl --debug # LP / convex-QP (auto-routed) — IPM REPL +pounce_cblib model.cbf --debug # SOCP / exp / power / PSD (conic) — IPM REPL +pounce_cblib model.cbf --debug-script s.pdbg +``` + +### The branch-and-bound tree + +Branch-and-bound is a *tree search*, not an iteration loop, so it has its +own REPL — you step over **nodes**, not iterations. Launch it by routing to +the global solver: + +```sh +pounce model.nl solver_selection=global --debug +``` + +It pauses at the tree checkpoints — `node_selected`, `relaxation_solved`, +`incumbent_found`, `node_pruned`, `branched`, `terminated` — and the +commands are tree-native: + +| Command | Shows / does | +|---|---| +| `s` / `step` | run to the next checkpoint | +| `c` / `continue` | run until a breakpoint or the end | +| `node` | the current node's variable box and its bound | +| `bounds` | global lower bound, incumbent (upper), and gap | +| `gap` | the optimality gap | +| `incumbent` / `inc` | the best feasible point so far | +| `frontier` | number of open nodes | +| `break incumbent` | stop when the incumbent improves | +| `break gap ` | stop once the gap ≤ x | +| `break depth ` | stop at a node of depth ≥ n | +| `break node ` | stop when node #id is selected | +| `into` | **step into this node's relaxation solve** (see below) | +| `q` / `quit` | stop the search now | + +```text +$ pounce model.nl solver_selection=global --debug +── btree ── node_selected node #1 depth 0 lb=NaN inc=none gap=inf frontier=0 (nodes 0) +(btree) break depth 1 +breakpoint: depth ≥ 1 +(btree) continue +── btree ── node_selected node #2 depth 1 lb=NaN inc=2.000000e0 gap=inf frontier=1 (nodes 1) +(btree) incumbent +incumbent obj = 2.00000000e0 at x = [1.000000e0, 1.000000e0] +(btree) quit +``` + +### Step into a node's relaxation (`into`) + +Each branch-and-bound node computes its lower bound by solving a **convex +relaxation** — which is itself an interior-point solve the debugger +understands. So at a `node_selected` pause, `into` drops you into the +interior-point REPL for that node's relaxation, with the full iterate-level +command set; when you `continue` past it, control returns to the tree. + +```text +── btree ── node_selected node #1 depth 0 lb=NaN inc=none gap=inf +(btree) into +stepping into node #1's relaxation solve… + +── pounce-dbg ── iter 0 @iter_start mu=1.000e0 obj=0.000000e0 inf_pr=2.00e0 inf_du=1.00e0 +pounce-dbg> print mu +mu = 1.0000000000e0 +pounce-dbg> continue # finish the relaxation, back to the tree +(btree) … +``` + +Under `--debug-script`, a single script interleaves the two: the tree +commands and the interior-point commands for a stepped-into relaxation are +read from the same queue (they run sequentially), e.g. + +```text +into # tree: step into node #1's relaxation +print mu # interior-point: now inside the relaxation solve +continue # interior-point: finish it, back to the tree +continue # tree: continue the search +``` + +--- + ## Limitations - **Soft rewind only.** `goto`/`restart` restore the primal-dual state, not strategy history (see the caveat above). - **`set opt` is staged, not hot-applied** to a running solve; it takes effect on `resolve` / the next solve. +- **The parallel branch-and-bound pool is not debuggable** — with + concurrent nodes there is no single "current node", so `--debug` runs the + deterministic serial driver. The result is identical; only wall-clock + differs. +- **No tree rewind.** The tree debugger inspects and breaks; it does not + rewind the search or edit a node's box (the interior-point debugger it + steps into is still fully read/write). +- **Stepping into applies to the relaxation** (the lower-bound solve), not + yet the local upper-bound NLP solve. - diff --git a/docs/src/global-optimization.md b/docs/src/global-optimization.md new file mode 100644 index 00000000..44d56e36 --- /dev/null +++ b/docs/src/global-optimization.md @@ -0,0 +1,231 @@ +# Global Optimization + +Most of POUNCE settles a problem at a **local** optimum (the NLP filter-IPM and +SQP) or exploits convexity so that local *is* global (the convex/conic IPM). +This chapter covers the two paths that certify a **global** optimum of a +genuinely **nonconvex** problem: + +- **Spatial branch-and-bound** (`pounce-global`) — for general factorable + nonconvex NLPs. +- **The SOS / Lasserre hierarchy** (`pounce-convex`) — for polynomial problems, + via a single semidefinite program. + +Both return a result that is *certified*: a feasible point together with a +proof (an optimality gap, or a moment certificate) that no better point exists. + +## Spatial branch-and-bound + +### The problem + +```text +minimize f(x) +subject to cl_j ≤ g_j(x) ≤ cu_j (j = 0 … m−1) + x_lo ≤ x ≤ x_hi +``` + +`f` and the `g_j` are **factorable** — built from `+ − × ÷`, integer powers, +`√`, `exp`, `ln`, `|·|`, `sin`, and `cos`. A bounded box is required (the +relaxation needs finite bounds). + +### The idea + +Branch-and-bound brackets the global optimum between a **lower bound** (valid +over a region) and an **upper bound** (the value of some feasible point), then +subdivides the search region until the two meet. The whole game is making the +lower bound tight enough, fast enough. + +For each node — a box `[lo, hi]` — the solver: + +1. **Tightens the box.** Feasibility-based bound tightening (FBBT) propagates + interval bounds through each constraint; **optimization-based** bound + tightening (OBBT) then minimizes and maximizes each variable over the + relaxation (with an incumbent cutoff). Either may prove the box empty, in + which case it is pruned. +2. **Computes a lower bound.** A convex *relaxation* of the problem over the + box — built so that it underestimates `f` and contains every feasible point + — is solved as a linear program through `pounce-convex`. Its optimum is a + valid lower bound. Crucially the relaxation is **exact in the limit of a + zero-width box**, so as branching shrinks boxes the bound converges to the + truth. +3. **Improves the incumbent.** Feasible points are probed (the relaxation + solution, the box center) and polished with a local NLP solve + (`pounce-algorithm`), giving a sharp upper bound. +4. **Branches.** The variable with the largest **relaxation violation** (the + one whose nonconvexity is driving the gap) is split at the relaxation point + — falling back to the widest box side when nothing is violated — and the two + child boxes join a best-first frontier ordered by node lower bound. + +The search stops when the frontier's lowest bound meets the incumbent within +tolerance — at which point the incumbent is the certified global optimum. + +```rust +use pounce_global::{expr::var, solve_global, GlobalProblem, GlobalOptions, GlobalStatus}; +use pounce_feral::FeralSolverInterface; + +// Six-hump camel — six local minima, two global (value ≈ −1.0316). +let x = var(0); +let y = var(1); +let f = 4.0 * x.clone().powi(2) - 2.1 * x.clone().powi(4) + (1.0 / 3.0) * x.clone().powi(6) + + x.clone() * y.clone() - 4.0 * y.clone().powi(2) + 4.0 * y.powi(4); + +let prob = GlobalProblem::new(vec![-2.0, -1.5], vec![2.0, 1.5], &f); +let sol = solve_global(&prob, &GlobalOptions::default(), + || Box::new(FeralSolverInterface::new())); + +assert_eq!(sol.status, GlobalStatus::Optimal); +// sol.objective ≈ −1.0316 (a certified global minimum, not just a local one) +// sol.lower_bound brackets it; sol.gap() is the optimality gap; sol.nodes the +// branch-and-bound node count. +``` + +Build constraints with the same expression DSL: + +```rust +let obj = var(0) + var(1); +let g = var(0) * var(1); +// min x + y s.t. x·y ≥ 4 on [1,5]² → 4 at (2,2) +let prob = GlobalProblem::new(vec![1.0, 1.0], vec![5.0, 5.0], &obj).ge(&g, 4.0); +``` + +`.ge`, `.le`, `.equality`, and `.subject_to(g, lo, hi)` add constraints; an +infeasible problem returns `GlobalStatus::Infeasible` with a proof. + +### From Python and the CLI + +The solver is reachable beyond the Rust API: + +- **Python** — `pounce.minimize_global` with an ergonomic expression DSL: + + ```python + from pounce.global_opt import var, minimize_global, ge + x, y = var(0), var(1) + f = (4 - 2.1 * x**2 + x**4 / 3) * x**2 + x * y + (-4 + 4 * y**2) * y**2 + r = minimize_global(f, lo=[-2, -1.5], hi=[2, 1.5]) # r.objective ≈ −1.0316 + ``` + + All `GlobalOptions` knobs are keyword arguments (`obbt_passes`, `threads`, …); + constraints are `[ge(g, lb), le(g, ub), eq(g, rhs)]`. + +- **CLI** — `pounce model.nl solver_selection=global` runs the solver on an + AMPL `.nl` model. Because the relaxation needs a **finite box**, variables + left unbounded in the `.nl` are capped to a large default (with a warning), + and the certified optimum is then global only within that box — so the global + solver is most useful on `.nl` models with sensible finite variable bounds. + +### The relaxation suite + +The lower bound is everything, and POUNCE's is built term by term over the +factorable expression tape (the same `FbbtTape` representation FBBT uses), with +the techniques a state-of-the-art global solver uses: + +| Component | Role | +|---|---| +| **Tight univariate envelopes** | The exact convex/concave hull of each atom (`xⁿ`, `√`, `exp`, `ln`, `sin`, `cos`, `|·|`): secant + tangent cuts on a convex/concave arc, the *tangent-from-the-endpoint* construction for single-inflection arcs (odd powers across 0, trig over a sub-π box), and slope-sampled supporting lines for trig over wider boxes. | +| **McCormick** | The exact convex hull of each bilinear product. | +| **Sandwich cuts** | After the LP solve, tangent cuts are added at the solution for loose atoms and the LP re-solved — tightening the bound *without* branching. | +| **OBBT** | Optimization-based bound tightening: the single biggest box reducer. | +| **αBB** | A convex underestimator of the *whole* objective, from a rigorous interval-Hessian spectral shift (`α ≥ max(0, −½λ_min)`), complementing the term-wise relaxation. | +| **RLT** | Level-1 reformulation-linearization: each affine constraint times each variable bound factor, linearized with shared product columns. | +| **Multilinear** | A 3-way product `x·y·z` is relaxed by intersecting all three bilinear groupings, not just the one nested grouping. | + +Each is a verified global under/over-estimator — so any of them can be turned +on or off without affecting correctness, only the bound's tightness (and the +node count). On the six-hump camel, the envelope engine alone certifies in 287 +nodes; adding sandwich cuts brings it to ~220, and OBBT to ~60. + +### Tuning + +`GlobalOptions` exposes the gap tolerances and every relaxation knob: + +| Field | Default | Meaning | +|---|---|---| +| `abs_gap`, `rel_gap` | `1e-6` | stop when `ub − lb` clears either tolerance | +| `feas_tol` | `1e-6` | constraint tolerance for accepting an incumbent | +| `box_tol` | `1e-7` | stop branching a box this narrow | +| `max_nodes` | `5000` | node budget (else `NodeLimit`, with bound + incumbent) | +| `local_solve_iters` | `50` | IPM iteration cap for the NLP upper-bound polish (`0` off) | +| `sandwich_rounds` | `4` | cutting-plane rounds per node (`0` off) | +| `obbt_passes` | `2` | OBBT sweeps per node (`0` off — costly: `2n` LP solves/pass) | +| `alphabb_cuts` | `1` | αBB tangent planes added to the objective (`0` off) | +| `rlt` | `true` | level-1 RLT cuts | +| `multilinear` | `true` | multi-grouping trilinear relaxation | +| `branching` | `MostViolation` | branching rule: `Widest`, `MostViolation`, or `Reliability` | +| `parallel` | `false` | run OBBT's `2n` solves on a thread pool (deterministic) | +| `threads` | `1` | `> 1` runs the parallel node pool (non-deterministic order) | +| `fbbt` | — | FBBT configuration | + +The branching rule (`BranchRule`) chooses the variable to split: `Widest` (box +geometry), `MostViolation` (the variable whose nonconvexity drives the +relaxation gap — the default), or `Reliability` (pseudocosts learned from child +solves, with strong branching until a variable's pseudocost is reliable — the +MILP/MINLP SOTA rule). Because OBBT tightens every node here, the relaxation is +usually tight enough that the rule is second-order; reliability is most useful +on larger problems where variable choice dominates the node count. + +The defaults aim for robustness on small problems. OBBT dominates the per-node +cost; turn `obbt_passes` down (or off) on larger problems where the LP solves +outweigh the node savings. + +There are two opt-in forms of parallelism: + +- **`parallel = true`** parallelizes OBBT's `2n` independent solves per pass on a + thread pool — *deterministically* (the same nodes and optimum as serial, only + faster). On a 7-variable problem it cut wall-clock ≈2.3× on 14 cores; the + speedup is sub-linear because the relaxation build, sandwich cuts, αBB, RLT, + the local NLP solve, and branching remain serial within a node. +- **`threads > 1`** runs the **node pool**: workers pull whole frontier nodes + and process them concurrently (OBBT stays serial inside each worker). This is + coarser-grained and the larger speedup, but **non-deterministic** — the + certified optimum and gap are unchanged, yet the node count varies run to run + (parallel best-first explores some nodes a serial run would have pruned). On a + small 5-variable problem it was ≈2.6× on 14 cores (≈40 nodes — too few to + saturate the cores); it scales further as the tree widens. + +## The SOS / Lasserre path (polynomials) + +When the objective and constraints are **polynomials**, the +sum-of-squares / moment approach in `pounce-convex` is often the better tool: +it certifies the global minimum from a *single* semidefinite program — no +branching — by searching for the largest `γ` such that `p(x) − γ` lies in the +Putinar cone (a sum of squares plus constraint multipliers). + +```rust +use pounce_convex::{sos_minimize, PolyProblem, Polynomial}; +# use pounce_feral::FeralSolverInterface; +# use pounce_linsol::SparseSymLinearSolverInterface; +# fn backend() -> Box { Box::new(FeralSolverInterface::new()) } +// x⁴ − 2x² + 3 → global minimum 2 at x = ±1. +let p = Polynomial::new(1, vec![(vec![4], 1.0), (vec![2], -2.0), (vec![0], 3.0)]); +let sol = sos_minimize(&PolyProblem::new(p), None, backend); +// sol.lower_bound ≈ 2; when the moment matrix is flat, sol.minimizers holds +// the global minimizer(s) — here both x = +1 and x = −1. +``` + +The relaxation order can be raised to tighten the bound (the Lasserre +hierarchy), and the solution is recovered from the moment matrix: flat +truncation certifies exactness and a **facial-reduction** step recovers the +minimizers even when the optimum is non-unique. From Python this is +`pounce.sos_minimize`. The full treatment lives in the `pounce_convex::sos` +module documentation. + +When to prefer which: **SOS** for polynomials of modest degree and dimension +(one SDP, recovers all global minimizers, but the SDP grows with degree); +**spatial branch-and-bound** for general factorable problems including +`exp`/`ln`/trig, or polynomials where the SDP would be too large. + +## Honest limits + +`pounce-global` is a complete, correct *continuous* global solver. It is not +yet at commercial-solver scale: + +- **Continuous only** — no integer branching (MINLP). +- **Branching** offers widest, most-violation (default), and reliability + (pseudocost + strong branching) rules; with OBBT every node the rule is + usually second-order here, so it is a tunable knob rather than a fixed win. +- Atoms outside the supported set, `sin`/`cos` over a box spanning more than a + few full periods, and division by an interval straddling zero fall back to the + (valid but weak) interval box bound, which branching sharpens. (`sin`/`cos` + over a box wider than π but within a few periods now gets a valid sloped + relaxation rather than the bare box.) + +For the classes it does cover, the answer is global and certified. diff --git a/docs/src/images/solver-landscape.svg b/docs/src/images/solver-landscape.svg new file mode 100644 index 00000000..b2dd0b2b --- /dev/null +++ b/docs/src/images/solver-landscape.svg @@ -0,0 +1,146 @@ + + + + + + + + + + + POUNCE — solver landscape + + + + + CLI — pounce (.nl, .cbf) + + + Python — pounce + + + C API — cinterface + + + + + + + + + + Dispatch & routing + auto-classify (LP · QP · conic · NLP) — or force with --solver + + + + + + + + + LP · convex QP · conic + general NLP (default) + NLP, stable active set + QP / MPC subproblems + + + + + Convex IPM + pounce-convex + LP · convex QP + SOCP · exp · power cones + GLOBAL (convex) + warm-start · batch · sensitivity + + + + NLP filter-IPM + pounce-algorithm + nlp + general smooth NLP + (nonconvex OK) + LOCAL (KKT point) + filter line-search · restoration + + + + NLP active-set SQP + pounce-algorithm + general NLP + via QP subproblems + LOCAL + warm active-set re-solves + + + + Active-set QP + pounce-qp + QP (convex or indefinite) + parametric / MPC + LOCAL + factorization-reuse paths + + + + solves + + + + + + + + + Global optimization — certified global optima for nonconvex problems (orchestrate the solvers above) + + SOS / Lasserre + pounce-convex + polynomial → one SDP + + Spatial branch & bound + pounce-global + McCormick LP + FBBT/OBBT + NLP + SOS solves an SDP via the Convex IPM; branch-and-bound calls the Convex IPM (LP relaxations) and the NLP filter-IPM (upper bounds) + + + + + + Shared numerical core + + + Presolve (optional front-end) + FBBT · redundancy · LICQ repair — pounce-presolve + + + KKT assembly + sparse symmetric LDLᵀ factorization + pounce-linsol (shared by every interior-point / active-set solver) + + + + + + + + + + + FERAL + pure-Rust LDLᵀ — default + + + HSL MA57 + optional (feature ma57) + + + + Cross-cutting layers + Sensitivity — sIPOPT parametric step + reduced Hessian (NLP, pounce-sensitivity); QpSensitivity (convex QP) + Restoration phase — feasibility recovery inside the filter-IPM (pounce-restoration); ℓ₁-penalty reformulation (pounce-l1penalty) + Cone library — nonnegative · second-order · exponential · power · PSD (small dense); shared JSON solve report + + Convex/conic, SOS, and branch-and-bound paths return certified global optima; NLP & active-set solvers return a local (KKT) point. All share the pounce-linsol factorization backbone. + + diff --git a/docs/src/introduction.md b/docs/src/introduction.md index 9a2745af..5479c0a1 100644 --- a/docs/src/introduction.md +++ b/docs/src/introduction.md @@ -1,20 +1,32 @@ # Introduction -POUNCE is a pure-Rust port of the [Ipopt](https://github.com/coin-or/Ipopt) -interior-point nonlinear programming solver. It solves problems of the -form +POUNCE is a pure-Rust interior-point optimization solver. Its +nonlinear-programming core began as a faithful port of the +[Ipopt](https://github.com/coin-or/Ipopt) filter line-search method — +the algorithm, console output, and option semantics follow upstream Ipopt +closely enough that anyone used to reading `ipopt` logs can drop in +`pounce` without relearning where the numbers live — and it has since grown +into a *family* of solvers sharing one numerical backbone: -```text -min f(x) -s.t. g_L <= g(x) <= g_U - x_L <= x <= x_U -``` +- **Nonlinear programming** — the filter line-search interior-point method + (the Ipopt port) plus an active-set SQP path, for general smooth problems -where `f` and `g` are twice-continuously-differentiable. + ```text + min f(x) + s.t. g_L <= g(x) <= g_U + x_L <= x <= x_U + ``` -The algorithm, console output, and option semantics follow upstream -Ipopt closely enough that anyone used to reading `ipopt` logs can drop -in `pounce` without relearning where the numbers live. + where `f` and `g` are twice-continuously-differentiable. +- **Conic & quadratic** — LP, convex QP, second-order (SOCP), + positive-semidefinite (SDP), and the non-symmetric exponential and power + cones, each solved to the global optimum. +- **Global optimization** — certified global optima for nonconvex problems + via SOS / Lasserre relaxations (polynomials) and spatial branch-and-bound + (`pounce-global`, general factorable NLPs). + +See [Choosing a Solver](choosing-a-solver.md) for which solver fits which +problem. ## Pure Rust by default @@ -39,6 +51,14 @@ in and available behind option keys. Existing PyIpopt / cyipopt / JuMP / AMPL cl link against `libpounce_cinterface` in place of `libipopt` unchanged. +The conic and global solvers are wired end-to-end alongside the NLP +core: the convex interior-point solver (`pounce-convex`) handles +LP / QP, SOCP, exponential / power cones, and small SDPs — with a Conic +Benchmark Format (`.cbf`) reader cross-checked against the CBLIB tier — +while the global path adds SOS / Lasserre polynomial optimization and a +deterministic spatial branch-and-bound solver (`pounce-global`). All are +reachable from the CLI, the Python package, and the JSON solve report. + ## License EPL-2.0, the same license as upstream Ipopt. diff --git a/docs/src/lp-qp-routing.md b/docs/src/lp-qp-routing.md new file mode 100644 index 00000000..803d1bd5 --- /dev/null +++ b/docs/src/lp-qp-routing.md @@ -0,0 +1,166 @@ +# LP / QP Solver Routing + +POUNCE can route **linear programs (LP)** and **convex quadratic +programs (QP)** to a specialized interior-point solver +(`pounce-convex`) instead of the general nonlinear (NLP) filter-IPM. +The specialized path uses Mehrotra predictor-corrector and reaches the +solution in materially fewer iterations on these problem classes — +typically 30–50% fewer than the general NLP path on bound- or +inequality-constrained convex QPs. + +Routing is **automatic and transparent**: you do not change how you +call POUNCE. The same `pounce problem.nl`, the same +`SolverFactory('pounce')` in Pyomo, and the same AMPL `solve` all work +unchanged — POUNCE inspects the problem and picks the solver. + +## How routing works + +When POUNCE loads a problem it classifies it into one of: + +| Class | Routed to | +|------------------|----------------------------------------| +| **LP** | convex IPM (`pounce-convex`) | +| **convex QP** | convex IPM (`pounce-convex`) | +| **convex QCQP** | NLP filter-IPM *(conic solver: future)*| +| **nonconvex QP** | NLP filter-IPM (finds a local minimum) | +| **NLP** | NLP filter-IPM | + +The classifier is **conservative**: a problem is sent to the convex +solver only when POUNCE can *prove* it is an LP or a convex QP (the +objective is a degree-≤2 polynomial with a positive-semidefinite +Hessian and the constraints are linear). Anything it cannot prove +convex — transcendental terms, an indefinite Hessian, quadratic +constraints — falls back to the general NLP solver, which always +produces a correct (locally optimal) answer. You never get a wrong +"optimum" from a misclassification. + +> **Note on QP detection.** The AMPL `.nl` format has no dedicated +> quadratic section: a QP's quadratic terms are written into the +> nonlinear expression tree. POUNCE walks that tree to recover the +> Hessian and test convexity, the same way QP-capable AMPL solvers do. + +## Choosing the solver explicitly + +The `solver_selection` option overrides the automatic choice. It is a +normal POUNCE option, so it works on the command line, in an options +file, or through Pyomo's `solver.options`. + +| Value | Behavior | +|-----------------|---------------------------------------------------------------------| +| `auto` | **Default.** Route by detected class (table above). | +| `nlp` | Always use the NLP filter-IPM, regardless of class. | +| `lp-ipm` | Force the convex IPM; **errors** if the problem is not an LP. | +| `qp-ipm` | Force the convex IPM; **errors** if the problem is not LP/convex-QP. | +| `qp-active-set` | Reserved for the active-set QP track; currently falls back to NLP. | + +```sh +# Let POUNCE decide (default): +pounce model.nl + +# Force the NLP path even on a convex QP (e.g. to compare): +pounce model.nl solver_selection=nlp + +# Insist the problem is a convex QP — fail loudly if it is not: +pounce model.nl solver_selection=qp-ipm +``` + +A forced value that does not match the detected class is rejected with +a clear message rather than silently ignored: + +```text +pounce: problem class NLP does not match forced solver qp-ipm + (expected an LP or convex QP) +``` + +### From Pyomo + +```python +solver = SolverFactory('pounce') +solver.options['solver_selection'] = 'qp-ipm' # or 'auto', 'nlp', ... +solver.solve(model) +``` + +## What you get back + +Before solving, POUNCE prints a one-line **routing banner** naming the +detected class, the solver it selected, and the effective +`solver_selection` — so it is always clear which of POUNCE's solvers ran +and why: + +```text +Problem class: LP. Selected solver: convex QP interior-point (pounce-convex) [solver_selection=auto]. +``` + +(The banner is suppressed alongside the startup banner — `sb yes` or +JSON-debug protocol mode — to keep stdout clean for machine consumers.) + +The convex IPM then reports the same way as the NLP path: an +optimal-status line, the objective value (in your original sense — a +`maximize` objective and any constant term are reported correctly), and a +`.sol` file with the primal solution when one is requested. + +```text +POUNCE (LP IPM, pounce-convex): Optimal Solution Found. + obj=2.00000000 iters=2 +``` + +> **Driver.** The convex path uses the **homogeneous self-dual embedding +> (HSDE)** interior-point driver — the same self-dual formulation +> Clarabel/ECOS use. It is self-starting, returns verified +> infeasibility/unboundedness certificates, and conditions the KKT system +> internally through its per-cone scaling, so it solves even badly-scaled +> LPs (e.g. NETLIB `nl`, `‖c‖ ~ 1e6`) without external pre-scaling. + +## Presolve + +Before the convex interior-point solve, POUNCE runs a **presolve** pass +that shrinks the problem and can detect trivial infeasibility or +unboundedness without solving. It removes empty, duplicate, and +activity-redundant rows; fixes and substitutes structural columns +(singleton-row fixings, free columns, free column singletons); and +recovers both the primal and dual of the eliminated pieces so the +reported solution is for your original problem. When it reduces the +model, it logs a one-line summary: + +```text +Presolve: 40 → 32 vars, 12 → 8 rows (fixed 3, free-fixed 2, substituted 3) +``` + +Presolve is on by default. Turn it off with `qp_presolve=no` (e.g. to +compare timings or isolate a solver issue): + +```sh +pounce model.nl qp_presolve=no +``` + +## Scope and limitations + +- **Convex QP only.** Nonconvex (indefinite-Hessian) QPs are solved by + the NLP path to a *local* minimum; POUNCE does not do global + optimization. +- **Convex QCQP** (quadratic constraints) is detected as its own class + but currently routes to the NLP path; a second-order-cone solver is + planned. + +Both the primal solution and the constraint duals are written to the +`.sol` file, in the same sign convention as POUNCE's NLP path (so Pyomo +and AMPL read them identically regardless of which solver ran). + +### Infeasible and unbounded problems + +The convex solver detects infeasibility and unboundedness directly, +reporting a clean status instead of exhausting the iteration budget: + +- **Primal infeasible** — no point satisfies the constraints. Reported + with AMPL `solve_result_num` 200. +- **Unbounded** (dual infeasible) — the objective decreases without + bound along a feasible direction. Reported with `solve_result_num` + 300. + +Each verdict is backed by a *verified* certificate (a Farkas +infeasibility proof or an unbounded recession direction that is checked, +not merely inferred), so these statuses are never reported in error; a +problem the solver cannot certify simply runs to the iteration limit. + +The design and roadmap live in +[`dev-notes/lp-qp-routing.md`](https://github.com/jkitchin/pounce/blob/main/dev-notes/lp-qp-routing.md). diff --git a/docs/src/options.md b/docs/src/options.md index 04b42d28..19896cfe 100644 --- a/docs/src/options.md +++ b/docs/src/options.md @@ -30,6 +30,8 @@ file. | `print_level` | Console verbosity, 0 (silent) – 12 (maximum debug). | | `linear_solver` | KKT linear-solver backend. `ma57` requires the `ma57` feature build. | | `mu_strategy` | Barrier-parameter update strategy (`monotone` / `adaptive`). | +| `solver_selection` | Route LP/convex-QP to the specialized convex IPM. See [LP/QP Routing](lp-qp-routing.md). | +| `qp_presolve` | Presolve on the convex LP/QP path (`yes` / `no`, default `yes`). See [LP/QP Routing](lp-qp-routing.md#presolve). | For the full upstream option catalogue, see the [Ipopt options reference](https://coin-or.github.io/Ipopt/OPTIONS.html); diff --git a/docs/src/python.md b/docs/src/python.md index d98c5160..ce85e687 100644 --- a/docs/src/python.md +++ b/docs/src/python.md @@ -66,6 +66,128 @@ res = minimize(lambda x: (x - 1) @ (x - 1) + 1, x0=np.zeros(5)) print(res.fun, res.x) ``` +`minimize` is a thin facade over `pounce.Problem` shaped after +`scipy.optimize.minimize`, so SciPy code ports with few changes. It returns a +SciPy-`OptimizeResult`-shaped object (`res.x`, `res.fun`, `res.success`, +`res.status`, `res.message`, `res.nit`, plus `res.info` and dict-style +`res["x"]`). + +### Compatibility with `scipy.optimize.minimize` + +```python +minimize(fun, x0, jac=None, hess=None, bounds=None, + constraints=None, options=None) +``` + +| Argument | Status | Notes | +|---|---|---| +| `fun`, `x0` | ✅ | objective callable and start point | +| `jac` | ✅ | callable; **omitted → forward finite differences** (`√eps` step). Provide one for production. | +| `hess` | ⚠️ | used **only when there are no constraints**; with constraints the solver falls back to L-BFGS (`hessian_approximation=limited-memory`) | +| `bounds` | ✅ | a sequence of `(lo, hi)` pairs; a `None` element or a `None` endpoint means ±∞ | +| `constraints` | ✅ | SciPy **dict(s)** `{"type": "eq"\|"ineq", "fun": …, "jac": …}`; multiple are concatenated; `"jac"` optional (finite-diff fallback) | +| `options` | ⚠️ | forwarded to `Problem.add_option` — keys are **pounce/Ipopt option names** (`tol`, `max_iter`, `hessian_approximation`), **not** SciPy's (`maxiter`, `ftol`) | +| `args` | ❌ | not supported — close over extra arguments in `fun`/`jac` | +| `method` | ❌ | always the filter-IPM (see below for why there is no `method=`) | +| `hessp` | ❌ | no Hessian-vector-product mode | +| `tol` | ❌ | pass it via `options={"tol": …}` | +| `callback` | ❌ | not supported | + +**Conventions that match SciPy** (so constraint dicts port directly): + +- Inequalities use the SciPy sign convention **`g(x) ≥ 0`**; equalities are + **`g(x) = 0`**. +- The result object is SciPy-`OptimizeResult`-shaped (subset of fields + an + `info` map). + +**Gaps worth knowing:** + +- **Only the dict form of `constraints`** is accepted — a SciPy `Bounds`, + `LinearConstraint`, or `NonlinearConstraint` *object* will not work, and + `bounds` must be `(lo, hi)` pairs (not a `Bounds` object). +- The constraint **Jacobian is dense**; for large sparse Jacobians use the + `Problem` class directly (it takes a sparse Jacobian and structure). +- The most common porting snag is `options`: `options={"maxiter": 100}` is a + no-op — it is `options={"max_iter": 100}`. + +### Solver routing in `minimize` + +By default `minimize` **auto-routes** the same way the CLI's +`solver_selection=auto` does: a problem that is provably a **linear program** +or a **convex quadratic program** is dispatched to the specialized convex +interior-point solver (`pounce.solve_qp`, the HSDE driver), which reaches a +**global** optimum in materially fewer iterations; everything else is solved +by the general NLP filter line-search interior-point method, exactly as before. + +The catch is that `minimize` only sees **opaque callables** — it cannot read a +`.nl` expression tree the way the CLI can. So instead of *reading* the +structure it **probes** it: it evaluates `fun`/`jac`/`hess` at several points, +fits a linear/quadratic model, and then **validates that model against the +true callables at held-out points** before trusting it. The two +misclassification directions are not symmetric, and the validation gates the +dangerous one: + +- A convex LP/QP mistakenly sent to the NLP solver is merely *slower* — the + filter-IPM still solves it correctly. +- A genuinely nonlinear problem sent to the QP solver would return a + **silently wrong** answer. + +So any probe that raises, any model mismatch beyond `route_tol`, a +non-constant Hessian/Jacobian, or an indefinite Hessian (a nonconvex QP) all +fall back to the NLP solver. **You never get a wrong "optimum" from a +misclassification.** + +#### Forcing the solver + +The `solver_selection` option (passed in `options=`) overrides the automatic +choice — mirroring the CLI option of the same name: + +| `options={"solver_selection": …}` | Behavior | +|---|---| +| `"auto"` | **Default.** Probe-and-validate; route provable LP/convex-QP to `solve_qp`, else NLP. | +| `"nlp"` | Skip routing entirely; always use the NLP solver (the pre-routing behavior). | +| `"lp-ipm"` | Force the convex solver; raise `ValueError` if the problem is not detected as an LP. | +| `"qp-ipm"` | Force the convex solver; raise `ValueError` if it is not detected as a convex LP/QP. | + +```python +# Default: route a convex QP to the fast convex IPM automatically. +res = minimize(fun, x0, bounds=bounds) +print(res.info["solver"]) # 'qp-ipm' when routed; absent on the NLP path + +# Keep the pre-routing behavior — always the NLP solver: +res = minimize(fun, x0, options={"solver_selection": "nlp"}) + +# Insist the problem is a convex QP; fail loudly if the probe disagrees: +res = minimize(fun, x0, options={"solver_selection": "qp-ipm"}) +``` + +`route_tol` (default `1e-5`) sets the relative tolerance for the held-out +validation; raise it if a genuinely-linear problem with noisy finite-difference +Jacobians is being conservatively rejected, lower it to be stricter. The +routing keys are consumed by `minimize` and never forwarded to the backend, so +the rest of `options` still reaches the NLP solver unchanged. + +#### When you still need a typed entry point + +Auto-routing handles LP/convex-QP from the `minimize(fun, x0, …)` shape. The +remaining specialized solvers need structure that a callable cannot carry — a +cone list, a symbolic objective to relax and bound — so each keeps its own +pounce-native entry point: + +| Want | Entry point | You provide | Optimum | +|---|---|---|---| +| General nonlinear, fast local solve | `minimize(fun, x0, …)` | callables (`fun`/`jac`/`hess`) | local | +| LP / convex QP | `minimize` (auto) or `solve_qp(P, c, A, b, G, h, lb, ub, …)` | callables / matrices | **global** | +| SOCP / exp / power / PSD cones | `solve_socp(P, c, A, b, G, h, *, cones, …)` | matrices + cone list | **global** | +| Polynomial, certified global | `sos_minimize(objective, *, inequalities, equalities, …)` | a polynomial | **global** | +| Factorable nonconvex, certified global | `minimize_global(objective, *, constraints, lo, hi, …)` | a symbolic `Expr` + box | **global** | + +The `solve_qp` / `solve_socp` / `sos_minimize` / `minimize_global` functions +are pounce-native (not SciPy-shaped) by necessity — e.g. `minimize_global` +takes a symbolic `Expr` objective with keyword-only `lo`/`hi` box arrays and +`(Expr, lo, hi)` constraint triples, *not* callables and SciPy dicts. See +[Choosing a Solver](choosing-a-solver.md) for the full map. + ## Curve fitting `pounce.curve_fit` is the data-fitting companion to `minimize` — a diff --git a/linkedin-v0.4.0.md b/linkedin-v0.4.0.md new file mode 100644 index 00000000..5c203be8 --- /dev/null +++ b/linkedin-v0.4.0.md @@ -0,0 +1,33 @@ +# LinkedIn post — pounce 0.4.0 + +> Draft. Edit freely. The `---` rules mark the start/end of the post body +> you'd paste into LinkedIn; everything outside them is notes. + +--- + +🚀 pounce 0.4.0 is out — a pure-Rust interior-point NLP solver, now with a debugger for your optimization problems. + +When a nonlinear solver stalls, most tools give you a wall of iteration logs and a shrug. pounce 0.4.0 ships something different: an **interactive solver debugger**. + +Start a run with `--debug` and you can: + +- Break into a live solve — Ctrl-C pauses at the next iteration instead of killing the run +- Inspect the iterate — primals, duals, KKT residuals, the barrier parameter, inertia +- Probe the problem — `sweep` a variable, `multistart` from jittered points, `load` a saved iterate and step forward +- Drive it from an LLM — the same diagnostics are exposed over MCP, so you can ask Claude *why* a model isn't converging instead of decoding it yourself + +Plus: signed solve receipts (`pounce verify`), sparse colored AD for the JAX front-ends, and `curve_fit` in Python. + +Pure Rust. No Fortran or C. + +``` +pip install pounce-solver # core solver + Python API +pip install pyomo-pounce # Pyomo plugin +``` + +📦 Docs: https://kitchingroup.cheme.cmu.edu/pounce/ +🐙 Source: https://github.com/jkitchin/pounce + +#Rust #Optimization #NonlinearProgramming + + diff --git a/python/notebooks/13_convex_qp.ipynb b/python/notebooks/13_convex_qp.ipynb new file mode 100644 index 00000000..ee43df5a --- /dev/null +++ b/python/notebooks/13_convex_qp.ipynb @@ -0,0 +1,512 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "63430dd9", + "metadata": {}, + "source": [ + "# Convex QP & LP with `pounce.qp`\n", + "\n", + "POUNCE ships a specialized **convex conic interior-point solver**\n", + "(`pounce-convex`) alongside the general NLP filter-IPM. This notebook is the\n", + "gentle, build-up introduction to its Python surface, `pounce.qp`, for\n", + "linear and quadratic programs:\n", + "\n", + "$$\n", + "\\min_x\\;\\tfrac12 x^\\top P x + c^\\top x\n", + "\\quad\\text{s.t.}\\quad\n", + "A x = b,\\;\\; G x \\le h,\\;\\; \\text{lb} \\le x \\le \\text{ub}.\n", + "$$\n", + "\n", + "`P = 0` is an LP; `P \\succeq 0` a convex QP. We start with a one-line LP and\n", + "work up to **duals**, **verified infeasibility**, **warm starting**,\n", + "**parallel batches**, and **factorization reuse**.\n", + "\n", + "> The conic interior-point design follows\n", + "> [Clarabel](https://github.com/oxfordcontrol/Clarabel.rs) (Goulart & Chen)\n", + "> and the presolve follows [PaPILO](https://github.com/scipopt/papilo) (the\n", + "> presolving library of [SCIP](https://www.scipopt.org/)). POUNCE is pure\n", + "> Rust and wraps neither — it ports their ideas." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "daf41510", + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-31T16:13:49.659117Z", + "iopub.status.busy": "2026-05-31T16:13:49.658920Z", + "iopub.status.idle": "2026-05-31T16:13:49.724620Z", + "shell.execute_reply": "2026-05-31T16:13:49.723841Z" + } + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "from pounce.qp import solve_qp, solve_socp, solve_qp_batch, QpFactorization\n", + "\n", + "np.set_printoptions(precision=4, suppress=True)" + ] + }, + { + "cell_type": "markdown", + "id": "e76831a2", + "metadata": {}, + "source": [ + "## 1. The simplest LP\n", + "\n", + "Minimize $-x_0 - x_1$ over the box $0 \\le x \\le 1$ subject to\n", + "$x_0 + x_1 \\le 1$. The optimum sits on the constraint: any point with\n", + "$x_0 + x_1 = 1$ ties at objective $-1$." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "7176ec4b", + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-31T16:13:49.732255Z", + "iopub.status.busy": "2026-05-31T16:13:49.731944Z", + "iopub.status.idle": "2026-05-31T16:13:49.738866Z", + "shell.execute_reply": "2026-05-31T16:13:49.738060Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "optimal x = [0.5 0.5] obj = -0.99999999921875 iters = 7\n" + ] + } + ], + "source": [ + "r = solve_qp(\n", + " c=[-1.0, -1.0], # P=None -> linear objective\n", + " G=[[1.0, 1.0]], h=[1.0], # x0 + x1 <= 1\n", + " lb=[0, 0], ub=[1, 1],\n", + ")\n", + "print(r.status, \" x =\", r.x, \" obj =\", r.obj, \" iters =\", r.iters)\n", + "assert r.success and abs(r.obj + 1.0) < 1e-6" + ] + }, + { + "cell_type": "markdown", + "id": "b48256fb", + "metadata": {}, + "source": [ + "## 2. A quadratic objective, with duals\n", + "\n", + "$$\\min_x\\; \\tfrac12\\cdot 2\\|x\\|^2 - 3x_0 - 4x_1\n", + "\\quad\\text{s.t.}\\quad x_0 + x_1 \\le 1,\\; 0 \\le x \\le 1.$$\n", + "\n", + "The unconstrained minimizer of $\\tfrac12\\cdot2\\|x\\|^2-3x_0-4x_1$ is\n", + "$(1.5, 2)$, which violates $x_0+x_1\\le1$, so the inequality is **active**.\n", + "The result carries the full multiplier set." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "434225e1", + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-31T16:13:49.740898Z", + "iopub.status.busy": "2026-05-31T16:13:49.740474Z", + "iopub.status.idle": "2026-05-31T16:13:49.747918Z", + "shell.execute_reply": "2026-05-31T16:13:49.746303Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "status : optimal\n", + "x : [0.25 0.75]\n", + "obj : -3.1249999998722653\n", + "z (ineq): [2.5] <- > 0 means x0+x1<=1 is active\n", + "z_lb : [0. 0.]\n", + "z_ub : [0. 0.]\n" + ] + } + ], + "source": [ + "r = solve_qp(\n", + " P=np.diag([2.0, 2.0]),\n", + " c=[-3.0, -4.0],\n", + " G=[[1.0, 1.0]], h=[1.0],\n", + " lb=[0, 0], ub=[1, 1],\n", + ")\n", + "print(\"status :\", r.status)\n", + "print(\"x :\", r.x)\n", + "print(\"obj :\", r.obj)\n", + "print(\"z (ineq):\", r.z, \" <- > 0 means x0+x1<=1 is active\")\n", + "print(\"z_lb :\", r.z_lb)\n", + "print(\"z_ub :\", r.z_ub)\n", + "assert r.success and abs(r.x.sum() - 1.0) < 1e-6" + ] + }, + { + "cell_type": "markdown", + "id": "0f8e6d0e", + "metadata": {}, + "source": [ + "### Stationarity check (KKT)\n", + "\n", + "At the optimum the gradient of the Lagrangian vanishes:\n", + "$$Px + c + G^\\top z - z_{lb} + z_{ub} = 0.$$\n", + "We verify the multipliers POUNCE returns actually close the KKT system." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "d38547e2", + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-31T16:13:49.750100Z", + "iopub.status.busy": "2026-05-31T16:13:49.749858Z", + "iopub.status.idle": "2026-05-31T16:13:49.755727Z", + "shell.execute_reply": "2026-05-31T16:13:49.754703Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Lagrangian gradient: [-0. 0.] (~0)\n" + ] + } + ], + "source": [ + "P = np.diag([2.0, 2.0]); c = np.array([-3.0, -4.0]); G = np.array([[1.0, 1.0]])\n", + "stat = P @ r.x + c + G.T @ r.z - r.z_lb + r.z_ub\n", + "print(\"Lagrangian gradient:\", stat, \" (~0)\")\n", + "assert np.linalg.norm(stat) < 1e-6" + ] + }, + { + "cell_type": "markdown", + "id": "113fcbef", + "metadata": {}, + "source": [ + "## 3. Equality constraints\n", + "\n", + "Project the origin's shifted point onto an affine subspace:\n", + "$$\\min_x \\tfrac12\\|x\\|^2 - x^\\top p \\quad\\text{s.t.}\\quad \\mathbf 1^\\top x = 1.$$\n", + "The closed-form solution is $x = p + \\lambda\\mathbf 1$ with $\\lambda$ set so\n", + "the sum is 1, i.e. $x_i = p_i + (1-\\sum p)/n$." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "0e13cd3e", + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-31T16:13:49.758360Z", + "iopub.status.busy": "2026-05-31T16:13:49.757433Z", + "iopub.status.idle": "2026-05-31T16:13:49.765238Z", + "shell.execute_reply": "2026-05-31T16:13:49.764294Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "x : [0.2667 0.5667 0.1667]\n", + "closed : [0.2667 0.5667 0.1667]\n", + "y (eq) : [-0.0667]\n" + ] + } + ], + "source": [ + "p = np.array([0.2, 0.5, 0.1])\n", + "n = p.size\n", + "r = solve_qp(P=np.eye(n), c=-p, A=np.ones((1, n)), b=[1.0])\n", + "x_star = p + (1 - p.sum()) / n\n", + "print(\"x :\", r.x)\n", + "print(\"closed :\", x_star)\n", + "print(\"y (eq) :\", r.y)\n", + "assert np.allclose(r.x, x_star, atol=1e-7)" + ] + }, + { + "cell_type": "markdown", + "id": "5739216a", + "metadata": {}, + "source": [ + "## 4. Verified infeasibility & unboundedness\n", + "\n", + "POUNCE reports **certified** status, not an iteration-limit guess: a Farkas\n", + "certificate for primal infeasibility, a recession ray for unboundedness." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "1dcbf9d2", + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-31T16:13:49.767279Z", + "iopub.status.busy": "2026-05-31T16:13:49.767108Z", + "iopub.status.idle": "2026-05-31T16:13:49.773285Z", + "shell.execute_reply": "2026-05-31T16:13:49.772003Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "infeasible case : primal_infeasible\n", + "unbounded case : dual_infeasible\n" + ] + } + ], + "source": [ + "# Infeasible: x >= 2 (via -x <= -2) AND x <= 1.\n", + "bad = solve_qp(c=[1.0], G=[[-1.0]], h=[-2.0], ub=[1.0])\n", + "print(\"infeasible case :\", bad.status)\n", + "\n", + "# Unbounded LP: minimize -x with no upper bound.\n", + "unb = solve_qp(c=[-1.0], lb=[0.0])\n", + "print(\"unbounded case :\", unb.status)" + ] + }, + { + "cell_type": "markdown", + "id": "444c9e22", + "metadata": {}, + "source": [ + "## 5. Warm starting\n", + "\n", + "Feed a previous (or nearby) solution back to seed the interior-point\n", + "iteration — the payoff for **parametric sweeps**, receding-horizon MPC, and\n", + "branch-and-bound subproblems. The warm start changes only the iteration\n", + "count, never the solution.\n", + "\n", + "We sweep the linear term `c` along a path and reuse each solution to seed\n", + "the next." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "c055e511", + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-31T16:13:49.775610Z", + "iopub.status.busy": "2026-05-31T16:13:49.775384Z", + "iopub.status.idle": "2026-05-31T16:13:49.805009Z", + "shell.execute_reply": "2026-05-31T16:13:49.803595Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cold iters: [8, 8, 7, 7, 7, 7, 8, 8, 8, 8, 8, 10]\n", + "warm iters: [8, 7, 6, 6, 7, 7, 7, 7, 7, 7, 7, 9]\n", + "mean cold = 7.8, mean warm = 7.0\n" + ] + } + ], + "source": [ + "P = np.diag([2.0, 2.0])\n", + "G = np.array([[1.0, 1.0]]); h = [1.0]\n", + "lb, ub = [0, 0], [1, 1]\n", + "\n", + "cold_iters, warm_iters = [], []\n", + "prev = None\n", + "for t in np.linspace(0, 1, 12):\n", + " c = [-3.0 - t, -4.0 + 2 * t]\n", + " cold = solve_qp(P=P, c=c, G=G, h=h, lb=lb, ub=ub)\n", + " warm = solve_qp(P=P, c=c, G=G, h=h, lb=lb, ub=ub, warm_start=prev)\n", + " assert np.allclose(cold.x, warm.x, atol=1e-5) # same solution (to tol)\n", + " cold_iters.append(cold.iters)\n", + " warm_iters.append(warm.iters)\n", + " prev = warm\n", + "\n", + "print(\"cold iters:\", cold_iters)\n", + "print(\"warm iters:\", warm_iters)\n", + "print(f\"mean cold = {np.mean(cold_iters):.1f}, mean warm = {np.mean(warm_iters[1:]):.1f}\")" + ] + }, + { + "cell_type": "markdown", + "id": "c909a048", + "metadata": {}, + "source": [ + "## 6. Parallel batches\n", + "\n", + "`solve_qp_batch` solves many independent QPs across a rayon thread pool\n", + "(outer-parallel across instances, serial within each). Pass a list of\n", + "kwarg dicts — each is exactly a `solve_qp` call." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "be4a6e34", + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-31T16:13:49.808627Z", + "iopub.status.busy": "2026-05-31T16:13:49.808264Z", + "iopub.status.idle": "2026-05-31T16:13:49.825772Z", + "shell.execute_reply": "2026-05-31T16:13:49.824431Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "all optimal: True\n", + " c=[-3. -4.] -> x=[0.25 0.75]\n", + " c=[-2.5 -4.3] -> x=[0.05 0.95]\n", + " c=[-2. -4.6] -> x=[0. 1.]\n" + ] + } + ], + "source": [ + "rng = np.random.default_rng(0)\n", + "cs = [(-3.0 + 0.5 * k, -4.0 - 0.3 * k) for k in range(8)]\n", + "problems = [dict(P=P, c=c, G=G, h=h, lb=lb, ub=ub) for c in cs]\n", + "results = solve_qp_batch(problems)\n", + "print(\"all optimal:\", all(r.success for r in results))\n", + "for c, r in zip(cs[:3], results[:3]):\n", + " print(f\" c={np.array(c)} -> x={r.x}\")" + ] + }, + { + "cell_type": "markdown", + "id": "b4b9fe0f", + "metadata": {}, + "source": [ + "You can also chain batches with `warm_starts=` — one warm start per\n", + "problem — to combine batching with warm starting across a sequence of\n", + "nearby batches." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "18a387b7", + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-31T16:13:49.828265Z", + "iopub.status.busy": "2026-05-31T16:13:49.827975Z", + "iopub.status.idle": "2026-05-31T16:13:49.834497Z", + "shell.execute_reply": "2026-05-31T16:13:49.833362Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "warm batch all optimal: True\n", + "solutions unchanged : True\n" + ] + } + ], + "source": [ + "nxt = solve_qp_batch(problems, warm_starts=results)\n", + "print(\"warm batch all optimal:\", all(r.success for r in nxt))\n", + "print(\"solutions unchanged :\", all(np.allclose(a.x, b.x, atol=1e-7)\n", + " for a, b in zip(results, nxt)))" + ] + }, + { + "cell_type": "markdown", + "id": "c24986af", + "metadata": {}, + "source": [ + "## 7. Factorization reuse (build-once / solve-many)\n", + "\n", + "When only the *values* of `c`/`b`/`h`/bounds change but the **structure**\n", + "(sparsity, the set of finite bounds) is fixed, `QpFactorization` builds the\n", + "AMD ordering and symbolic factor **once** and reuses it for every solve.\n", + "Compose it with warm starting for the fastest parametric loop." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "d06a5bbc", + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-31T16:13:49.837581Z", + "iopub.status.busy": "2026-05-31T16:13:49.837263Z", + "iopub.status.idle": "2026-05-31T16:13:49.848305Z", + "shell.execute_reply": "2026-05-31T16:13:49.846888Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "t=0.00 x=[0.25 0.75] iters=8\n", + "t=0.25 x=[0.4375 0.5625] iters=7\n", + "t=0.50 x=[0.625 0.375] iters=7\n", + "t=0.75 x=[0.8125 0.1875] iters=7\n", + "t=1.00 x=[0.9999 0.0001] iters=9\n" + ] + } + ], + "source": [ + "fac = QpFactorization(P=P, c=[-3.0, -4.0], G=G, h=h, lb=lb, ub=ub)\n", + "prev = None\n", + "for t in np.linspace(0, 1, 5):\n", + " c = [-3.0 - t, -4.0 + 2 * t]\n", + " rk = fac.solve(P=P, c=c, G=G, h=h, lb=lb, ub=ub, warm_start=prev)\n", + " print(f\"t={t:.2f} x={rk.x} iters={rk.iters}\")\n", + " prev = rk" + ] + }, + { + "cell_type": "markdown", + "id": "015e634d", + "metadata": {}, + "source": [ + "## Where next\n", + "\n", + "- **`14_socp.ipynb`** — second-order cone programs (norm minimization,\n", + " robust LP, mixed cones) with the same API plus a `cones=` partition.\n", + "- **`15_differentiable_convex.ipynb`** — `pounce.jax`: differentiate the QP\n", + " and SOCP solutions w.r.t. their data with `jax.grad` / `jacrev` / `vmap`.\n", + "- The [Convex Solver chapter](../../docs/src/convex-solver.md) documents the\n", + " full API, presolve, and the differentiable layers." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/python/notebooks/14_socp.ipynb b/python/notebooks/14_socp.ipynb new file mode 100644 index 00000000..853f7293 --- /dev/null +++ b/python/notebooks/14_socp.ipynb @@ -0,0 +1,379 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "899d3dde", + "metadata": {}, + "source": [ + "# Second-order cone programs with `pounce.qp.solve_socp`\n", + "\n", + "A **second-order (Lorentz) cone** is\n", + "$$\\mathcal Q^m = \\{\\, (t, u) \\in \\mathbb R\\times\\mathbb R^{m-1} : t \\ge \\|u\\|_2 \\,\\}.$$\n", + "An SOCP minimizes a linear/quadratic objective subject to equalities and a\n", + "product of cones — nonnegative orthants *and* second-order cones:\n", + "$$\\min_x\\;\\tfrac12 x^\\top P x + c^\\top x \\quad\\text{s.t.}\\quad A x = b,\\;\\; G x \\preceq_{\\mathcal K} h.$$\n", + "\n", + "POUNCE solves this with the same Mehrotra predictor–corrector machinery as\n", + "the LP/QP path, now with **Nesterov–Todd scaling** for the cones. The Python\n", + "call mirrors `solve_qp` but adds a `cones=` partition of the rows of `G`:\n", + "each slack block $s = h - Gx$ must lie in its cone.\n", + "\n", + "> **Inspiration.** The conic design follows\n", + "> [Clarabel](https://github.com/oxfordcontrol/Clarabel.rs) (Goulart & Chen);\n", + "> the sparse second-order-cone KKT representation follows\n", + "> [ECOS](https://github.com/embotech/ecos) (Domahidi, Chu & Boyd). POUNCE is\n", + "> pure Rust and wraps neither." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "685566ec", + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-31T16:13:08.542058Z", + "iopub.status.busy": "2026-05-31T16:13:08.541812Z", + "iopub.status.idle": "2026-05-31T16:13:08.627369Z", + "shell.execute_reply": "2026-05-31T16:13:08.626771Z" + } + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "from pounce.qp import solve_socp\n", + "\n", + "np.set_printoptions(precision=4, suppress=True)" + ] + }, + { + "cell_type": "markdown", + "id": "19ed93f2", + "metadata": {}, + "source": [ + "## 1. Norm minimization (projection)\n", + "\n", + "Minimize $\\|x - a\\|$ — i.e. find the closest point to $a$ inside whatever\n", + "feasible set we impose. With **no** other constraint the answer is trivially\n", + "$x = a$, which makes it a perfect first sanity check of the machinery.\n", + "\n", + "Epigraph form: introduce $t$ and minimize $t$ subject to $(t, x-a)\\in\\mathcal Q$:\n", + "- variable vector is $(t, x_0, x_1)$,\n", + "- slack $s = h - Gx$ must equal $(t,\\; x_0-a_0,\\; x_1-a_1)$.\n", + "\n", + "Take $a = (2, -1)$. With $G = -I$ and $h = (0, -2, 1)$ we get\n", + "$s = (t,\\, x_0 - 2,\\, x_1 + 1)$. The optimum is $t^\\* = 0,\\ x = (2, -1)$." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "7181bd7b", + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-31T16:13:08.633288Z", + "iopub.status.busy": "2026-05-31T16:13:08.632336Z", + "iopub.status.idle": "2026-05-31T16:13:08.642223Z", + "shell.execute_reply": "2026-05-31T16:13:08.640941Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "optimal t = 0.0 x = [ 2. -1.]\n" + ] + } + ], + "source": [ + "a = np.array([2.0, -1.0])\n", + "r = solve_socp(\n", + " c=[1.0, 0.0, 0.0], # minimize t (decision vars: t, x0, x1)\n", + " G=-np.eye(3), h=[0.0, -a[0], -a[1]],\n", + " cones=[(\"soc\", 3)],\n", + ")\n", + "t, x = r.x[0], r.x[1:]\n", + "print(r.status, \" t =\", round(t, 6), \" x =\", x)\n", + "assert r.success and abs(t) < 1e-6 and np.allclose(x, a, atol=1e-6)" + ] + }, + { + "cell_type": "markdown", + "id": "ce1c58ec", + "metadata": {}, + "source": [ + "## 2. Linear objective over a ball — a closed-form check\n", + "\n", + "$$\\min_x\\; c^\\top x \\quad\\text{s.t.}\\quad \\|x - a\\|_2 \\le r.$$\n", + "The minimizer pushes from $a$ straight down $-c$ to the ball boundary:\n", + "$$x^\\* = a - r\\,\\frac{c}{\\|c\\|},\\qquad \\text{obj}^\\* = c^\\top a - r\\,\\|c\\|.$$\n", + "\n", + "The cone constraint $\\|x-a\\|\\le r$ becomes $(r,\\ x-a)\\in\\mathcal Q$:\n", + "slack row 0 is the constant $r$ (so $G$ row 0 is zero, $h_0=r$), and the\n", + "remaining rows give $s_i = a_i - x_i$ (so $G = I$, $h_i = a_i$)." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "ad725826", + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-31T16:13:08.644711Z", + "iopub.status.busy": "2026-05-31T16:13:08.644014Z", + "iopub.status.idle": "2026-05-31T16:13:08.661344Z", + "shell.execute_reply": "2026-05-31T16:13:08.660390Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "status : optimal\n", + "x : [-0.1485 0.578 0.6235 -1.6203]\n", + "closed : [-0.1485 0.578 0.6235 -1.6203]\n", + "obj : -1.1528769470451996 vs closed -1.1528769477540548\n", + "on bdry : ||x-a|| = 0.6999999994473081 (= r = 0.7 )\n" + ] + } + ], + "source": [ + "n = 4\n", + "rng = np.random.default_rng(1)\n", + "a = rng.standard_normal(n)\n", + "c = rng.standard_normal(n)\n", + "r_ball = 0.7\n", + "\n", + "G = np.vstack([np.zeros((1, n)), np.eye(n)]) # (n+1) x n\n", + "h = np.concatenate([[r_ball], a])\n", + "res = solve_socp(c=c, G=G, h=h, cones=[(\"soc\", n + 1)])\n", + "\n", + "x_star = a - r_ball * c / np.linalg.norm(c)\n", + "print(\"status :\", res.status)\n", + "print(\"x :\", res.x)\n", + "print(\"closed :\", x_star)\n", + "print(\"obj :\", res.obj, \" vs closed\", float(c @ a - r_ball * np.linalg.norm(c)))\n", + "print(\"on bdry : ||x-a|| =\", np.linalg.norm(res.x - a), \" (= r =\", r_ball, \")\")\n", + "assert res.success and np.allclose(res.x, x_star, atol=1e-6)" + ] + }, + { + "cell_type": "markdown", + "id": "a05c6da3", + "metadata": {}, + "source": [ + "## 3. Constrained least squares (SOCP epigraph of a 2-norm)\n", + "\n", + "$$\\min_x\\; \\|Mx - d\\|_2 \\quad\\text{s.t.}\\quad \\mathbf 1^\\top x = 1.$$\n", + "Epigraph: minimize $t$ with $(t,\\ Mx - d)\\in\\mathcal Q$ and the equality\n", + "$\\mathbf 1^\\top x = 1$. We compare against the analytic equality-constrained\n", + "least-squares solution (a KKT linear system)." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "df9b9707", + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-31T16:13:08.663447Z", + "iopub.status.busy": "2026-05-31T16:13:08.663211Z", + "iopub.status.idle": "2026-05-31T16:13:08.673365Z", + "shell.execute_reply": "2026-05-31T16:13:08.671944Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "status : optimal\n", + "x : [0.5684 0.3224 0.1092]\n", + "ref : [0.5684 0.3224 0.1092]\n", + "t = ||Mx-d|| : 1.1640359039248922 vs 1.1640359009413206\n" + ] + } + ], + "source": [ + "rng = np.random.default_rng(2)\n", + "m, n = 6, 3\n", + "M = rng.standard_normal((m, n))\n", + "d = rng.standard_normal(m)\n", + "\n", + "# decision vars: (t, x_0..x_{n-1}); slack s = (t, d - M x) in SOC(m+1).\n", + "nv = 1 + n\n", + "c = np.zeros(nv); c[0] = 1.0\n", + "G = np.zeros((m + 1, nv))\n", + "G[0, 0] = -1.0 # s_0 = t\n", + "G[1:, 1:] = M # s_i = d_i - (M x)_i\n", + "h = np.concatenate([[0.0], d])\n", + "A = np.concatenate([[0.0], np.ones(n)])[None, :] # sum(x) = 1\n", + "res = solve_socp(c=c, G=G, h=h, A=A, b=[1.0], cones=[(\"soc\", m + 1)])\n", + "\n", + "# Analytic equality-constrained least squares via the normal-equation KKT.\n", + "MtM = M.T @ M\n", + "KKT = np.block([[MtM, np.ones((n, 1))], [np.ones((1, n)), np.zeros((1, 1))]])\n", + "rhs = np.concatenate([M.T @ d, [1.0]])\n", + "x_ref = np.linalg.solve(KKT, rhs)[:n]\n", + "print(\"status :\", res.status)\n", + "print(\"x :\", res.x[1:])\n", + "print(\"ref :\", x_ref)\n", + "print(\"t = ||Mx-d|| :\", res.x[0], \" vs\", np.linalg.norm(M @ x_ref - d))\n", + "assert res.success and np.allclose(res.x[1:], x_ref, atol=1e-6)" + ] + }, + { + "cell_type": "markdown", + "id": "e597e934", + "metadata": {}, + "source": [ + "## 4. Mixed cones\n", + "\n", + "Cones **compose**: a `cones=[(\"nonneg\", k), (\"soc\", m)]` partition puts the\n", + "first $k$ slacks in $\\mathbb R^k_+$ and the next $m$ in a second-order cone.\n", + "\n", + "Here we minimize $-x_0 - x_1$ over the unit ball $\\|x\\|\\le 1$ (a 3-row SOC\n", + "slack $(1, x_0, x_1)$) *and* the linear cut $x_1 \\le 0.5$ (a 1-row nonnegative\n", + "slack). We verify feasibility and KKT stationarity from the returned duals." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "e33e17e9", + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-31T16:13:08.675657Z", + "iopub.status.busy": "2026-05-31T16:13:08.675266Z", + "iopub.status.idle": "2026-05-31T16:13:08.683755Z", + "shell.execute_reply": "2026-05-31T16:13:08.682447Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "status : optimal x = [0.866 0.5 ]\n", + "nonneg slack (>=0) : 3.6646204959467354e-09\n", + "soc slack (t>=||u||) : 1.0 >= 0.9999999999662506\n", + "stationarity ||c+G^T z||: 8.005932084973442e-16\n" + ] + } + ], + "source": [ + "# rows: [ nonneg: 0.5 - x1 >= 0 ] then [ soc: (1, x0, x1) ]\n", + "G = np.array([\n", + " [0.0, 1.0], # nonneg slack s0 = 0.5 - x1\n", + " [0.0, 0.0], # soc s0 = 1 (constant)\n", + " [-1.0, 0.0], # soc s1 = x0\n", + " [0.0, -1.0], # soc s2 = x1\n", + "])\n", + "h = np.array([0.5, 1.0, 0.0, 0.0])\n", + "c = np.array([-1.0, -1.0])\n", + "res = solve_socp(c=c, G=G, h=h, cones=[(\"nonneg\", 1), (\"soc\", 3)])\n", + "\n", + "x = res.x\n", + "s = h - G @ x\n", + "print(\"status :\", res.status, \" x =\", x)\n", + "print(\"nonneg slack (>=0) :\", s[0])\n", + "print(\"soc slack (t>=||u||) :\", s[1], \">=\", np.linalg.norm(s[2:]))\n", + "# KKT stationarity: c + G^T z = 0 (no P, no A here)\n", + "print(\"stationarity ||c+G^T z||:\", np.linalg.norm(c + G.T @ res.z))\n", + "assert res.success and s[0] > -1e-7 and s[1] + 1e-7 >= np.linalg.norm(s[2:])\n", + "assert np.linalg.norm(c + G.T @ res.z) < 1e-6" + ] + }, + { + "cell_type": "markdown", + "id": "783c0508", + "metadata": {}, + "source": [ + "## 5. A larger cone (sparse KKT)\n", + "\n", + "Large second-order cones use a **diagonal-plus-rank-1** KKT representation —\n", + "one auxiliary variable per cone (the ECOS/Clarabel \"sparse SOC\" trick) — so\n", + "the factorization stays sparse instead of dropping a dense $m\\times m$ block.\n", + "We solve the ball problem of §2 at dimension $n = 50$ and confirm the same\n", + "closed form." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "b6491ae6", + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-31T16:13:08.685813Z", + "iopub.status.busy": "2026-05-31T16:13:08.685618Z", + "iopub.status.idle": "2026-05-31T16:13:08.694928Z", + "shell.execute_reply": "2026-05-31T16:13:08.693666Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "status : optimal iters: 9\n", + "max |x - closed form| : 1.2557457296225039e-09\n", + "obj : -3.315024727719524 vs closed -3.315024727917102\n" + ] + } + ], + "source": [ + "n = 50\n", + "rng = np.random.default_rng(3)\n", + "a = rng.standard_normal(n)\n", + "c = rng.standard_normal(n)\n", + "r_ball = 1.3\n", + "\n", + "G = np.vstack([np.zeros((1, n)), np.eye(n)])\n", + "h = np.concatenate([[r_ball], a])\n", + "res = solve_socp(c=c, G=G, h=h, cones=[(\"soc\", n + 1)])\n", + "\n", + "x_star = a - r_ball * c / np.linalg.norm(c)\n", + "print(\"status :\", res.status, \" iters:\", res.iters)\n", + "print(\"max |x - closed form| :\", np.max(np.abs(res.x - x_star)))\n", + "print(\"obj :\", res.obj, \" vs closed\", float(c @ a - r_ball * np.linalg.norm(c)))\n", + "assert res.success and np.allclose(res.x, x_star, atol=1e-5)" + ] + }, + { + "cell_type": "markdown", + "id": "ee4e9d21", + "metadata": {}, + "source": [ + "## Where next\n", + "\n", + "- **`15_differentiable_convex.ipynb`** — differentiate these SOCP solutions\n", + " w.r.t. their data $P, c, G, h, A, b$ with `pounce.jax.solve_socp`.\n", + "- The [Convex Solver chapter](../../docs/src/convex-solver.md) covers the\n", + " cone abstraction, warm starting, and the sparse-KKT representation in\n", + " more detail." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/python/notebooks/15_differentiable_convex.ipynb b/python/notebooks/15_differentiable_convex.ipynb new file mode 100644 index 00000000..5f630f7a --- /dev/null +++ b/python/notebooks/15_differentiable_convex.ipynb @@ -0,0 +1,529 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "b4c12984", + "metadata": {}, + "source": [ + "# Differentiable convex optimization with `pounce.jax`\n", + "\n", + "`pounce.jax` exposes the convex solve as a **differentiable JAX op**. The\n", + "forward pass calls the solver; the backward pass differentiates the\n", + "*solution* w.r.t. the problem data by applying the implicit-function theorem\n", + "to the KKT system at the optimum (Amos & Kolter, *OptNet*, 2017). This lets\n", + "you drop a QP or SOCP inside a larger JAX model and get exact gradients from\n", + "`jax.grad` / `jax.jacrev`, and batch with `vmap`/`.batch`.\n", + "\n", + "This notebook builds up:\n", + "1. `solve_qp` forward + a gradient, checked against finite differences,\n", + "2. the full Jacobian with `jax.jacrev`,\n", + "3. gradients w.r.t. the **matrices** $P, G$ — the full OptNet rule,\n", + "4. `QpLayer` in a tiny learning loop, and a parallel `.batch`,\n", + "5. `solve_socp` — differentiating a second-order cone program.\n", + "\n", + "> `pounce.jax` enables float64 on import (the implicit-diff KKT solve needs\n", + "> the precision). Gradients are validated against finite differences in the\n", + "> POUNCE test suite; we reproduce a couple of those checks here." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "bbe1e7f0", + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-31T16:14:41.126088Z", + "iopub.status.busy": "2026-05-31T16:14:41.125842Z", + "iopub.status.idle": "2026-05-31T16:14:41.597740Z", + "shell.execute_reply": "2026-05-31T16:14:41.596090Z" + } + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import jax\n", + "import jax.numpy as jnp\n", + "from pounce.jax import solve_qp, solve_socp, QpLayer\n", + "\n", + "np.set_printoptions(precision=5, suppress=True)\n", + "\n", + "def fd_grad(f, x, eps=1e-6):\n", + " \"\"\"Central finite-difference gradient of a scalar f at vector x.\"\"\"\n", + " x = np.asarray(x, float)\n", + " g = np.zeros_like(x)\n", + " for i in range(x.size):\n", + " e = np.zeros_like(x); e[i] = eps\n", + " g[i] = (f(x + e) - f(x - e)) / (2 * eps)\n", + " return g" + ] + }, + { + "cell_type": "markdown", + "id": "f947e475", + "metadata": {}, + "source": [ + "## 1. A parametric QP, and its gradient\n", + "\n", + "Equality-constrained QP (smooth in its data — clean for a gradient check):\n", + "$$x^\\*(c) = \\arg\\min_x \\tfrac12 x^\\top P x + c^\\top x \\quad\\text{s.t.}\\quad A x = b.$$\n", + "Define a scalar loss $\\ell(c) = \\tfrac12\\|x^\\*(c) - x_{\\text{tgt}}\\|^2$ and\n", + "compare `jax.grad` to finite differences." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "2f4a3639", + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-31T16:14:41.600688Z", + "iopub.status.busy": "2026-05-31T16:14:41.600342Z", + "iopub.status.idle": "2026-05-31T16:14:43.420764Z", + "shell.execute_reply": "2026-05-31T16:14:43.418928Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "x*(c0) : [0.75 0.25]\n", + "loss : 0.20249999999999996\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "grad (implicit diff): [-0.225 0.225]\n", + "grad (finite diff) : [-0.225 0.225]\n" + ] + } + ], + "source": [ + "P = jnp.array([[3.0, 0.5], [0.5, 2.0]])\n", + "A = jnp.array([[1.0, 1.0]])\n", + "b = jnp.array([1.0])\n", + "x_tgt = jnp.array([0.3, 0.7])\n", + "\n", + "def x_star(c):\n", + " return solve_qp(P=P, c=c, A=A, b=b)\n", + "\n", + "def loss(c):\n", + " return 0.5 * jnp.sum((x_star(c) - x_tgt) ** 2)\n", + "\n", + "c0 = jnp.array([-1.0, 0.5])\n", + "print(\"x*(c0) :\", np.asarray(x_star(c0)))\n", + "print(\"loss :\", float(loss(c0)))\n", + "\n", + "g_ad = np.asarray(jax.grad(loss)(c0))\n", + "g_fd = fd_grad(lambda c: float(loss(jnp.asarray(c))), np.asarray(c0))\n", + "print(\"grad (implicit diff):\", g_ad)\n", + "print(\"grad (finite diff) :\", g_fd)\n", + "assert np.allclose(g_ad, g_fd, atol=1e-5)" + ] + }, + { + "cell_type": "markdown", + "id": "774ef0bf", + "metadata": {}, + "source": [ + "## 2. The full solution Jacobian with `jax.jacrev`\n", + "\n", + "$\\partial x^\\*/\\partial c$ is a $2\\times2$ matrix. `jax.jacrev` differentiates\n", + "the vector-valued solve directly." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "f3e5f1d0", + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-31T16:14:43.423873Z", + "iopub.status.busy": "2026-05-31T16:14:43.423439Z", + "iopub.status.idle": "2026-05-31T16:14:44.661941Z", + "shell.execute_reply": "2026-05-31T16:14:44.660450Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "d x* / d c :\n", + " [[-0.25 0.25]\n", + " [ 0.25 -0.25]]\n", + "finite-diff Jacobian :\n", + " [[-0.25 0.25]\n", + " [ 0.25 -0.25]]\n" + ] + } + ], + "source": [ + "J = np.asarray(jax.jacrev(x_star)(c0))\n", + "print(\"d x* / d c :\\n\", J)\n", + "\n", + "# column-by-column finite-difference check\n", + "J_fd = np.zeros((2, 2))\n", + "for j in range(2):\n", + " e = np.zeros(2); e[j] = 1e-6\n", + " J_fd[:, j] = (np.asarray(x_star(c0 + e)) - np.asarray(x_star(c0 - e))) / 2e-6\n", + "print(\"finite-diff Jacobian :\\n\", J_fd)\n", + "assert np.allclose(J, J_fd, atol=1e-5)" + ] + }, + { + "cell_type": "markdown", + "id": "9087d06d", + "metadata": {}, + "source": [ + "## 3. Gradients w.r.t. the matrices $P$ and $G$\n", + "\n", + "OptNet gives gradients w.r.t. **every** datum that enters the optimum — not\n", + "just the vectors $c, b, h$ but the matrices $P, G, A$ too ($\\nabla P$ is the\n", + "symmetric gradient). Here we differentiate the loss w.r.t. a quadratic\n", + "penalty matrix $P$ and an inequality matrix $G$. We tighten the first bound\n", + "so that inequality is **active** at the optimum — otherwise an inactive\n", + "constraint contributes nothing and $\\nabla G$ would (correctly) be zero." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "d75bb194", + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-31T16:14:44.665651Z", + "iopub.status.busy": "2026-05-31T16:14:44.665321Z", + "iopub.status.idle": "2026-05-31T16:14:45.664569Z", + "shell.execute_reply": "2026-05-31T16:14:45.663115Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "dloss/dP :\n", + " [[0. 0.0075 ]\n", + " [0.0075 0.04125]]\n", + "dloss/dG :\n", + " [[0.0125 0.04375]\n", + " [0. 0. ]] <- nonzero: row 0 is active\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "dloss/dP[0,0]: AD = 2.8287636082383194e-11 FD = 9.8879238130678e-11\n", + "dloss/dG[0,0]: AD = 0.012499999979282915 FD = 0.012499999825152375\n" + ] + } + ], + "source": [ + "G = jnp.array([[1.0, 0.0], [0.0, 1.0]])\n", + "h = jnp.array([0.2, 0.8]) # row 0 active at the optimum, row 1 slack\n", + "\n", + "def loss_PG(P, G):\n", + " x = solve_qp(P=P, c=jnp.array([-1.0, -1.2]), G=G, h=h)\n", + " return 0.5 * jnp.sum((x - x_tgt) ** 2)\n", + "\n", + "gP, gG = jax.grad(loss_PG, argnums=(0, 1))(P, G)\n", + "print(\"dloss/dP :\\n\", np.asarray(gP))\n", + "print(\"dloss/dG :\\n\", np.asarray(gG), \" <- nonzero: row 0 is active\")\n", + "\n", + "# Spot-check entries of both matrix gradients against finite differences.\n", + "def perturbed(P00=None, G00=None):\n", + " Pp = P if P00 is None else P.at[0, 0].set(P00)\n", + " Gp = G if G00 is None else G.at[0, 0].set(G00)\n", + " return float(loss_PG(Pp, Gp))\n", + "\n", + "fdP = (perturbed(P00=P[0, 0] + 1e-6) - perturbed(P00=P[0, 0] - 1e-6)) / 2e-6\n", + "fdG = (perturbed(G00=G[0, 0] + 1e-6) - perturbed(G00=G[0, 0] - 1e-6)) / 2e-6\n", + "print(\"dloss/dP[0,0]: AD =\", float(gP[0, 0]), \" FD =\", fdP)\n", + "print(\"dloss/dG[0,0]: AD =\", float(gG[0, 0]), \" FD =\", fdG)\n", + "assert abs(float(gP[0, 0]) - fdP) < 1e-4\n", + "assert abs(float(gG[0, 0]) - fdG) < 1e-4" + ] + }, + { + "cell_type": "markdown", + "id": "63c22b0b", + "metadata": {}, + "source": [ + "## 4. `QpLayer`: fixed structure inside a learning loop\n", + "\n", + "`QpLayer` captures `P`/`G`/`A` once and is called with the varying\n", + "`c`/`b`/`h`. It composes with `jax.grad`, `jax.jit`, and `vmap`. Here we run\n", + "a few steps of gradient descent on `c` so the QP's solution tracks a moving\n", + "target — a stand-in for training a QP layer end-to-end." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "2f935c5d", + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-31T16:14:45.668231Z", + "iopub.status.busy": "2026-05-31T16:14:45.667928Z", + "iopub.status.idle": "2026-05-31T16:14:45.971048Z", + "shell.execute_reply": "2026-05-31T16:14:45.969548Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "step 0: loss = 5.625e-03, x* = [0.36562 0.63437]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "step 2: loss = 3.297e-03, x* = [0.35024 0.64976]\n", + "step 4: loss = 1.933e-03, x* = [0.33847 0.66153]\n", + "step 6: loss = 1.133e-03, x* = [0.32945 0.67055]\n", + "step 7: loss = 8.674e-04, x* = [0.32577 0.67423]\n", + "target: [0.3 0.7]\n" + ] + } + ], + "source": [ + "layer = QpLayer(P=P, A=A) # equality-constrained QP layer\n", + "\n", + "@jax.jit\n", + "def step(c, lr=0.5):\n", + " def L(c):\n", + " return 0.5 * jnp.sum((layer(c, b=b) - x_tgt) ** 2)\n", + " return c - lr * jax.grad(L)(c), L(c)\n", + "\n", + "c = jnp.array([0.0, 0.0])\n", + "for k in range(8):\n", + " c, Lk = step(c)\n", + " if k % 2 == 0 or k == 7:\n", + " print(f\"step {k}: loss = {float(Lk):.3e}, x* = {np.asarray(layer(c, b=b))}\")\n", + "print(\"target:\", np.asarray(x_tgt))" + ] + }, + { + "cell_type": "markdown", + "id": "eb756176", + "metadata": {}, + "source": [ + "### Parallel batch through the layer\n", + "\n", + "`layer.batch(cs)` solves a batch (shape `(B, n)` of linear terms) on the\n", + "rayon-parallel path and is differentiable — gradients to the shared `P`/`A`\n", + "sum over the batch, gradients to `c` stay per-row." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "79f5d95d", + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-31T16:14:45.973633Z", + "iopub.status.busy": "2026-05-31T16:14:45.973352Z", + "iopub.status.idle": "2026-05-31T16:14:47.427886Z", + "shell.execute_reply": "2026-05-31T16:14:47.426447Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "batch solutions:\n", + " [[0.75 0.25 ]\n", + " [0.375 0.625]\n", + " [0.075 0.925]]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "d(batch loss)/d cs:\n", + " [[-0.45 0.45 ]\n", + " [-0.075 0.075]\n", + " [ 0.225 -0.225]]\n" + ] + } + ], + "source": [ + "cs = jnp.array([[-1.0, 0.5], [-0.5, -0.5], [0.2, -1.0]])\n", + "xs = layer.batch(cs, b=b)\n", + "print(\"batch solutions:\\n\", np.asarray(xs))\n", + "\n", + "# differentiable: gradient of the summed batch loss w.r.t. the batched c's\n", + "def batch_loss(cs):\n", + " return jnp.sum((layer.batch(cs, b=b) - x_tgt) ** 2)\n", + "gcs = jax.grad(batch_loss)(cs)\n", + "print(\"d(batch loss)/d cs:\\n\", np.asarray(gcs))" + ] + }, + { + "cell_type": "markdown", + "id": "84995cdb", + "metadata": {}, + "source": [ + "## 5. Differentiating an SOCP\n", + "\n", + "`solve_socp` differentiates a second-order cone program — the\n", + "complementarity row uses the cone's **arrow operator** in place of the\n", + "orthant's diagonal. We use the closed-form ball problem\n", + "$x^\\*(c) = a - r\\,c/\\|c\\|$ (minimize $c^\\top x$ s.t. $\\|x-a\\|\\le r$), whose\n", + "Jacobian we know analytically:\n", + "$$\\frac{\\partial x^\\*}{\\partial c}\n", + "= -\\frac{r}{\\|c\\|}\\Big(I - \\frac{c\\,c^\\top}{\\|c\\|^2}\\Big).$$" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "dd441ea7", + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-31T16:14:47.430632Z", + "iopub.status.busy": "2026-05-31T16:14:47.430380Z", + "iopub.status.idle": "2026-05-31T16:14:49.601967Z", + "shell.execute_reply": "2026-05-31T16:14:49.600425Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "x* : [ 0.10721 0.58558 -0.0964 ]\n", + "closed : [ 0.10721 0.58558 -0.0964 ]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Jacobian (implicit diff):\n", + " [[-0.31797 -0.14964 0.03741]\n", + " [-0.14964 -0.09352 -0.07482]\n", + " [ 0.03741 -0.07482 -0.37409]]\n", + "Jacobian (closed form) :\n", + " [[-0.31797 -0.14964 0.03741]\n", + " [-0.14964 -0.09352 -0.07482]\n", + " [ 0.03741 -0.07482 -0.37409]]\n" + ] + } + ], + "source": [ + "n = 3\n", + "a = jnp.array([0.5, -0.2, 0.1])\n", + "r_ball = 0.9\n", + "G = jnp.vstack([jnp.zeros((1, n)), jnp.eye(n)]) # (n+1) x n\n", + "\n", + "def socp_x(c):\n", + " h = jnp.concatenate([jnp.array([r_ball]), a])\n", + " return solve_socp(P=jnp.zeros((n, n)), c=c, G=G, h=h, cones=[(\"soc\", n + 1)])\n", + "\n", + "c0 = jnp.array([1.0, -2.0, 0.5])\n", + "x_cf = np.asarray(a) - r_ball * np.asarray(c0) / np.linalg.norm(np.asarray(c0))\n", + "print(\"x* :\", np.asarray(socp_x(c0)))\n", + "print(\"closed :\", x_cf)\n", + "\n", + "J_ad = np.asarray(jax.jacrev(socp_x)(c0))\n", + "cn = np.asarray(c0); nrm = np.linalg.norm(cn)\n", + "J_cf = -r_ball / nrm * (np.eye(n) - np.outer(cn, cn) / nrm**2)\n", + "print(\"Jacobian (implicit diff):\\n\", J_ad)\n", + "print(\"Jacobian (closed form) :\\n\", J_cf)\n", + "assert np.allclose(J_ad, J_cf, atol=1e-5)" + ] + }, + { + "cell_type": "markdown", + "id": "d52c3b66", + "metadata": {}, + "source": [ + "A scalar SOCP loss, end-to-end through `jax.grad`, checked against finite\n", + "differences for good measure." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "92eb534a", + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-31T16:14:49.604510Z", + "iopub.status.busy": "2026-05-31T16:14:49.604259Z", + "iopub.status.idle": "2026-05-31T16:14:50.938849Z", + "shell.execute_reply": "2026-05-31T16:14:50.937221Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "grad (implicit diff): [-0.25064 -0.12719 -0.00748]\n", + "grad (finite diff) : [-0.25064 -0.12719 -0.00748]\n" + ] + } + ], + "source": [ + "def socp_loss(c):\n", + " return jnp.sum(socp_x(c) ** 2)\n", + "\n", + "g_ad = np.asarray(jax.grad(socp_loss)(c0))\n", + "g_fd = fd_grad(lambda c: float(socp_loss(jnp.asarray(c))), np.asarray(c0), eps=1e-6)\n", + "print(\"grad (implicit diff):\", g_ad)\n", + "print(\"grad (finite diff) :\", g_fd)\n", + "assert np.allclose(g_ad, g_fd, atol=1e-4)" + ] + }, + { + "cell_type": "markdown", + "id": "14318af0", + "metadata": {}, + "source": [ + "## Recap\n", + "\n", + "- `solve_qp` / `solve_socp` are JAX-differentiable w.r.t. **all** data\n", + " ($P, c, G, h, A, b$) via OptNet implicit differentiation.\n", + "- Use `jax.grad` for scalar losses, `jax.jacrev` for the full solution\n", + " Jacobian, and `QpLayer` to embed a fixed-structure problem in a model.\n", + "- `layer.batch` / `solve_qp_batch` run rayon-parallel and stay\n", + " differentiable.\n", + "\n", + "See the [Convex Solver chapter](../../docs/src/convex-solver.md) for the\n", + "math and the [Acknowledgments](../../docs/src/acknowledgments.md) for the\n", + "Clarabel / PaPILO / ECOS / OptNet lineage." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/python/pounce/__init__.py b/python/pounce/__init__.py index e02f39d8..7f2b5594 100644 --- a/python/pounce/__init__.py +++ b/python/pounce/__init__.py @@ -1,12 +1,16 @@ -"""Python interface to POUNCE — a pure-Rust port of Ipopt. +"""Python interface to POUNCE — a pure-Rust interior-point optimization solver. -The public surface is intentionally cyipopt-compatible: Problem class -construction, ``add_option``, and ``solve`` accept the same arguments -and return the same shape of result. A scipy-style ``minimize`` facade -is also provided. JAX integration (autodiff-built derivatives, implicit -differentiation through ``x*(p)``) lives in the ``pounce.jax`` -submodule and is only imported on demand to avoid pulling in JAX when -it is not installed. +POUNCE began as a port of Ipopt and has grown into a family of solvers +sharing one numerical backbone. The nonlinear-programming surface is +intentionally cyipopt-compatible: Problem class construction, +``add_option``, and ``solve`` accept the same arguments and return the +same shape of result, with a scipy-style ``minimize`` facade alongside. +Convex and conic programs (LP, QP, SOCP, exponential / power cones, small +SDP) are exposed through ``solve_qp`` / ``solve_socp``; polynomial global +optimization through ``sos_minimize``. JAX integration (autodiff-built +derivatives, implicit differentiation through ``x*(p)``) lives in the +``pounce.jax`` submodule and is only imported on demand to avoid pulling +in JAX when it is not installed. """ from ._pounce import ( @@ -19,8 +23,20 @@ find_critical_points, find_saddles, reaction_network, CriticalPoint, CriticalPointResult, Connection, ReactionNetwork, ) +from .qp import ( + QpResult, + QpFactorization, + QpSensitivity, + ReducedHessian, + solve_qp, + solve_socp, + solve_qp_batch, + solve_qp_multi_rhs, +) +from .sos import sos_minimize, SosResult __all__ = [ + # Nonlinear programming (cyipopt-compatible) "Problem", "Solver", "NlProblem", @@ -40,5 +56,17 @@ "Connection", "ReactionNetwork", "classify_working_set", + # Convex QP / SOCP (the same solvers also live under ``pounce.qp``) + "QpResult", + "QpFactorization", + "QpSensitivity", + "ReducedHessian", + "solve_qp", + "solve_socp", + "solve_qp_batch", + "solve_qp_multi_rhs", + # Polynomial global optimization (SOS / Lasserre) + "sos_minimize", + "SosResult", "__version__", ] diff --git a/python/pounce/_minimize.py b/python/pounce/_minimize.py index 20d39272..3d3a9975 100644 --- a/python/pounce/_minimize.py +++ b/python/pounce/_minimize.py @@ -23,9 +23,20 @@ import numpy as np from ._pounce import Problem +from ._route import classify_and_extract _EPS = float(np.finfo(np.float64).eps) ** 0.5 +# Convex-solver status string → scipy-style integer status (0 == success), +# matching the NLP path's convention. +_QP_STATUS_CODE = { + "optimal": 0, + "primal_infeasible": 2, + "dual_infeasible": 3, + "iteration_limit": 1, + "numerical_failure": 4, +} + @dataclass class OptimizeResult: @@ -241,6 +252,43 @@ def hessian(self, x, lam, obj_factor): return cls() +def _solve_via_convex(ex, opts: dict) -> OptimizeResult: + """Adapt a routed convex LP/QP solve back into an :class:`OptimizeResult`. + + The convex solver minimizes ``½xᵀPx + cᵀx`` and never sees the objective's + degree-0 term, so we add ``ex.obj_const`` back to the reported value (the + same constant the CLI threads through ``run_convex_qp``). The result shape + is identical to the NLP path so the router is transparent to callers. + """ + from .qp import solve_qp + + res = solve_qp( + P=ex.P, c=ex.c, A=ex.A, b=ex.b, G=ex.G, h=ex.h, lb=ex.lb, ub=ex.ub, + tol=opts.get("tol"), max_iter=opts.get("max_iter"), + ) + fun_val = float(res.obj) + ex.obj_const + success = res.status == "optimal" + selector = "lp-ipm" if ex.kind == "lp" else "qp-ipm" + return OptimizeResult( + x=np.asarray(res.x), + fun=fun_val, + success=success, + status=_QP_STATUS_CODE.get(res.status, 1), + message=res.status, + nit=int(res.iters), + info={ + "solver": selector, + "problem_class": ex.kind, + "obj_val": fun_val, + "obj_constant": ex.obj_const, + "status": res.status, + "status_msg": res.status, + "iter_count": int(res.iters), + "residuals": res.residuals, + }, + ) + + def minimize( fun: Callable[[np.ndarray], float], x0: np.ndarray, @@ -250,7 +298,22 @@ def minimize( constraints: Sequence | dict | None = None, options: Mapping[str, Any] | None = None, ) -> OptimizeResult: - """scipy.optimize.minimize-style facade over pounce.""" + """scipy.optimize.minimize-style facade over pounce. + + Solver routing mirrors the CLI's ``solver_selection``. By default + (``options={"solver_selection": "auto"}``) the problem is probed for + structure: a linear or convex-quadratic objective with only linear + constraints is dispatched to the specialized convex LP/QP interior-point + solver (``pounce.solve_qp``), and everything else falls through to the + general NLP filter-IPM. Detection is conservative and validated against + the true callables at held-out points, so a nonlinear problem is never + silently sent to the QP solver. Override with ``"solver_selection"``: + + * ``"auto"`` (default) — route LP/convex-QP to the convex solver, else NLP; + * ``"nlp"`` — always use the NLP solver (the pre-routing behavior); + * ``"lp-ipm"`` / ``"qp-ipm"`` — force the convex solver, raising + ``ValueError`` if the problem is not detected as an LP / convex QP. + """ # Promote a scalar / 0-d x0 to 1-D, matching scipy.optimize.minimize, so a # single-variable problem can be written ``minimize(f, 1.5)``. x0 = np.atleast_1d(_to_array(x0)) @@ -258,6 +321,30 @@ def minimize( lb, ub = _normalize_bounds(bounds, n) m, g_combined, jac_combined, cl, cu = _wrap_constraints(constraints, n) + # Solver routing (mirrors the CLI's `solver_selection`). Pop the routing + # keys so the remainder of `options` still flows to the NLP solver. + opts = dict(options) if options else {} + selection = str(opts.pop("solver_selection", "auto")).lower() + route_tol = float(opts.pop("route_tol", 1e-5)) + if selection in ("auto", "lp-ipm", "qp-ipm"): + extract = classify_and_extract( + fun=fun, jac=jac, hess=hess, lb=lb, ub=ub, m=m, + g_combined=g_combined, jac_combined=jac_combined, + cl=cl, cu=cu, x0=x0, rtol=route_tol, + ) + if selection == "lp-ipm" and (extract is None or extract.kind != "lp"): + raise ValueError( + "solver_selection='lp-ipm' but the problem was not detected as " + "a linear program (linear objective + linear constraints)" + ) + if selection == "qp-ipm" and extract is None: + raise ValueError( + "solver_selection='qp-ipm' but the problem was not detected as " + "a convex LP/QP (convex-quadratic objective + linear constraints)" + ) + if extract is not None: + return _solve_via_convex(extract, opts) + problem_obj = _build_problem_obj( fun=fun, n=n, @@ -277,9 +364,10 @@ def minimize( cl=cl, cu=cu, ) - if options: - for k, v in options.items(): - problem.add_option(k, v) + # `opts` is `options` minus the routing keys (`solver_selection`, + # `route_tol`), so only genuine solver options reach the NLP backend. + for k, v in opts.items(): + problem.add_option(k, v) x, info = problem.solve(x0=x0) return OptimizeResult( diff --git a/python/pounce/_route.py b/python/pounce/_route.py new file mode 100644 index 00000000..4f002195 --- /dev/null +++ b/python/pounce/_route.py @@ -0,0 +1,291 @@ +"""Structure detection + extraction to auto-route a scipy-style +:func:`pounce.minimize` problem to the specialized convex LP/QP solver. + +The CLI classifies a problem by walking its symbolic ``.nl`` expression tree, +so its routing is *certain*. ``minimize`` takes opaque Python callables +(``fun``/``jac``/``hess`` and constraint functions), so we cannot read the +structure — we have to **probe** the callables at several points, fit a +linear/quadratic model, and then **validate** that model against the true +functions at held-out points before trusting it. + +Detection is deliberately conservative. The two misclassification directions +are not symmetric: + +* a convex LP/QP routed to the NLP solver is merely *slower* — the filter-IPM + solves convex QPs correctly; +* a genuinely nonlinear problem routed to the QP solver returns a **silently + wrong** answer. + +So the held-out validation gates the dangerous direction: any probe that +raises, any model mismatch beyond tolerance, a non-constant Hessian/Jacobian, +or an indefinite Hessian (nonconvex QP) all fall back to ``None`` — meaning +"let the general NLP solver handle it." +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Callable, Optional + +import numpy as np + +_EPS = float(np.finfo(np.float64).eps) +# Central-difference steps: ~eps^(1/2) for a first derivative (gradient) and +# ~eps^(1/3) for a second derivative (Hessian), the usual optimal balances of +# truncation vs. round-off error. +_H_GRAD = _EPS**0.5 +_H_HESS = _EPS ** (1.0 / 3.0) + + +@dataclass +class QpExtract: + """A convex LP/QP recovered from the callable problem. + + ``kind`` is ``"lp"`` (``P is None``) or ``"convex_qp"``. The objective is + ``½ xᵀP x + cᵀx + obj_const``; ``obj_const`` is the degree-0 term that the + QP solver does not see and must be added back to the reported value. + Equality block is ``A x = b``, inequality block ``G x ≤ h``, with box + ``lb ≤ x ≤ ub`` (either may be ``None``). + """ + + kind: str + P: Optional[np.ndarray] + c: np.ndarray + obj_const: float + A: Optional[np.ndarray] + b: Optional[np.ndarray] + G: Optional[np.ndarray] + h: Optional[np.ndarray] + lb: Optional[np.ndarray] + ub: Optional[np.ndarray] + + +class _NotConvex(Exception): + """Internal sentinel: the problem is not a confidently-convex LP/QP.""" + + +def _grad_fn(fun: Callable, jac: Optional[Callable]) -> Callable: + """Return a gradient callable: the user's ``jac`` if given, else a + central finite-difference of ``fun`` (central, not forward, because the + structure tests want the extra accuracy).""" + if jac is not None: + return lambda x: np.asarray(jac(x), dtype=np.float64).ravel() + + def g(x): + out = np.empty(x.size) + for i in range(x.size): + step = _H_GRAD * max(1.0, abs(x[i])) + xp = x.copy() + xm = x.copy() + xp[i] += step + xm[i] -= step + out[i] = (float(fun(xp)) - float(fun(xm))) / (2.0 * step) + return out + + return g + + +def _hessian(grad: Callable, x: np.ndarray, hess: Optional[Callable]) -> np.ndarray: + """Symmetric Hessian at ``x`` — the user's ``hess`` if given, else a + central finite-difference of the gradient.""" + if hess is not None: + return np.asarray(hess(x), dtype=np.float64).reshape(x.size, x.size) + n = x.size + H = np.empty((n, n)) + for j in range(n): + step = _H_HESS * max(1.0, abs(x[j])) + xp = x.copy() + xm = x.copy() + xp[j] += step + xm[j] -= step + H[:, j] = (grad(xp) - grad(xm)) / (2.0 * step) + return 0.5 * (H + H.T) + + +def _probe_points(x0, lb, ub, rng, k=5): + """``x0`` plus ``k`` random in-domain probe points. + + Steps are scaled to the box width (when finite) or to ``max(1, |x0|)``, + and clipped back into ``[lb, ub]`` so we never evaluate the user's + functions outside their declared domain (a log-barrier objective, say). + The first point is the anchor used to read off coefficients; the rest are + held out for validation. + """ + n = x0.size + if lb is not None and ub is not None: + width = ub - lb + finite = np.isfinite(width) + span = np.where(finite, np.maximum(width, 1e-6) * 0.25, + np.maximum(np.abs(x0), 1.0)) + else: + span = np.maximum(np.abs(x0), 1.0) + pts = [x0.copy()] + for _ in range(k): + p = x0 + span * rng.standard_normal(n) + if lb is not None: + p = np.maximum(p, lb) + if ub is not None: + p = np.minimum(p, ub) + pts.append(p) + return pts + + +def _objective_model(fun, grad, hess, probes): + """Fit the objective to ``c·x + d`` (LP) or ``½xᵀPx + c·x + d`` (QP). + + Returns ``(P_or_None, c, d)``; raises :class:`_NotConvex` if the gradient + is not affine-consistent enough to be a quadratic with a *constant* + Hessian. The quadratic vs. linear vs. nonlinear decision is finalized by + the held-out validation in :func:`classify_and_extract`. + """ + anchor = probes[0] + grads = [grad(p) for p in probes] + g0 = grads[0] + gscale = max(1.0, float(np.max(np.abs(g0)))) + + # Linear objective ⇔ the gradient is the same at every probe. + gvar = max(float(np.max(np.abs(gi - g0))) for gi in grads[1:]) + if gvar <= 1e-7 * gscale: + c = g0 + d = float(fun(anchor)) - float(c @ anchor) + return None, c, d + + # Otherwise fit a quadratic. With finite differences, require the Hessian + # to be constant across two probes (a true quadratic's is); with an exact + # user ``hess`` one evaluation already pins it. + P = _hessian(grad, anchor, hess) + if hess is None: + P2 = _hessian(grad, probes[1], hess) + pscale = max(1.0, float(np.max(np.abs(P)))) + if float(np.max(np.abs(P - P2))) > 1e-4 * pscale: + raise _NotConvex("Hessian is not constant — objective is not quadratic") + # grad(x) = P x + c ⇒ c = grad(anchor) − P·anchor. + c = g0 - P @ anchor + d = float(fun(anchor)) - (0.5 * float(anchor @ P @ anchor) + float(c @ anchor)) + return P, c, d + + +def _linear_constraints(g_combined, jac_combined, cl, cu, probes, m): + """Recover ``A x = b`` / ``G x ≤ h`` from the coalesced constraint + callable, or raise :class:`_NotConvex` if any constraint is nonlinear. + + ``cl``/``cu`` carry the scipy-style two-sided bounds that + ``_wrap_constraints`` produced (``[0, 0]`` for an equality, ``[0, ∞]`` + for ``g(x) ≥ 0``). The constraint value model is ``g(x) = J x + g0``. + """ + if m == 0: + return None, None, None, None + + anchor = probes[0] + J0 = np.atleast_2d(np.asarray(jac_combined(anchor), dtype=np.float64)) + g_anchor = np.asarray(g_combined(anchor), dtype=np.float64).ravel() + g0 = g_anchor - J0 @ anchor # the affine offset + + jscale = max(1.0, float(np.max(np.abs(J0)))) + for p in probes[1:]: + gp = np.asarray(g_combined(p), dtype=np.float64).ravel() + model = J0 @ p + g0 + if float(np.max(np.abs(gp - model))) > 1e-6 * (1.0 + float(np.max(np.abs(gp)))): + raise _NotConvex("a constraint is nonlinear") + Jp = np.atleast_2d(np.asarray(jac_combined(p), dtype=np.float64)) + if float(np.max(np.abs(Jp - J0))) > 1e-6 * jscale: + raise _NotConvex("a constraint Jacobian is not constant") + + A_rows, b_vals, G_rows, h_vals = [], [], [], [] + for i in range(m): + Ji, off = J0[i], g0[i] + lo, hi = cl[i], cu[i] + if np.isfinite(lo) and np.isfinite(hi) and lo == hi: + # Equality g = lo ⇒ J x = lo − off. + A_rows.append(Ji) + b_vals.append(lo - off) + continue + if np.isfinite(hi): + # g ≤ hi ⇒ J x ≤ hi − off. + G_rows.append(Ji) + h_vals.append(hi - off) + if np.isfinite(lo): + # g ≥ lo ⇒ −J x ≤ off − lo. + G_rows.append(-Ji) + h_vals.append(off - lo) + + A = np.array(A_rows, dtype=np.float64) if A_rows else None + b = np.array(b_vals, dtype=np.float64) if b_vals else None + G = np.array(G_rows, dtype=np.float64) if G_rows else None + h = np.array(h_vals, dtype=np.float64) if h_vals else None + return A, b, G, h + + +def _clean_bounds(lb, ub): + """Drop an all-infinite bound vector to ``None`` (no box).""" + if lb is not None and np.all(np.isinf(lb)): + lb = None + if ub is not None and np.all(np.isinf(ub)): + ub = None + return lb, ub + + +def classify_and_extract( + *, + fun, + jac, + hess, + lb, + ub, + m, + g_combined, + jac_combined, + cl, + cu, + x0, + rtol: float = 1e-5, + seed: int = 0, +) -> Optional[QpExtract]: + """Detect a convex LP/QP behind the callable problem and extract its data. + + Returns a :class:`QpExtract` if the objective is linear or convex-quadratic + *and* every constraint is linear (validated at held-out probe points), + otherwise ``None`` (route to the NLP solver). Any evaluation error during + probing — a domain error, a NaN, a shape surprise — also yields ``None``: + we never let a probe failure turn into a wrong solver choice. + """ + rng = np.random.default_rng(seed) + grad = _grad_fn(fun, jac) + try: + probes = _probe_points(x0, lb, ub, rng) + P, c, d = _objective_model(fun, grad, hess, probes) + + # Validate the fitted objective model at the held-out probes. + for p in probes[1:]: + quad = 0.5 * float(p @ P @ p) if P is not None else 0.0 + model = quad + float(c @ p) + d + fv = float(fun(p)) + if abs(model - fv) > rtol * (1.0 + abs(fv)): + raise _NotConvex("objective does not match its linear/quadratic model") + + # Convexity: a quadratic must have a positive-semidefinite Hessian. + if P is not None: + eig = np.linalg.eigvalsh(P) + if float(eig.min()) < -1e-8 * max(1.0, abs(float(eig.max()))): + raise _NotConvex("indefinite Hessian — nonconvex QP") + + A, b, G, h = _linear_constraints(g_combined, jac_combined, cl, cu, probes, m) + except _NotConvex: + return None + except Exception: + # Probing blew up (domain error, NaN, bad shape) — stay on the NLP path. + return None + + lb_c, ub_c = _clean_bounds(lb, ub) + return QpExtract( + kind="lp" if P is None else "convex_qp", + P=P, + c=np.asarray(c, dtype=np.float64).ravel(), + obj_const=float(d), + A=A, + b=b, + G=G, + h=h, + lb=lb_c, + ub=ub_c, + ) diff --git a/python/pounce/jax/__init__.py b/python/pounce/jax/__init__.py index 5dfb8bc5..4edece24 100644 --- a/python/pounce/jax/__init__.py +++ b/python/pounce/jax/__init__.py @@ -46,6 +46,7 @@ from ._diff import solve, solve_with_warm, vmap_solve, vmap_solve_parallel from ._problem import AnchorState, JaxProblem from ._path import PathFollower, PathTrace, inverse_map_rhs +from ._qp import QpLayer, solve_qp, solve_qp_batch, solve_socp __all__ = [ "from_jax", @@ -58,4 +59,8 @@ "PathFollower", "PathTrace", "inverse_map_rhs", + "solve_qp", + "solve_qp_batch", + "solve_socp", + "QpLayer", ] diff --git a/python/pounce/jax/_qp.py b/python/pounce/jax/_qp.py new file mode 100644 index 00000000..3d581a5b --- /dev/null +++ b/python/pounce/jax/_qp.py @@ -0,0 +1,761 @@ +"""Differentiable convex-QP layer (OptNet-style implicit differentiation). + +Solves, and differentiates through, the convex QP + +.. code-block:: text + + minimize ½ xᵀP x + cᵀx + subject to G x ≤ h + A x = b + +The forward solve calls the ``pounce-convex`` interior-point solver +through a host callback. The backward pass uses the implicit-function +theorem on the KKT system at the optimum (Amos & Kolter, *OptNet*, 2017): +the same KKT matrix that defines the solution also yields its +sensitivities, so a single linear solve gives the cotangents. + +Differentiable parameters. Gradients are provided w.r.t. **all** the +parameters that enter the QP linearly through the optimum: + +* the linear / right-hand-side vectors ``c``, ``b``, ``h``; and +* the matrices ``P``, ``G``, ``A`` (full OptNet matrix derivatives). + +``P`` is differentiated as a **symmetric** matrix — the solver reads its +lower triangle and treats it as symmetric, so ``∇P`` is the symmetrized +gradient ``½(d_x xᵀ + x d_xᵀ)``; perturb ``P`` symmetrically when checking +it against finite differences. + +Bounds ``lb ≤ x ≤ ub`` are supported in the *forward* solve by folding +them into ``G``/``h`` before differentiation, so the IFT sees a single +inequality block. The folded bound rows are constants, so they carry no +gradient back to ``lb``/``ub`` (differentiate bound *levels* by passing +them through ``G``/``h`` explicitly instead). + +Batching. :func:`solve_qp` is usable under ``jax.vmap`` (each instance is +an independent, sequential host solve). For a *parallel* batch over many +instances that share matrix structure, use :func:`solve_qp_batch`, which +routes the forward solves to the rayon-parallel ``solve_qp_batch`` binding +and differentiates each instance independently. + +Warm starting. Pass ``warm_start=`` a previous primal ``x`` to seed the +interior-point iteration on a nearby problem. The core applies a +Mehrotra-style recentering (it keeps the warm primal but pushes the +slacks/multipliers back into the interior with a scale-aware floor, since +a converged point lies on the complementarity boundary — the worst IPM +restart). The warm start is **not** differentiated and never changes the +solution or its gradients; it only reduces the iteration count. For +repeated solves on a *fixed structure*, the host API +:class:`pounce.qp.QpFactorization` additionally reuses the symbolic +factorization (AMD analysis / KKT pattern). +""" + +from __future__ import annotations + +from typing import Optional + +import jax +import jax.numpy as jnp +import numpy as np +from jax.scipy.linalg import block_diag + +from .. import _pounce + +__all__ = ["solve_qp", "solve_qp_batch", "solve_socp", "QpLayer"] + +# Active-set tolerance for the backward pass: an inequality counts as +# active when its multiplier is above this (complementarity slackness). +_ACTIVE_TOL = 1e-6 + + +def _expand_bounds(G, h, lb, ub, n): + """Fold finite variable bounds into G/h as extra rows. + + Returns ``(G_full, h_full)`` as dense jnp arrays. ``x_i ≤ ub_i`` and + ``−x_i ≤ −lb_i``.""" + rows = [] + rhs = [] + if G is not None and G.shape[0] > 0: + rows.append(G) + rhs.append(h) + if ub is not None: + for i in range(n): + if np.isfinite(float(ub[i])): + e = jnp.zeros(n).at[i].set(1.0) + rows.append(e[None, :]) + rhs.append(jnp.asarray(ub[i]).reshape(1)) + if lb is not None: + for i in range(n): + if np.isfinite(float(lb[i])): + e = jnp.zeros(n).at[i].set(-1.0) + rows.append(e[None, :]) + rhs.append((-jnp.asarray(lb[i])).reshape(1)) + if not rows: + return jnp.zeros((0, n)), jnp.zeros((0,)) + return jnp.concatenate(rows, axis=0), jnp.concatenate(rhs, axis=0) + + +def _to_coo_lower(M): + """COO ``(rows, cols, vals)`` of the lower triangle of dense ``M``.""" + r, cc = np.nonzero(M) + keep = r >= cc + return r[keep].tolist(), cc[keep].tolist(), M[r[keep], cc[keep]].tolist() + + +def _to_coo(M): + """COO ``(rows, cols, vals)`` of dense ``M``.""" + r, cc = np.nonzero(M) + return r.tolist(), cc.tolist(), M[r, cc].tolist() + + +def _build_problem(P, c, G, h, A, b): + """Assemble a ``_pounce.QpProblem`` from dense numpy arrays.""" + n = c.shape[0] + pr, pc, pv = _to_coo_lower(np.asarray(P)) + gr, gc, gv = _to_coo(np.asarray(G)) + ar, ac, av = _to_coo(np.asarray(A)) + return _pounce.QpProblem( + n=n, + c=np.asarray(c).tolist(), + p_rows=pr, + p_cols=pc, + p_vals=pv, + a_rows=ar, + a_cols=ac, + a_vals=av, + b=np.asarray(b).tolist(), + g_rows=gr, + g_cols=gc, + g_vals=gv, + h=np.asarray(h).tolist(), + ) + + +_SUCCESS_STATUS = "optimal" + + +def _check_status(status, where): + """Raise unless the convex solver reached an optimal solution. + + The differentiable layer reads the primal/dual iterate and solves a + KKT system for the gradient. If the forward solve did not converge + (``primal_infeasible`` / ``dual_infeasible`` / ``iteration_limit`` / + ``numerical_failure``), that iterate is not a KKT point and the + implicit-function gradient is meaningless — so fail loudly rather than + return silent NaNs/garbage into a downstream optimizer. Use the host + ``pounce.qp`` API (which surfaces ``QpResult.status``) to inspect the + failure.""" + if status != _SUCCESS_STATUS: + raise RuntimeError( + f"{where}: convex solver returned status {status!r}, not " + f"{_SUCCESS_STATUS!r}; the differentiable layer cannot produce a " + f"meaningful gradient for a non-optimal solve." + ) + + +def _split_duals(d, m_g, m_a): + """Extract (lam, nu) from a solver result dict, padding empty blocks.""" + lam = ( + np.asarray(d["z"], dtype=np.float64) + if m_g + else np.zeros((0,), dtype=np.float64) + ) + nu = ( + np.asarray(d["y"], dtype=np.float64) + if m_a + else np.zeros((0,), dtype=np.float64) + ) + return lam, nu + + +def _forward_solve(P, c, G, h, A, b, tol, max_iter, warm_x=None): + """Host-side forward solve via pounce-convex. Returns (x, lam, nu). + + ``lam`` are the inequality (``G``) multipliers, ``nu`` the equality + (``A``) multipliers. ``warm_x`` (if its length is ``n``) seeds the + iteration with that primal; it only affects the iteration count.""" + m_g = G.shape[0] + m_a = A.shape[0] + prob = _build_problem(P, c, G, h, A, b) + warm = None + if warm_x is not None and np.asarray(warm_x).size == c.shape[0]: + warm = {"x": np.asarray(warm_x, dtype=np.float64).tolist()} + d = _pounce.solve_qp(prob, tol=tol, max_iter=max_iter, warm_start=warm) + _check_status(d["status"], "QpLayer forward solve") + x = np.asarray(d["x"], dtype=np.float64) + lam, nu = _split_duals(d, m_g, m_a) + return x, lam, nu + + +def _forward_solve_batch(P, cs, G, hs, A, bs, tol, max_iter, warm_xs=None): + """Parallel host-side batch solve. Shared ``P``/``G``/``A``; per-row + ``cs``/``hs``/``bs``. Returns stacked (xs, lams, nus). ``warm_xs`` (if + shaped ``(B, n)``) seeds each instance's primal.""" + m_g = G.shape[0] + m_a = A.shape[0] + b_sz = cs.shape[0] + n = cs.shape[1] + probs = [_build_problem(P, cs[i], G, hs[i], A, bs[i]) for i in range(b_sz)] + warms = None + if warm_xs is not None and np.asarray(warm_xs).shape == (b_sz, n): + wx = np.asarray(warm_xs, dtype=np.float64) + warms = [{"x": wx[i].tolist()} for i in range(b_sz)] + dicts = _pounce.solve_qp_batch(probs, tol=tol, max_iter=max_iter, warm_starts=warms) + for i, d in enumerate(dicts): + _check_status(d["status"], f"QpLayer batch forward solve (row {i})") + xs = np.stack([np.asarray(d["x"], dtype=np.float64) for d in dicts]) + if m_g: + lams = np.stack([np.asarray(d["z"], dtype=np.float64) for d in dicts]) + else: + lams = np.zeros((b_sz, 0), dtype=np.float64) + if m_a: + nus = np.stack([np.asarray(d["y"], dtype=np.float64) for d in dicts]) + else: + nus = np.zeros((b_sz, 0), dtype=np.float64) + return xs, lams, nus + + +def _kkt_backward(P, G, A, h, x, lam, nu, gx): + """One OptNet implicit-diff backward (Amos & Kolter 2017, §3). + + At the optimum ``(x, λ, ν)`` of ``min ½xᵀPx+cᵀx s.t. Gx≤h, Ax=b`` the + KKT differential system is + + .. code-block:: text + + [ P Gᵀ Aᵀ ] [d_x] [ g_x ] + [ D(λ)G D(Gx−h) 0 ] [d_λ] = − [ 0 ] + [ A 0 0 ] [d_ν] [ 0 ] + + with ``D(·) = diag(·)``. Solving for ``(d_x, d_λ, d_ν)``, the loss + gradients are + + .. code-block:: text + + ∇_c = d_x ∇_P = ½(d_x xᵀ + x d_xᵀ) + ∇_b = −d_ν ∇_A = d_ν xᵀ + ν d_xᵀ + ∇_h = −d_λ ∇_G = d_λ xᵀ + λ d_xᵀ + + (The matrix forms follow from the standard OptNet result; in this + scaling ``d_λ`` already absorbs ``D(λ)``, so e.g. ``∇_h = −d_λ`` rather + than ``−D(λ)d_λ``. All six are checked against finite differences.) + """ + n = x.shape[0] + m_g = G.shape[0] + m_a = A.shape[0] + + slack = G @ x - h # ≤ 0 at feasibility; 0 on active rows + dlam_scale = jnp.diag(lam) + zero_ga = jnp.zeros((m_g, m_a)) + zero_ag = jnp.zeros((m_a, m_g)) + zero_aa = jnp.zeros((m_a, m_a)) + + top = jnp.concatenate([P, G.T, A.T], axis=1) + mid = jnp.concatenate([dlam_scale @ G, jnp.diag(slack), zero_ga], axis=1) + bot = jnp.concatenate([A, zero_ag, zero_aa], axis=1) + kkt = jnp.concatenate([top, mid, bot], axis=0) + + rhs = -jnp.concatenate([gx, jnp.zeros(m_g), jnp.zeros(m_a)]) + d = jnp.linalg.solve(kkt, rhs) + d_x = d[:n] + d_lam = d[n : n + m_g] + d_nu = d[n + m_g :] + + grad_c = d_x + grad_h = -d_lam + grad_b = -d_nu + # Matrix gradients (full OptNet). ∇_P symmetrized (P is symmetric). + grad_P = 0.5 * (jnp.outer(d_x, x) + jnp.outer(x, d_x)) + grad_G = jnp.outer(d_lam, x) + jnp.outer(lam, d_x) + grad_A = jnp.outer(d_nu, x) + jnp.outer(nu, d_x) + return grad_P, grad_c, grad_G, grad_h, grad_A, grad_b + + +def _make_qp_vjp(n, m_g, m_a, tol, max_iter): + # `warm_x` is a primal input so it threads cleanly through jit/grad, + # but it never affects the solution (only the iteration count), so its + # cotangent is zero. + @jax.custom_vjp + def qp(P, c, G, h, A, b, warm_x): + x, _, _ = _pure_forward(P, c, G, h, A, b, warm_x, n, m_g, m_a, tol, max_iter) + return x + + def fwd(P, c, G, h, A, b, warm_x): + x, lam, nu = _pure_forward( + P, c, G, h, A, b, warm_x, n, m_g, m_a, tol, max_iter + ) + return x, (P, G, A, h, x, lam, nu, warm_x) + + def bwd(res, gx): + P, G, A, h, x, lam, nu, warm_x = res + gP, gc, gG, gh, gA, gb = _kkt_backward(P, G, A, h, x, lam, nu, gx) + return (gP, gc, gG, gh, gA, gb, jnp.zeros_like(warm_x)) + + qp.defvjp(fwd, bwd) + return qp + + +def _make_qp_batch_vjp(n, m_g, m_a, tol, max_iter): + """custom_vjp for a parallel batch. Differentiable args are the shared + ``P``/``G``/``A`` and the per-row ``cs``/``hs``/``bs`` (all leading + axis ``B``). Matrix gradients sum over the batch; RHS gradients stay + per-row.""" + + @jax.custom_vjp + def qp(P, cs, G, hs, A, bs, warm_xs): + xs, _, _ = _pure_forward_batch( + P, cs, G, hs, A, bs, warm_xs, n, m_g, m_a, tol, max_iter + ) + return xs + + def fwd(P, cs, G, hs, A, bs, warm_xs): + xs, lams, nus = _pure_forward_batch( + P, cs, G, hs, A, bs, warm_xs, n, m_g, m_a, tol, max_iter + ) + return xs, (P, G, A, hs, xs, lams, nus, warm_xs) + + def bwd(res, gxs): + P, G, A, hs, xs, lams, nus, warm_xs = res + per = jax.vmap( + lambda h, x, lam, nu, gx: _kkt_backward(P, G, A, h, x, lam, nu, gx) + )(hs, xs, lams, nus, gxs) + gP, gc, gG, gh, gA, gb = per + # Shared matrices: sum cotangents over the batch axis. Warm start is + # not differentiated (start-independent solution). + return ( + jnp.sum(gP, axis=0), + gc, + jnp.sum(gG, axis=0), + gh, + jnp.sum(gA, axis=0), + gb, + jnp.zeros_like(warm_xs), + ) + + qp.defvjp(fwd, bwd) + return qp + + +def _pure_forward(P, c, G, h, A, b, warm_x, n, m_g, m_a, tol, max_iter): + """custom_vjp-friendly forward via pure_callback. Returns (x, lam, nu). + + ``warm_x`` is an extra (non-differentiated) operand carrying an optional + warm-start primal; an empty array means cold start.""" + shapes = ( + jax.ShapeDtypeStruct((n,), jnp.float64), + jax.ShapeDtypeStruct((m_g,), jnp.float64), + jax.ShapeDtypeStruct((m_a,), jnp.float64), + ) + + def host(P_h, c_h, G_h, h_h, A_h, b_h, w_h): + return _forward_solve( + np.asarray(P_h), + np.asarray(c_h), + np.asarray(G_h), + np.asarray(h_h), + np.asarray(A_h), + np.asarray(b_h), + tol, + max_iter, + warm_x=np.asarray(w_h), + ) + + # `vmap_method="sequential"` lets the layer be used under jax.vmap + # (each instance is an independent host solve). Older JAX releases + # don't accept the kwarg, so fall back gracefully. + try: + return jax.pure_callback( + host, shapes, P, c, G, h, A, b, warm_x, vmap_method="sequential" + ) + except TypeError: + return jax.pure_callback(host, shapes, P, c, G, h, A, b, warm_x) + + +def _pure_forward_batch(P, cs, G, hs, A, bs, warm_xs, n, m_g, m_a, tol, max_iter): + """Parallel-batch forward via a single host callback. Returns stacked + (xs, lams, nus). ``warm_xs`` is a non-differentiated warm-start operand + (empty trailing dim ⇒ cold).""" + b_sz = cs.shape[0] + shapes = ( + jax.ShapeDtypeStruct((b_sz, n), jnp.float64), + jax.ShapeDtypeStruct((b_sz, m_g), jnp.float64), + jax.ShapeDtypeStruct((b_sz, m_a), jnp.float64), + ) + + def host(P_h, cs_h, G_h, hs_h, A_h, bs_h, w_h): + return _forward_solve_batch( + np.asarray(P_h), + np.asarray(cs_h), + np.asarray(G_h), + np.asarray(hs_h), + np.asarray(A_h), + np.asarray(bs_h), + tol, + max_iter, + warm_xs=np.asarray(w_h), + ) + + return jax.pure_callback(host, shapes, P, cs, G, hs, A, bs, warm_xs) + + +def _warm_primal(warm_start, n): + """Extract a warm-start primal ``x`` (length ``n``) from a previous + solution, returning an empty array (cold start) when absent.""" + if warm_start is None: + return jnp.zeros((0,)) + wx = getattr(warm_start, "x", None) + if wx is None: + wx = warm_start.get("x") if hasattr(warm_start, "get") else warm_start + if wx is None: + return jnp.zeros((0,)) + wx = jnp.asarray(wx, dtype=jnp.float64).ravel() + return wx if wx.shape[0] == n else jnp.zeros((0,)) + + +def solve_qp( + *, + P, + c, + G=None, + h=None, + A=None, + b=None, + lb=None, + ub=None, + tol: Optional[float] = None, + max_iter: Optional[int] = None, + warm_start=None, +): + """Differentiable convex-QP solve ``x*(P, c, G, h, A, b)``. + + Solves ``min ½xᵀPx+cᵀx s.t. Gx≤h, Ax=b, lb≤x≤ub`` and is + differentiable w.r.t. ``P``, ``c``, ``G``, ``h``, ``A``, ``b`` via the + OptNet implicit-function rule (``∇P`` is the symmetric gradient). + + All array args are dense jnp/np arrays. Bounds are folded into the + inequality block as constant rows (no gradient flows to ``lb``/``ub``; + pass differentiable bound levels through ``G``/``h`` instead). + + ``warm_start`` (optional) supplies a previous primal ``x`` (an array, or + anything with an ``x`` attribute/key — e.g. a prior result) to seed the + interior-point iteration on a nearby problem. It is **not** + differentiated and does not change the solution or its gradients; it + only reduces the iteration count. This is the natural fit here, since + the layer returns the primal — feed the previous output back in. + """ + P = jnp.asarray(P, dtype=jnp.float64) + c = jnp.asarray(c, dtype=jnp.float64) + n = c.shape[0] + G0 = jnp.zeros((0, n)) if G is None else jnp.asarray(G, dtype=jnp.float64) + h0 = jnp.zeros((0,)) if h is None else jnp.asarray(h, dtype=jnp.float64) + A0 = jnp.zeros((0, n)) if A is None else jnp.asarray(A, dtype=jnp.float64) + b0 = jnp.zeros((0,)) if b is None else jnp.asarray(b, dtype=jnp.float64) + + # Fold finite bounds into G/h (constants w.r.t. differentiation here). + G_full, h_full = _expand_bounds(G0, h0, lb, ub, n) + warm_x = _warm_primal(warm_start, n) + + fn = _make_qp_vjp(n, G_full.shape[0], A0.shape[0], tol, max_iter) + return fn(P, c, G_full, h_full, A0, b0, warm_x) + + +def _warm_primal_batch(warm_start, b_sz, n): + """Extract a ``(B, n)`` warm-start primal from a batch result + (a ``(B, n)`` array, or a sequence of per-row results/vectors), + returning an empty ``(B, 0)`` array (cold) when absent or mismatched.""" + if warm_start is None: + return jnp.zeros((b_sz, 0)) + arr = warm_start + if isinstance(warm_start, (list, tuple)): + rows = [] + for w in warm_start: + wx = getattr(w, "x", None) + if wx is None: + wx = w.get("x") if hasattr(w, "get") else w + rows.append(jnp.asarray(wx, dtype=jnp.float64).ravel()) + arr = jnp.stack(rows) if rows else jnp.zeros((b_sz, 0)) + arr = jnp.asarray(arr, dtype=jnp.float64) + return arr if arr.shape == (b_sz, n) else jnp.zeros((b_sz, 0)) + + +def solve_qp_batch( + *, + P, + c, + G=None, + h=None, + A=None, + b=None, + lb=None, + ub=None, + tol: Optional[float] = None, + max_iter: Optional[int] = None, + warm_start=None, +): + """Differentiable **parallel** batch of convex QPs sharing structure. + + ``c`` is required and batched with shape ``(B, n)``. The matrices + ``P``, ``G``, ``A`` are shared across the batch (2-D). The RHS vectors + ``h`` and ``b`` may be batched (``(B, ·)``) or shared (``(·,)`` / + ``None``, broadcast over the batch). Returns ``xs`` of shape + ``(B, n)``. + + Forward solves run on the rayon-parallel ``solve_qp_batch`` path + (outer-parallel across instances, serial within). The backward + differentiates each instance independently: gradients to the shared + ``P``/``G``/``A`` sum over the batch; gradients to ``c``/``h``/``b`` + stay per-row. ``∇P`` is the symmetric gradient. + + ``warm_start`` (optional) seeds each instance's iteration: a ``(B, n)`` + array of primals (e.g. a previous batch's returned ``xs``) or a + sequence of per-row results/vectors. It is not differentiated and does + not change the solution or its gradients — only the iteration count. + """ + P = jnp.asarray(P, dtype=jnp.float64) + cs = jnp.asarray(c, dtype=jnp.float64) + if cs.ndim != 2: + raise ValueError(f"solve_qp_batch: `c` must be 2-D (B, n), got {cs.shape}") + b_sz, n = cs.shape + + G0 = jnp.zeros((0, n)) if G is None else jnp.asarray(G, dtype=jnp.float64) + A0 = jnp.zeros((0, n)) if A is None else jnp.asarray(A, dtype=jnp.float64) + + # Fold shared finite bounds into the (shared) inequality block. The + # per-instance h block only spans the user G rows; the bound rows are + # constant and broadcast across the batch. + G_full, h_bounds = _expand_bounds(G0, jnp.zeros((G0.shape[0],)), lb, ub, n) + m_g = G_full.shape[0] + n_user_rows = G0.shape[0] + bound_rows = m_g - n_user_rows + + if h is None: + hs_user = jnp.zeros((b_sz, n_user_rows)) + else: + h_arr = jnp.asarray(h, dtype=jnp.float64) + hs_user = ( + jnp.broadcast_to(h_arr, (b_sz, n_user_rows)) + if h_arr.ndim == 1 + else h_arr + ) + hs_bounds = jnp.broadcast_to(h_bounds[n_user_rows:], (b_sz, bound_rows)) + hs = jnp.concatenate([hs_user, hs_bounds], axis=1) + + m_a = A0.shape[0] + if b is None: + bs = jnp.zeros((b_sz, m_a)) + else: + b_arr = jnp.asarray(b, dtype=jnp.float64) + bs = jnp.broadcast_to(b_arr, (b_sz, m_a)) if b_arr.ndim == 1 else b_arr + + warm_xs = _warm_primal_batch(warm_start, b_sz, n) + fn = _make_qp_batch_vjp(n, m_g, m_a, tol, max_iter) + return fn(P, cs, G_full, hs, A0, bs, warm_xs) + + +class QpLayer: + """A reusable differentiable QP layer with fixed structure. + + Captures ``P, G, A`` (and bounds) once; calling the layer with + ``c``/``b``/``h`` solves and is differentiable w.r.t. those (and, via + :func:`solve_qp`, w.r.t. the captured matrices too). Suitable for use + inside a larger JAX model (``jax.grad`` / ``jacrev`` / ``vmap``). + + Pass ``warm_start=`` (a previous primal ``x``) to ``__call__`` to seed + the iteration on a nearby problem; for fixed-structure repeated solves, + :class:`pounce.qp.QpFactorization` (host API) additionally reuses the + symbolic factorization. + """ + + def __init__(self, P, G=None, A=None, lb=None, ub=None, *, tol=None, max_iter=None): + self._P = P + self._G = G + self._A = A + self._lb = lb + self._ub = ub + self._tol = tol + self._max_iter = max_iter + + def __call__(self, c, *, b=None, h=None, warm_start=None): + return solve_qp( + P=self._P, + c=c, + G=self._G, + h=h, + A=self._A, + b=b, + lb=self._lb, + ub=self._ub, + tol=self._tol, + max_iter=self._max_iter, + warm_start=warm_start, + ) + + def batch(self, cs, *, b=None, h=None, warm_start=None): + """Solve a parallel batch (rayon) sharing this layer's structure. + + ``cs`` has shape ``(B, n)``; ``h``/``b`` may be batched or shared. + Pass ``warm_start`` (a ``(B, n)`` array of primals) to seed each + instance. Differentiable; see :func:`solve_qp_batch`. + """ + return solve_qp_batch( + P=self._P, + c=cs, + G=self._G, + h=h, + A=self._A, + b=b, + lb=self._lb, + ub=self._ub, + tol=self._tol, + max_iter=self._max_iter, + warm_start=warm_start, + ) + + +# --- Differentiable SOCP (cone-aware OptNet implicit differentiation) ---- +# +# Generalizes the QP backward to a product of nonnegative-orthant and +# second-order cones. The only change in the KKT differential is the +# complementarity row: the orthant's diagonal scalings `diag(z)`, +# `diag(slack)` become the cone's **arrow operators** `Arw(z)`, `Arw(slack)` +# (block-diagonal; an orthant block stays diagonal). The forward solve calls +# the cone-capable `_pounce.solve_socp`. + + +def _normalize_socp_cones(cones): + """Coerce cone specs into ``((is_soc, dim), …)`` (static) and the + ``[(kind, dim), …]`` form the binding wants. Ints are second-order.""" + static = [] + specs = [] + for spec in cones: + if isinstance(spec, (tuple, list)) and len(spec) == 2: + kind, d = str(spec[0]).lower(), int(spec[1]) + elif isinstance(spec, int): + kind, d = "soc", int(spec) + else: + raise ValueError(f"bad cone spec {spec!r}") + is_soc = kind in ("soc", "q", "secondorder") + static.append((is_soc, d)) + specs.append(("soc" if is_soc else "nonneg", d)) + return tuple(static), specs + + +def _arrow(v): + """Arrow matrix ``Arw(v) = [[v₀, v₁ᵀ], [v₁, v₀ I]]`` of a cone block.""" + m = v.shape[0] + if m == 1: + return v.reshape(1, 1) + v0, v1 = v[0], v[1:] + top = jnp.concatenate([v0.reshape(1, 1), v1.reshape(1, -1)], axis=1) + bot = jnp.concatenate([v1.reshape(-1, 1), v0 * jnp.eye(m - 1)], axis=1) + return jnp.concatenate([top, bot], axis=0) + + +def _scaling_blockdiag(v, cones): + """Block-diagonal cone scaling: ``Arw(v_block)`` for a second-order + block, ``diag(v_block)`` for an orthant block.""" + blocks = [] + off = 0 + for is_soc, d in cones: + vb = v[off : off + d] + blocks.append(_arrow(vb) if is_soc else jnp.diag(vb)) + off += d + return block_diag(*blocks) if blocks else jnp.zeros((0, 0)) + + +def _socp_backward(P, G, A, h, x, lam, nu, gx, cones): + """Cone-aware OptNet backward (cf. :func:`_kkt_backward`). The + complementarity row uses the arrow operators of the cones.""" + n = x.shape[0] + m_g = G.shape[0] + m_a = A.shape[0] + slack = G @ x - h + arw_z = _scaling_blockdiag(lam, cones) + arw_slack = _scaling_blockdiag(slack, cones) + zero_ga = jnp.zeros((m_g, m_a)) + zero_ag = jnp.zeros((m_a, m_g)) + zero_aa = jnp.zeros((m_a, m_a)) + top = jnp.concatenate([P, G.T, A.T], axis=1) + mid = jnp.concatenate([arw_z @ G, arw_slack, zero_ga], axis=1) + bot = jnp.concatenate([A, zero_ag, zero_aa], axis=1) + kkt = jnp.concatenate([top, mid, bot], axis=0) + rhs = -jnp.concatenate([gx, jnp.zeros(m_g), jnp.zeros(m_a)]) + d = jnp.linalg.solve(kkt, rhs) + d_x = d[:n] + d_lam = d[n : n + m_g] + d_nu = d[n + m_g :] + grad_c = d_x + grad_h = -d_lam + grad_b = -d_nu + grad_P = 0.5 * (jnp.outer(d_x, x) + jnp.outer(x, d_x)) + grad_G = jnp.outer(d_lam, x) + jnp.outer(lam, d_x) + grad_A = jnp.outer(d_nu, x) + jnp.outer(nu, d_x) + return grad_P, grad_c, grad_G, grad_h, grad_A, grad_b + + +def _forward_solve_socp(P, c, G, h, A, b, specs, tol, max_iter): + """Host-side SOCP forward via pounce-convex. Returns (x, z, y).""" + m_g = G.shape[0] + m_a = A.shape[0] + prob = _build_problem(P, c, G, h, A, b) + d = _pounce.solve_socp(prob, specs, tol=tol, max_iter=max_iter) + _check_status(d["status"], "SOCP differentiable forward solve") + x = np.asarray(d["x"], dtype=np.float64) + lam, nu = _split_duals(d, m_g, m_a) + return x, lam, nu + + +def _make_socp_vjp(n, m_g, m_a, cones, specs, tol, max_iter): + shapes = ( + jax.ShapeDtypeStruct((n,), jnp.float64), + jax.ShapeDtypeStruct((m_g,), jnp.float64), + jax.ShapeDtypeStruct((m_a,), jnp.float64), + ) + + def forward(P, c, G, h, A, b): + def host(P_h, c_h, G_h, h_h, A_h, b_h): + return _forward_solve_socp( + np.asarray(P_h), np.asarray(c_h), np.asarray(G_h), + np.asarray(h_h), np.asarray(A_h), np.asarray(b_h), + specs, tol, max_iter, + ) + + return jax.pure_callback(host, shapes, P, c, G, h, A, b) + + @jax.custom_vjp + def socp(P, c, G, h, A, b): + x, _, _ = forward(P, c, G, h, A, b) + return x + + def fwd(P, c, G, h, A, b): + x, lam, nu = forward(P, c, G, h, A, b) + return x, (P, G, A, h, x, lam, nu) + + def bwd(res, gx): + P, G, A, h, x, lam, nu = res + return _socp_backward(P, G, A, h, x, lam, nu, gx, cones) + + socp.defvjp(fwd, bwd) + return socp + + +def solve_socp(*, P, c, G, h, A=None, b=None, cones, tol=None, max_iter=None): + """Differentiable convex-SOCP solve ``x*(P, c, G, h, A, b)`` over a + product of cones. + + Solves ``min ½xᵀPx+cᵀx s.t. Gx ⪯_K h, Ax=b`` where the inequality block + is partitioned by ``cones`` — a sequence of ``(kind, dim)`` specs + (``"nonneg"``/``"soc"``; an int means a second-order cone). Each slack + ``s = h − Gx`` block must lie in its cone. Differentiable w.r.t. + ``P, c, G, h, A, b`` via cone-aware OptNet implicit differentiation + (``diag`` → the cones' arrow operators). + """ + P = jnp.asarray(P, dtype=jnp.float64) + c = jnp.asarray(c, dtype=jnp.float64) + n = c.shape[0] + G = jnp.asarray(G, dtype=jnp.float64) + h = jnp.asarray(h, dtype=jnp.float64) + A0 = jnp.zeros((0, n)) if A is None else jnp.asarray(A, dtype=jnp.float64) + b0 = jnp.zeros((0,)) if b is None else jnp.asarray(b, dtype=jnp.float64) + static, specs = _normalize_socp_cones(cones) + fn = _make_socp_vjp(n, G.shape[0], A0.shape[0], static, specs, tol, max_iter) + return fn(P, c, G, h, A0, b0) diff --git a/python/pounce/qp.py b/python/pounce/qp.py new file mode 100644 index 00000000..6506fbba --- /dev/null +++ b/python/pounce/qp.py @@ -0,0 +1,630 @@ +"""Convex LP/QP solver — Pythonic wrapper over the ``pounce-convex`` IPM. + +Solves the standard-form convex quadratic program + +.. code-block:: text + + minimize ½ xᵀP x + cᵀx + subject to A x = b + G x ≤ h + lb ≤ x ≤ ub + +with a specialized interior-point method (Mehrotra predictor-corrector), +presolve, and verified infeasibility / unboundedness detection. ``P = 0`` +gives an LP. + +This module is the friendly surface over the compiled ``_pounce`` +bindings: it accepts dense vectors and (optionally) scipy-sparse or dense +matrices, and returns a small :class:`QpResult`. For differentiable QP +layers (JAX), see :mod:`pounce.jax` (``solve_qp`` / ``QpLayer``). + +Example +------- +>>> import numpy as np +>>> from pounce.qp import solve_qp +>>> # min ½‖x‖²·2 − 3x0 − 4x1 s.t. 0 ≤ x ≤ 1 +>>> r = solve_qp(P=np.diag([2.0, 2.0]), c=[-3.0, -4.0], +... lb=[0, 0], ub=[1, 1]) +>>> r.status, r.x +('optimal', array([1., 1.])) +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Optional, Sequence + +import numpy as np + +from . import _pounce + +__all__ = [ + "QpResult", + "QpFactorization", + "QpSensitivity", + "ReducedHessian", + "solve_qp", + "solve_socp", + "solve_qp_batch", + "solve_qp_multi_rhs", +] + + +@dataclass +class QpResult: + """Solution of a convex QP. + + Attributes + ---------- + status: + One of ``"optimal"``, ``"primal_infeasible"``, + ``"dual_infeasible"`` (unbounded), ``"iteration_limit"``, + ``"numerical_failure"``. + x: + Primal solution, shape ``(n,)``. + y: + Equality multipliers, shape ``(m_eq,)``. + z: + Inequality multipliers ``≥ 0``, shape ``(m_ineq,)``. + z_lb, z_ub: + Bound multipliers ``≥ 0``, shape ``(n,)``. + obj: + Objective value ``½ xᵀP x + cᵀx``. + iters: + Interior-point iterations taken. + residuals: + Final KKT residuals as a dict with keys + ``primal_infeasibility``, ``dual_infeasibility``, + ``complementarity``, and ``kkt_error`` (the max of the three). + ``None`` for conic (:func:`solve_socp`) solves, where the slack + lives in a non-orthant cone and these orthant residuals do not + apply. + iterates: + Per-iteration convergence trace — a list of dicts with keys + ``iter``, ``objective``, ``primal_infeasibility``, + ``dual_infeasibility``, ``mu``, ``alpha_primal``, ``alpha_dual``. + Empty unless the solve was called with ``collect_iterates=True``. + """ + + status: str + x: np.ndarray + y: np.ndarray + z: np.ndarray + z_lb: np.ndarray + z_ub: np.ndarray + obj: float + iters: int + residuals: Optional[dict] = None + iterates: list = field(default_factory=list) + + @property + def success(self) -> bool: + return self.status == "optimal" + + @property + def kkt_error(self) -> Optional[float]: + """Overall KKT error (max residual), or ``None`` for conic solves.""" + return None if self.residuals is None else self.residuals["kkt_error"] + + +@dataclass +class ReducedHessian: + """Reduced Hessian of a QP on its active manifold, with eigendecomposition. + + Attributes + ---------- + n_dof: + Degrees of freedom — the dimension of every array here. Equals + ``n`` minus the rank of the active-constraint Jacobian. + matrix: + The reduced Hessian ``H_R = Zᵀ P Z``, shape ``(n_dof, n_dof)``. + eigenvalues: + Eigenvalues of ``H_R`` in ascending order, shape ``(n_dof,)``. All + positive ⟺ a strict second-order minimizer; the smallest gives the + weakest curvature, and the spread is the conditioning on the active + manifold. + eigenvectors: + Eigenvectors as columns, shape ``(n_dof, n_dof)``; column ``j`` + pairs with ``eigenvalues[j]``. + """ + + n_dof: int + matrix: np.ndarray + eigenvalues: np.ndarray + eigenvectors: np.ndarray + + @property + def is_positive_definite(self) -> bool: + """Whether every eigenvalue is positive (strict second-order min).""" + return self.n_dof == 0 or bool(self.eigenvalues[0] > 0.0) + + +def _coo(mat, n_cols: int, what: str): + """Return ``(rows, cols, vals)`` int/int/float lists for a matrix + given as a scipy-sparse matrix, a dense array, or ``None``.""" + if mat is None: + return [], [], [] + # scipy sparse (any format) → COO. + if hasattr(mat, "tocoo"): + coo = mat.tocoo() + return ( + coo.row.astype(np.int64).tolist(), + coo.col.astype(np.int64).tolist(), + coo.data.astype(np.float64).tolist(), + ) + arr = np.asarray(mat, dtype=np.float64) + if arr.ndim != 2: + raise ValueError(f"{what}: expected a 2-D matrix, got shape {arr.shape}") + rows, cols = np.nonzero(arr) + return ( + rows.astype(np.int64).tolist(), + cols.astype(np.int64).tolist(), + arr[rows, cols].tolist(), + ) + + +def _lower_triangle_coo(P, n: int): + """COO of the lower triangle of the symmetric Hessian ``P``. + + Accepts a scipy-sparse or dense ``P`` (assumed symmetric) and keeps + only entries with ``row >= col``; ``None`` → empty (an LP).""" + r, c, v = _coo(P, n, "P") + out_r, out_c, out_v = [], [], [] + for ri, ci, vi in zip(r, c, v): + if ri >= ci: + out_r.append(ri) + out_c.append(ci) + out_v.append(vi) + return out_r, out_c, out_v + + +def _build( + P, + c: Sequence[float], + A, + b: Optional[Sequence[float]], + G, + h: Optional[Sequence[float]], + lb: Optional[Sequence[float]], + ub: Optional[Sequence[float]], +) -> "_pounce.QpProblem": + c = np.asarray(c, dtype=np.float64).ravel() + n = c.shape[0] + pr, pc, pv = _lower_triangle_coo(P, n) + ar, ac, av = _coo(A, n, "A") + gr, gc, gv = _coo(G, n, "G") + return _pounce.QpProblem( + n=n, + c=c.tolist(), + p_rows=pr, + p_cols=pc, + p_vals=pv, + a_rows=ar, + a_cols=ac, + a_vals=av, + b=[] if b is None else np.asarray(b, dtype=np.float64).ravel().tolist(), + g_rows=gr, + g_cols=gc, + g_vals=gv, + h=[] if h is None else np.asarray(h, dtype=np.float64).ravel().tolist(), + lb=[] if lb is None else np.asarray(lb, dtype=np.float64).ravel().tolist(), + ub=[] if ub is None else np.asarray(ub, dtype=np.float64).ravel().tolist(), + ) + + +def _to_result(d: dict) -> QpResult: + return QpResult( + status=d["status"], + x=np.asarray(d["x"]), + y=np.asarray(d["y"]), + z=np.asarray(d["z"]), + z_lb=np.asarray(d["z_lb"]), + z_ub=np.asarray(d["z_ub"]), + obj=float(d["obj"]), + iters=int(d["iters"]), + residuals=d.get("residuals"), + iterates=list(d.get("iterates", [])), + ) + + +def _warm_dict(warm): + """Coerce a warm start (a :class:`QpResult` or a mapping) into the + ``{x, y, z, z_lb, z_ub}`` dict the binding expects, or ``None``.""" + if warm is None: + return None + if isinstance(warm, QpResult): + src = { + "x": warm.x, + "y": warm.y, + "z": warm.z, + "z_lb": warm.z_lb, + "z_ub": warm.z_ub, + } + else: + src = warm + out = {} + for k in ("x", "y", "z", "z_lb", "z_ub"): + v = src.get(k) if hasattr(src, "get") else src[k] + if v is not None: + out[k] = np.asarray(v, dtype=np.float64).ravel().tolist() + return out + + +def solve_qp( + P=None, + c=None, + A=None, + b=None, + G=None, + h=None, + lb=None, + ub=None, + *, + tol: Optional[float] = None, + max_iter: Optional[int] = None, + warm_start=None, + collect_iterates: bool = False, +) -> QpResult: + """Solve one convex QP. See the module docstring for the form. + + ``P`` (lower triangle is used; assumed symmetric) and ``A``/``G`` may + be scipy-sparse or dense; ``None`` matrices are empty. ``c`` is + required and sets ``n``. + + ``warm_start`` (optional) is a previous :class:`QpResult` (or a mapping + with ``x``/``y``/``z``/``z_lb``/``z_ub``) for a *nearby* problem. It + seeds the interior-point iteration to reduce the iteration count; it + does not change the solution, and a dimension mismatch is ignored. + + The returned :class:`QpResult` carries the final KKT ``residuals``; + pass ``collect_iterates=True`` to also capture the per-iteration + convergence trace in ``result.iterates``. + """ + if c is None: + raise ValueError("solve_qp: `c` is required") + prob = _build(P, c, A, b, G, h, lb, ub) + return _to_result( + _pounce.solve_qp( + prob, + tol=tol, + max_iter=max_iter, + warm_start=_warm_dict(warm_start), + collect_iterates=collect_iterates, + ) + ) + + +def _normalize_cones(cones): + """Coerce a cone partition into the binding's ``[(kind, dim), …]``. + + Accepts ``("soc", 3)`` / ``("nonneg", 2)`` / ``("exp", 3)`` / + ``("pow", 0.5)`` / ``("psd", 3)`` tuples, or the shorthand ``3`` (a + second-order cone of that dim). Kind strings are case-insensitive + (``"soc"``/``"q"``, ``"nonneg"``/``"nn"``/``"+"``, + ``"exp"``/``"exponential"``, ``"pow"``/``"power"``, ``"psd"``/``"sdp"``). + The second element is the dimension for ``soc``/``nonneg``, the exponent + ``α`` for ``pow``, and the **matrix size n** for ``psd`` (spanning + ``n(n+1)/2`` svec rows).""" + out = [] + for spec in cones: + if isinstance(spec, (tuple, list)) and len(spec) == 2: + # Pass the value through as a float; the binding interprets it as a + # dimension (soc/nonneg) or an exponent (pow). + out.append((str(spec[0]), float(spec[1]))) + elif isinstance(spec, int): + out.append(("soc", float(spec))) + else: + raise ValueError(f"bad cone spec {spec!r}; use (kind, dim) or an int") + return out + + +def solve_socp( + P=None, + c=None, + A=None, + b=None, + G=None, + h=None, + *, + cones, + tol: Optional[float] = None, + max_iter: Optional[int] = None, + collect_iterates: bool = False, +) -> QpResult: + """Solve a standard-form conic program (LP/QP + second-order and/or + exponential cones). + + Same form as :func:`solve_qp` minus variable bounds, but the inequality + block ``Gx ≤ h`` is partitioned by ``cones`` — a sequence of + ``(kind, dim)`` specs covering the rows of ``G`` in order. Each slack + block ``s = h − Gx`` must lie in its cone: + + - ``("nonneg", d)`` — the nonnegative orthant ``s ≥ 0``; + - ``("soc", d)`` — the second-order cone ``{ (t, x) : t ≥ ‖x‖₂ }`` + (an int ``d`` is shorthand for this); + - ``("exp", 3)`` — the 3-D exponential cone + ``{ (x, y, z) : y·exp(x/y) ≤ z, y > 0 }``, which routes to the + non-symmetric HSDE solver and unlocks geometric programming, entropy, + log-sum-exp, and logistic models; + - ``("pow", α)`` — the 3-D power cone + ``{ (x, y, z) : |x| ≤ y^α z^{1−α}, y,z ≥ 0 }`` with ``α ∈ (0, 1)`` + (the second tuple element is the **exponent**, not a dimension); the + building block for ``p``-norm and general geometric constraints. + - ``("psd", n)`` — the positive-semidefinite cone over symmetric + ``n×n`` matrices (small dense SDPs). Its slack block is the + **symmetric vectorization** ``svec(X)`` (length ``n(n+1)/2``; lower + triangle, column by column, off-diagonals scaled by ``√2`` so that + ``⟨X,Y⟩ = svec(X)·svec(Y)``), and ``smat(s) ⪰ 0`` is enforced. + + A second-order cone may be freely mixed with an exp/power cone (the + non-symmetric driver handles both). The PSD cone is self-scaled and runs + on the symmetric driver, so it **cannot** be combined with exp/power + cones in one problem (a clear error is raised if you try). + + Examples + -------- + >>> # min t s.t. (t, x − x*) ∈ SOC (minimize ‖x − x*‖) + >>> r = solve_socp(c=[1, 0, 0], G=-np.eye(3), h=[0, -2, 1], + ... cones=[("soc", 3)]) + + >>> # Geometric program min x + 1/x = min_u e^u + e^{-u} (optimum 2). + >>> # Variables (u, t1, t2); (u,1,t1)∈Kexp, (-u,1,t2)∈Kexp. + >>> import numpy as np + >>> G = np.zeros((6, 3)) + >>> G[0, 0] = -1.0 # s0 = u + >>> G[2, 1] = -1.0 # s2 = t1 + >>> G[3, 0] = 1.0 # s3 = -u + >>> G[5, 2] = -1.0 # s5 = t2 + >>> r = solve_socp(c=[0, 1, 1], G=G, h=[0, 1, 0, 0, 1, 0], + ... cones=[("exp", 3), ("exp", 3)]) + >>> round(r.obj, 6) + 2.0 + """ + if c is None: + raise ValueError("solve_socp: `c` is required") + prob = _build(P, c, A, b, G, h, None, None) + specs = _normalize_cones(cones) + return _to_result( + _pounce.solve_socp( + prob, specs, tol=tol, max_iter=max_iter, collect_iterates=collect_iterates + ) + ) + + +def solve_qp_batch( + problems: Sequence[dict], + *, + tol: Optional[float] = None, + max_iter: Optional[int] = None, + warm_starts: Optional[Sequence] = None, +) -> list[QpResult]: + """Solve a batch of convex QPs in parallel (across instances). + + ``problems`` is a sequence of kwarg dicts, each accepted by + :func:`solve_qp` (keys ``P, c, A, b, G, h, lb, ub``). Returns one + :class:`QpResult` per input, in order. + + ``warm_starts`` (optional) is a sequence — one per problem — of prior + :class:`QpResult`\\ s or mappings (for a sequence of nearby batches). + Each seeds its instance's iteration; mismatched entries are ignored. + """ + built = [ + _build( + pr.get("P"), + pr["c"], + pr.get("A"), + pr.get("b"), + pr.get("G"), + pr.get("h"), + pr.get("lb"), + pr.get("ub"), + ) + for pr in problems + ] + ws = None + if warm_starts is not None: + if len(warm_starts) != len(built): + raise ValueError( + f"warm_starts has length {len(warm_starts)}, expected {len(built)}" + ) + ws = [_warm_dict(w) or {} for w in warm_starts] + dicts = _pounce.solve_qp_batch(built, tol=tol, max_iter=max_iter, warm_starts=ws) + return [_to_result(d) for d in dicts] + + +def solve_qp_multi_rhs( + P=None, + c=None, + A=None, + b=None, + G=None, + h=None, + lb=None, + ub=None, + *, + cs: Sequence[Sequence[float]], + tol: Optional[float] = None, + max_iter: Optional[int] = None, +) -> list[QpResult]: + """Solve one QP *structure* against many linear objectives, in parallel. + + All of ``P``/``A``/``b``/``G``/``h``/``lb``/``ub`` are shared; only the + linear term varies, given as ``cs`` — a sequence of length-``n`` vectors + (one objective per solve). Returns one :class:`QpResult` per entry of + ``cs``, in order. The ``c`` argument here is only a placeholder for + shape; the per-solve objectives come from ``cs``. + + This is the multiple-right-hand-side analog of :func:`solve_qp_batch`: + use it when the constraint geometry is fixed and you are sweeping the + objective (e.g. a family of cost vectors, a parametric linear term, or + the inner objective of a bilevel sweep). + """ + if cs is None or len(cs) == 0: + raise ValueError("solve_qp_multi_rhs: `cs` must be a non-empty sequence") + n = len(np.asarray(cs[0], dtype=np.float64).ravel()) + # `c` only fixes `n` for the base structure; the real objectives are `cs`. + base_c = c if c is not None else np.zeros(n) + base = _build(P, base_c, A, b, G, h, lb, ub) + cs_list = [np.asarray(ci, dtype=np.float64).ravel().tolist() for ci in cs] + dicts = _pounce.solve_qp_multi_rhs(base, cs_list, tol=tol, max_iter=max_iter) + return [_to_result(d) for d in dicts] + + +class QpFactorization: + """Build-once / solve-many handle for a fixed QP *structure*. + + Builds the KKT symbolic factor once; each :meth:`solve` reuses it for + a problem that shares the structure (same sparsity and set of finite + bounds, varying only ``c``/``b``/``h``/bound *values*). A mismatched + problem returns a result with status ``"numerical_failure"``. + """ + + def __init__( + self, + P=None, + c=None, + A=None, + b=None, + G=None, + h=None, + lb=None, + ub=None, + *, + tol: Optional[float] = None, + max_iter: Optional[int] = None, + ): + if c is None: + raise ValueError("QpFactorization: `c` is required (representative problem)") + base = _build(P, c, A, b, G, h, lb, ub) + self._inner = _pounce.QpFactorization(base, tol=tol, max_iter=max_iter) + + def solve( + self, + P=None, + c=None, + A=None, + b=None, + G=None, + h=None, + lb=None, + ub=None, + *, + warm_start=None, + ) -> QpResult: + """Solve a same-structure instance, reusing the symbolic factor. + + Pass ``warm_start`` (a previous :class:`QpResult` for a nearby + problem) to also seed the iteration — combining symbolic-factor + reuse with warm starting. + """ + if c is None: + raise ValueError("QpFactorization.solve: `c` is required") + prob = _build(P, c, A, b, G, h, lb, ub) + return _to_result(self._inner.solve(prob, warm_start=_warm_dict(warm_start))) + + +class QpSensitivity: + """Post-optimal sensitivity for a convex QP — the sIPOPT analog. + + Solves the QP on construction and holds the active-set KKT + factorization, so each :meth:`parametric_step` is a single + back-substitution (build-once / solve-many). This mirrors the NLP + :class:`pounce.Solver` session — which caches the converged factor for + ``parametric_step`` / ``reduced_hessian`` — specialized to a QP, where + the Lagrangian Hessian is the constant ``P``. + + The standard use is a *parametric* QP: designate one or more equality + constraints as parameters (their right-hand side ``b`` is the + parameter), then predict how the optimum moves as those values change. + ``sensitivity.x + sensitivity.parametric_step(pins, deltas)`` is the + first-order predictor of the perturbed solution — exact while the + active set is unchanged. + + Example + ------- + >>> import numpy as np + >>> from pounce.qp import QpSensitivity + >>> # min ½‖x‖² s.t. x0 + x1 = 2 → x* = (1, 1), dx/db = (½, ½) + >>> s = QpSensitivity(P=np.eye(2), c=[0.0, 0.0], + ... A=[[1.0, 1.0]], b=[2.0]) + >>> dx = s.parametric_step([0], [1.0]) # perturb b0 by +1 + >>> np.round(s.x + dx, 6) + array([1.5, 1.5]) + """ + + def __init__( + self, + P=None, + c=None, + A=None, + b=None, + G=None, + h=None, + lb=None, + ub=None, + *, + tol: Optional[float] = None, + max_iter: Optional[int] = None, + active_tol: float = 1e-7, + ): + if c is None: + raise ValueError("QpSensitivity: `c` is required") + prob = _build(P, c, A, b, G, h, lb, ub) + self._inner = _pounce.QpSensitivity( + prob, tol=tol, max_iter=max_iter, active_tol=active_tol + ) + + @property + def x(self) -> np.ndarray: + """The optimal primal solution ``x*``.""" + return np.asarray(self._inner.x) + + @property + def obj(self) -> float: + """The optimal objective value.""" + return float(self._inner.obj) + + @property + def kkt_dim(self) -> int: + """Active-set KKT dimension ``n + m_eq + n_active``.""" + return int(self._inner.kkt_dim) + + def parametric_step(self, pin_constraint_indices, deltas) -> np.ndarray: + """First-order primal step ``dx ≈ x*(b + Δb) − x*(b)``. + + Equality constraint ``pin_constraint_indices[k]`` (an index into + ``b``) is perturbed by ``deltas[k]``; all other data is held fixed. + Returns the length-``n`` sensitivity, so ``self.x + dx`` predicts + the perturbed solution (exact to first order while the active set is + unchanged). The factorization is reused, so a continuation sweep + costs one back-substitution per query. + """ + pins = [int(i) for i in pin_constraint_indices] + ds = [float(d) for d in deltas] + return np.asarray(self._inner.parametric_step(pins, ds)) + + def reduced_hessian(self, rank_tol: float = 1e-9) -> ReducedHessian: + """Reduced Hessian ``Zᵀ P Z`` on the active manifold + eigendecomp. + + Projects the objective Hessian ``P`` onto the null space of the + active constraints (equalities, active inequalities, and active + variable bounds), then eigendecomposes it. The eigenvalues are the + objective's curvatures along feasible directions — all positive + confirms a strict (well-conditioned) minimizer. Mirrors the NLP + ``solve_with_sens(compute_reduced_hessian=True, rh_eigendecomp=True)``. + + ``rank_tol`` is the relative threshold used to determine the rank of + the active Jacobian (hence the degrees of freedom). The computation + densifies ``P``, so it is meant for QPs with a modest variable count. + """ + d = self._inner.reduced_hessian(rank_tol) + n = int(d["n_dof"]) + # The Rust side returns column-major flat arrays. + matrix = np.asarray(d["matrix"]).reshape((n, n), order="F") + eigvecs = np.asarray(d["eigenvectors"]).reshape((n, n), order="F") + return ReducedHessian( + n_dof=n, + matrix=matrix, + eigenvalues=np.asarray(d["eigenvalues"]), + eigenvectors=eigvecs, + ) diff --git a/python/pounce/sos.py b/python/pounce/sos.py new file mode 100644 index 00000000..d199fa4e --- /dev/null +++ b/python/pounce/sos.py @@ -0,0 +1,129 @@ +"""Polynomial global optimization via sum-of-squares (SOS / Lasserre). + +Globally minimize a polynomial — optionally subject to polynomial +inequality/equality constraints — over the SDP solver. Returns a certified +global lower bound and, when the relaxation is exact (the moment matrix is +flat), the global minimizer(s). + +Polynomials are written as dicts mapping an **exponent tuple** to its +coefficient. Over variables ``(x, y)`` the term ``3·x²y`` is ``(2, 1): 3.0``; +a constant is the all-zeros key. For example ``x⁴ − 2x² + 3`` over one +variable is ``{(4,): 1.0, (2,): -2.0, (0,): 3.0}``. + +Example +------- +>>> from pounce.sos import sos_minimize +>>> r = sos_minimize({(4,): 1.0, (2,): -2.0, (0,): 3.0}) # x⁴ − 2x² + 3 +>>> round(r.lower_bound, 6) +2.0 +>>> r.is_exact, r.num_minimizers # two global minimizers, x = ±1 +(True, 2) +>>> # min −x s.t. 1 − x² ≥ 0 (x ∈ [−1, 1]) → −1 at x = 1 +>>> r = sos_minimize({(1,): -1.0}, inequalities=[{(0,): 1.0, (2,): -1.0}]) +>>> round(r.lower_bound, 6) +-1.0 +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Optional, Sequence + +import numpy as np + +from . import _pounce + +__all__ = ["sos_minimize", "SosResult"] + + +@dataclass +class SosResult: + """Result of an SOS/Lasserre solve. + + Attributes + ---------- + lower_bound: + Certified global lower bound ``γ* ≤ min p`` (the global minimum when + ``is_exact``). + status: + Underlying SDP solve status (``"optimal"`` on success). + is_exact: + ``True`` when the moment matrix is flat — a *sufficient* certificate + that ``lower_bound`` is the global minimum. Non-unique optima (which an + interior-point solver would otherwise return at inflated rank) are + handled by a facial-reduction re-solve, so all global minimizers are + recovered in that case too. It can still be ``False`` — e.g. when the + relaxation order is too low for flatness, or the relaxation is not + exact — but ``lower_bound`` is a valid lower bound either way. + num_minimizers: + Number of global minimizers detected (the flat moment-matrix rank). + minimizers: + The extracted global minimizers, each a length-``n_vars`` array. + Populated when ``is_exact``. + """ + + lower_bound: float + status: str + is_exact: bool + num_minimizers: int + minimizers: list + + @property + def success(self) -> bool: + return self.status == "optimal" + + +def _terms(poly, n_vars: int, what: str): + """Normalize a polynomial (dict ``{exp_tuple: coeff}`` or an iterable of + ``(exp_tuple, coeff)``) into the binding's ``[(list[int], float), …]``.""" + items = poly.items() if hasattr(poly, "items") else poly + out = [] + for exps, coef in items: + exps = tuple(int(e) for e in exps) + if len(exps) != n_vars: + raise ValueError( + f"{what}: exponent {exps} has length {len(exps)}, " + f"expected n_vars = {n_vars}" + ) + out.append((list(exps), float(coef))) + return out + + +def _infer_n_vars(*polys) -> int: + for p in polys: + keys = p.keys() if hasattr(p, "keys") else (e for e, _ in p) + for k in keys: + return len(tuple(k)) + raise ValueError("cannot infer n_vars from empty polynomials; pass n_vars=") + + +def sos_minimize( + objective, + *, + inequalities: Sequence = (), + equalities: Sequence = (), + n_vars: Optional[int] = None, + order: Optional[int] = None, +) -> SosResult: + """Globally minimize ``objective`` subject to ``gᵢ ≥ 0`` (``inequalities``) + and ``hⱼ = 0`` (``equalities``) via the SOS/Lasserre relaxation. + + Each polynomial is a dict ``{exponent_tuple: coefficient}`` (see the module + docstring). ``n_vars`` is inferred from the exponent tuples if omitted. + ``order`` raises the relaxation order above the minimum to tighten the + bound (the Lasserre hierarchy). Returns an :class:`SosResult`. + """ + polys = [objective, *inequalities, *equalities] + if n_vars is None: + n_vars = _infer_n_vars(*polys) + obj = _terms(objective, n_vars, "objective") + ineq = [_terms(g, n_vars, "inequality") for g in inequalities] + eq = [_terms(h, n_vars, "equality") for h in equalities] + d = _pounce.sos_minimize(n_vars, obj, ineq, eq, order=order) + return SosResult( + lower_bound=float(d["lower_bound"]), + status=d["status"], + is_exact=bool(d["is_exact"]), + num_minimizers=int(d["num_minimizers"]), + minimizers=[np.asarray(m) for m in d["minimizers"]], + ) diff --git a/python/pyproject.toml b/python/pyproject.toml index 2a09395a..060f6798 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -10,7 +10,7 @@ build-backend = "maturin" # `python-source = "."` keep the package folder named `pounce`. name = "pounce-solver" version = "0.4.0" -description = "Python interface to POUNCE — a pure-Rust port of the Ipopt interior-point NLP solver. cyipopt-style Problem class, scipy-style minimize() facade, and JAX-friendly autodiff / implicit differentiation." +description = "Python interface to POUNCE — a pure-Rust interior-point optimization solver for nonlinear, conic (LP/QP/SOCP/SDP/exp/power), and global problems (NLP core ported from Ipopt). cyipopt-style Problem class, scipy-style minimize() facade, solve_qp/solve_socp/sos_minimize, and JAX-friendly autodiff / implicit differentiation." readme = "README.md" requires-python = ">=3.9" license = { text = "EPL-2.0" } diff --git a/python/tests/conftest.py b/python/tests/conftest.py new file mode 100644 index 00000000..6d744b48 --- /dev/null +++ b/python/tests/conftest.py @@ -0,0 +1,79 @@ +"""Pytest configuration for the pounce Python test suite. + +Build-hygiene guard against a **stale compiled extension**. + +When the suite runs against an in-repo editable build — the compiled +extension ``python/pounce/_pounce*.so`` sitting next to the package source, +where ``maturin develop`` leaves it — this guard checks that the artifact is +not older than the Rust binding sources it was built from. A stale ``.so`` is +the single most confusing local failure mode: the Rust binding grows a new +keyword argument or function, but pytest imports the old artifact and the +tests die with cryptic ``TypeError: ... unexpected keyword argument`` errors +that read like real bugs rather than "you forgot to rebuild" (this exact +trap cost a debugging session — see dev-notes/pr70-hardening.md, Item H). + +We deliberately *fail fast* with an actionable message rather than +auto-rebuilding: a rebuild needs the Rust toolchain and would make test runs +surprisingly slow and stateful. Wheel installs (site-packages) are +unaffected — there is no in-repo ``.so`` next to the sources to compare, so +the guard is skipped, and CI (which builds a fresh wheel every run, then +installs it) never trips it. + +Set ``POUNCE_SKIP_EXT_STALE_CHECK=1`` to bypass. +""" + +import os +from pathlib import Path + +import pytest + + +def _newest_rust_mtime(crates_dir: Path) -> float: + """Newest mtime among the workspace's Rust sources and crate manifests. + + The extension statically links the whole workspace, so an edit to *any* + crate (not just ``pounce-py``) can change its behavior; comparing against + all of ``crates/`` is the conservative choice. A false "stale" verdict is + harmless — it just asks for a rebuild, which is cheap and always correct. + """ + newest = 0.0 + for p in crates_dir.rglob("*"): + if p.suffix == ".rs" or p.name == "Cargo.toml": + try: + newest = max(newest, p.stat().st_mtime) + except OSError: + pass + return newest + + +def _check_extension_freshness() -> None: + if os.environ.get("POUNCE_SKIP_EXT_STALE_CHECK"): + return + repo_root = Path(__file__).resolve().parents[2] + pkg_dir = repo_root / "python" / "pounce" + crates_dir = repo_root / "crates" + # Only meaningful for an in-repo source checkout that has the editable + # extension built in place. A wheel install has no sibling Rust sources + # (or no in-repo `.so`), so there is nothing to go stale — skip silently. + if not crates_dir.is_dir(): + return + built = sorted(pkg_dir.glob("_pounce*.so")) + sorted(pkg_dir.glob("_pounce*.pyd")) + if not built: + return + so_mtime = max(p.stat().st_mtime for p in built) + src_mtime = _newest_rust_mtime(crates_dir) + if so_mtime < src_mtime: + newest_so = max(built, key=lambda p: p.stat().st_mtime) + raise pytest.UsageError( + f"pounce compiled extension is STALE: {newest_so.name} is older " + "than the Rust sources under crates/. Running pytest now would " + "import the old binding and fail with confusing errors (e.g. " + "'unexpected keyword argument'). Rebuild it first:\n" + " cd python && maturin develop # rebuild in place, or\n" + " make python-test # rebuild then run pytest\n" + "(set POUNCE_SKIP_EXT_STALE_CHECK=1 to bypass this guard.)" + ) + + +def pytest_configure(config): # noqa: ARG001 (pytest hook signature) + _check_extension_freshness() diff --git a/python/tests/test_minimize_autoroute.py b/python/tests/test_minimize_autoroute.py new file mode 100644 index 00000000..af1554fe --- /dev/null +++ b/python/tests/test_minimize_autoroute.py @@ -0,0 +1,111 @@ +"""Auto-routing of ``pounce.minimize`` to the convex LP/QP solver. + +``minimize`` takes opaque callables, so the router (``pounce._route``) probes +them, fits a linear/quadratic model, and validates it at held-out points +before dispatching to ``solve_qp``. These tests pin the two correctness +properties that matter: genuine LP/convex-QP problems route (and report the +right objective, constant included), while nonlinear / nonconvex problems +stay on the NLP path — the router never silently sends them to the QP solver. +""" + +import numpy as np +import pytest + +from pounce import minimize + + +def _routed_to(res): + """The convex selector a result was routed through, or ``None`` for NLP.""" + return res.info.get("solver") + + +def test_convex_qp_routes_and_recovers_objective_constant(): + # min x0² + x1² − 3x0 − 4x1 + 5 s.t. 0 ≤ x ≤ 1 → x*=(1,1), f*=0. + # The +5 constant lives only in `fun`; the QP solver never sees it, so the + # reported objective must add it back (the Finding-#1 issue, Python side). + fun = lambda x: x[0] ** 2 + x[1] ** 2 - 3 * x[0] - 4 * x[1] + 5.0 + jac = lambda x: np.array([2 * x[0] - 3, 2 * x[1] - 4]) + hess = lambda x: np.array([[2.0, 0.0], [0.0, 2.0]]) + res = minimize(fun, [0.5, 0.5], jac=jac, hess=hess, bounds=[(0, 1), (0, 1)]) + + assert _routed_to(res) == "qp-ipm" + assert res.info["problem_class"] == "convex_qp" + assert res.success + np.testing.assert_allclose(res.x, [1.0, 1.0], atol=1e-6) + assert res.fun == pytest.approx(0.0, abs=1e-6) # constant folded back in + assert res.info["obj_constant"] == pytest.approx(5.0) + + +def test_lp_routes_to_lp_selector(): + # min −x0 − 2x1 s.t. x0 + x1 ≤ 1, x ≥ 0 → x*=(0,1), f*=−2. + fun = lambda x: -x[0] - 2 * x[1] + con = {"type": "ineq", "fun": lambda x: 1.0 - x[0] - x[1]} # ≥ 0 + res = minimize(fun, [0.1, 0.1], bounds=[(0, None), (0, None)], constraints=con) + + assert _routed_to(res) == "lp-ipm" + assert res.info["problem_class"] == "lp" + np.testing.assert_allclose(res.x, [0.0, 1.0], atol=1e-6) + assert res.fun == pytest.approx(-2.0, abs=1e-6) + + +def test_routed_qp_matches_nlp_solve(): + # The router must be transparent: forcing NLP gives the same optimum. + fun = lambda x: x[0] ** 2 + x[1] ** 2 - 3 * x[0] - 4 * x[1] + jac = lambda x: np.array([2 * x[0] - 3, 2 * x[1] - 4]) + hess = lambda x: np.array([[2.0, 0.0], [0.0, 2.0]]) + kw = dict(jac=jac, hess=hess, bounds=[(0, 1), (0, 1)]) + + auto = minimize(fun, [0.5, 0.5], **kw) + nlp = minimize(fun, [0.5, 0.5], options={"solver_selection": "nlp"}, **kw) + + assert _routed_to(auto) == "qp-ipm" + assert _routed_to(nlp) is None # forced onto the NLP path + np.testing.assert_allclose(auto.x, nlp.x, atol=1e-6) + assert auto.fun == pytest.approx(nlp.fun, abs=1e-6) + + +def test_nonlinear_objective_stays_on_nlp(): + # Rosenbrock: quartic, not a quadratic — must NOT be routed to the QP solver. + fun = lambda x: (1 - x[0]) ** 2 + 100 * (x[1] - x[0] ** 2) ** 2 + jac = lambda x: np.array([ + -2 * (1 - x[0]) - 400 * x[0] * (x[1] - x[0] ** 2), + 200 * (x[1] - x[0] ** 2), + ]) + res = minimize(fun, [-1.2, 1.0], jac=jac) + + assert _routed_to(res) is None + np.testing.assert_allclose(res.x, [1.0, 1.0], atol=1e-4) + + +def test_nonconvex_qp_stays_on_nlp(): + # Indefinite Hessian diag(−2, 2): a *nonconvex* QP. The convex solver would + # be wrong here, so the router must reject it and fall back to NLP. + fun = lambda x: -(x[0] ** 2) + x[1] ** 2 + jac = lambda x: np.array([-2 * x[0], 2 * x[1]]) + hess = lambda x: np.array([[-2.0, 0.0], [0.0, 2.0]]) + res = minimize(fun, [0.5, 0.5], jac=jac, hess=hess, bounds=[(0, 1), (0, 1)]) + + assert _routed_to(res) is None + + +def test_forced_lp_on_nonlinear_raises(): + fun = lambda x: (1 - x[0]) ** 2 + 100 * (x[1] - x[0] ** 2) ** 2 + with pytest.raises(ValueError): + minimize(fun, [-1.2, 1.0], options={"solver_selection": "lp-ipm"}) + + +def test_forced_qp_on_nonlinear_raises(): + fun = lambda x: x[0] ** 4 + x[1] ** 2 + with pytest.raises(ValueError): + minimize(fun, [1.0, 1.0], options={"solver_selection": "qp-ipm"}) + + +def test_finite_difference_qp_routes_without_user_derivatives(): + # No jac/hess supplied: the router fits the quadratic by finite differences + # and the held-out validation confirms it. min ½‖x−a‖² style box QP. + a = np.array([0.3, 0.7]) + fun = lambda x: float((x[0] - a[0]) ** 2 + (x[1] - a[1]) ** 2) + res = minimize(fun, [0.0, 0.0], bounds=[(0, 1), (0, 1)]) + + assert _routed_to(res) == "qp-ipm" + np.testing.assert_allclose(res.x, a, atol=1e-5) diff --git a/python/tests/test_qp.py b/python/tests/test_qp.py new file mode 100644 index 00000000..de88a6f8 --- /dev/null +++ b/python/tests/test_qp.py @@ -0,0 +1,179 @@ +"""Tests for the convex LP/QP solver bindings (pounce-convex via PyO3). + +Cover one-shot solve, multiple-RHS, the build-once/solve-many +QpFactorization handle, batched solving, and status reporting +(infeasible / unbounded). +""" + +import numpy as np + +from pounce import _pounce as p + + +def _box_qp(c, lo=0.0, hi=1.0): + """min ½·2·‖x‖² + cᵀx s.t. lo ≤ x ≤ hi (P = 2I).""" + n = len(c) + return p.QpProblem( + n=n, + c=list(c), + p_rows=list(range(n)), + p_cols=list(range(n)), + p_vals=[2.0] * n, + lb=[lo] * n, + ub=[hi] * n, + ) + + +def test_solve_qp_box_clamps_to_bounds(): + # unconstrained optimum at (1.5, 2.0); clamped to (1, 1). + r = p.solve_qp(_box_qp([-3.0, -4.0])) + assert r["status"] == "optimal" + x = np.asarray(r["x"]) + assert abs(x[0] - 1.0) < 1e-6 + assert abs(x[1] - 1.0) < 1e-6 + # Upper-bound multipliers are active and positive. + assert np.asarray(r["z_ub"])[0] > 0.5 + + +def test_solve_qp_equality(): + # min x0²+x1² s.t. x0+x1 = 2 → (1, 1), equality dual reported. + prob = p.QpProblem( + n=2, + c=[0.0, 0.0], + p_rows=[0, 1], + p_cols=[0, 1], + p_vals=[2.0, 2.0], + a_rows=[0, 0], + a_cols=[0, 1], + a_vals=[1.0, 1.0], + b=[2.0], + ) + r = p.solve_qp(prob) + assert r["status"] == "optimal" + x = np.asarray(r["x"]) + assert abs(x[0] - 1.0) < 1e-6 and abs(x[1] - 1.0) < 1e-6 + assert np.asarray(r["y"]).shape == (1,) + + +def test_solve_qp_multi_rhs_matches_individual(): + base = _box_qp([0.0, 0.0]) + cs = [[-1.0, -4.0], [-4.0, 1.0], [3.0, -2.0], [0.0, 0.0]] + res = p.solve_qp_multi_rhs(base, cs) + assert len(res) == len(cs) + for c, r in zip(cs, res): + single = p.solve_qp(_box_qp(c)) + assert r["status"] == "optimal" + np.testing.assert_allclose( + np.asarray(r["x"]), np.asarray(single["x"]), atol=1e-6 + ) + + +def test_qp_factorization_build_once_solve_many(): + base = _box_qp([0.0, 0.0]) + handle = p.QpFactorization(base) + for c in ([-1.0, -4.0], [-4.0, 1.0], [3.0, -2.0]): + reused = handle.solve(_box_qp(c)) + one_shot = p.solve_qp(_box_qp(c)) + assert reused["status"] == "optimal" + assert one_shot["status"] == "optimal" + # Both are independent interior-point solves. When the optimum sits on + # an active bound (e.g. c=[3,-2] → vertex (0,1)), the IPM only + # approaches the boundary asymptotically, so the two runs stop at + # slightly different distances from it (here ~1e-5, since they take a + # different iteration count). They agree on the same optimum to the + # solver's near-boundary primal slack, not to full KKT tolerance. + np.testing.assert_allclose( + np.asarray(reused["x"]), np.asarray(one_shot["x"]), atol=1e-4 + ) + + +def test_qp_factorization_rejects_pattern_mismatch(): + handle = p.QpFactorization(_box_qp([0.0, 0.0])) # n = 2 + bad = handle.solve(_box_qp([0.0, 0.0, 0.0])) # n = 3 + assert bad["status"] == "numerical_failure" + # A matching solve still works afterward. + ok = handle.solve(_box_qp([-1.0, -1.0])) + assert ok["status"] == "optimal" + + +def test_solve_qp_batch_order_and_status(): + probs = [_box_qp([-float(k), -1.0]) for k in range(6)] + res = p.solve_qp_batch(probs) + assert len(res) == 6 + assert all(r["status"] == "optimal" for r in res) + + +def test_solve_qp_batch_warm_start(): + # Per-instance warm starts: same solutions as cold, no iter regression. + base_probs = [_box_qp([-float(k), -1.0]) for k in range(4)] + base = p.solve_qp_batch(base_probs) + pert_probs = [_box_qp([-float(k) - 0.1, -1.05]) for k in range(4)] + cold = p.solve_qp_batch(pert_probs) + warm = p.solve_qp_batch(pert_probs, warm_starts=base) + assert len(warm) == 4 + for c, w in zip(cold, warm): + assert w["status"] == "optimal" + np.testing.assert_allclose( + np.asarray(w["x"]), np.asarray(c["x"]), atol=1e-6 + ) + assert int(w["iters"]) <= int(c["iters"]) + + +def test_solve_qp_detects_unbounded(): + # min −x0 with x0 ≥ 0, no upper bound → unbounded below. + prob = p.QpProblem( + n=1, + c=[-1.0], + g_rows=[0], + g_cols=[0], + g_vals=[-1.0], # −x0 ≤ 0 (x0 ≥ 0) + h=[0.0], + ) + r = p.solve_qp(prob) + assert r["status"] == "dual_infeasible" + + +def test_solve_qp_warm_start_matches_cold(): + # Warm starting from a nearby solution must reach the same optimum and + # not increase iterations. + base = p.QpProblem( + n=3, + c=[-1.0, -2.0, -0.5], + p_rows=[0, 1, 2], + p_cols=[0, 1, 2], + p_vals=[2.0, 2.0, 2.0], + g_rows=[0, 0, 0], + g_cols=[0, 1, 2], + g_vals=[1.0, 1.0, 1.0], + h=[1.0], + ) + base_sol = p.solve_qp(base) + pert = p.QpProblem( + n=3, + c=[-1.1, -1.9, -0.55], + p_rows=[0, 1, 2], + p_cols=[0, 1, 2], + p_vals=[2.0, 2.0, 2.0], + g_rows=[0, 0, 0], + g_cols=[0, 1, 2], + g_vals=[1.0, 1.0, 1.0], + h=[1.05], + ) + cold = p.solve_qp(pert) + warm = p.solve_qp(pert, warm_start=base_sol) + assert warm["status"] == "optimal" + np.testing.assert_allclose( + np.asarray(warm["x"]), np.asarray(cold["x"]), atol=1e-6 + ) + assert int(warm["iters"]) <= int(cold["iters"]) + + +def test_qp_problem_validation(): + import pytest + + # c length must equal n. + with pytest.raises(ValueError): + p.QpProblem(n=2, c=[1.0]) + # P strict-upper entry rejected (lower triangle only). + with pytest.raises(ValueError): + p.QpProblem(n=2, c=[0.0, 0.0], p_rows=[0], p_cols=[1], p_vals=[1.0]) diff --git a/python/tests/test_qp_host.py b/python/tests/test_qp_host.py new file mode 100644 index 00000000..6f15a935 --- /dev/null +++ b/python/tests/test_qp_host.py @@ -0,0 +1,107 @@ +"""Host-level convex QP surface (``pounce.qp`` + the top-level re-exports). + +These cover the ergonomics that bring the QP path toward NLP parity: +top-level discoverability, the final KKT ``residuals`` and opt-in iterate +trace on :class:`~pounce.qp.QpResult`, the multiple-RHS host wrapper, and +the catchable error on a malformed cone partition. +""" + +import numpy as np +import pytest + +import pounce +from pounce.qp import QpResult, solve_qp, solve_qp_multi_rhs, solve_socp + + +def test_qp_is_reexported_at_top_level(): + # The QP entry points are reachable from ``pounce.*`` (like ``Problem``), + # not only from ``pounce.qp.*``. + for name in ( + "solve_qp", + "solve_socp", + "solve_qp_batch", + "solve_qp_multi_rhs", + "QpResult", + "QpFactorization", + ): + assert hasattr(pounce, name), name + assert pounce.solve_qp is solve_qp + + +def test_qp_module_star_import_has_no_dangling_names(): + # Every name advertised in ``__all__`` must actually exist (regression: + # ``QpProblem`` was listed but never defined, breaking ``import *``). + import pounce.qp as qp + + missing = [n for n in qp.__all__ if not hasattr(qp, n)] + assert missing == [] + + +def test_residuals_attached_and_kkt_error(): + # min x0²+x1² −3x0 −4x1 s.t. 0 ≤ x ≤ 1 → clamps to (1, 1). + r = solve_qp(P=np.diag([2.0, 2.0]), c=[-3.0, -4.0], lb=[0, 0], ub=[1, 1]) + assert r.status == "optimal" + assert isinstance(r, QpResult) + assert set(r.residuals) == { + "primal_infeasibility", + "dual_infeasibility", + "complementarity", + "kkt_error", + } + assert r.kkt_error == r.residuals["kkt_error"] + assert r.kkt_error < 1e-6 + + +def test_iterate_trace_is_opt_in(): + kw = dict(P=np.diag([2.0, 2.0]), c=[-3.0, -4.0], lb=[0, 0], ub=[1, 1]) + assert solve_qp(**kw).iterates == [] # default: no trace + traced = solve_qp(**kw, collect_iterates=True) + # N interior-point iterations log N+1 records: one per iteration plus a + # terminal record at the converged iterate (matching the NLP trace's + # N+1 convention, so the trace always ends at the optimum). + assert len(traced.iterates) == traced.iters + 1 + first = traced.iterates[0] + assert set(first) == { + "iter", + "objective", + "primal_infeasibility", + "dual_infeasibility", + "mu", + "alpha_primal", + "alpha_dual", + } + # The duality measure decreases over the run. + assert traced.iterates[-1]["mu"] < traced.iterates[0]["mu"] + + +def test_conic_solve_has_no_orthant_residuals(): + # SOCP slack lives in a non-orthant cone: orthant residuals don't apply. + r = solve_socp(c=[1.0, 0.0, 0.0], G=-np.eye(3), h=[0.0, -2.0, 1.0], + cones=[("soc", 3)]) + assert r.status == "optimal" + assert r.residuals is None + assert r.kkt_error is None + + +def test_solve_qp_multi_rhs_host_matches_individual(): + # Shared box structure, swept objective: each solve matches a one-off. + cs = [[-3.0, -4.0], [1.0, 1.0], [-1.0, 2.0], [0.0, 0.0]] + sweep = solve_qp_multi_rhs(P=np.diag([2.0, 2.0]), lb=[0, 0], ub=[1, 1], cs=cs) + assert len(sweep) == len(cs) + for c, r in zip(cs, sweep): + one = solve_qp(P=np.diag([2.0, 2.0]), c=c, lb=[0, 0], ub=[1, 1]) + assert r.status == "optimal" + np.testing.assert_allclose(r.x, one.x, atol=1e-6) + assert r.residuals is not None # multi-RHS still reports residuals + + +def test_solve_qp_multi_rhs_requires_cs(): + with pytest.raises(ValueError): + solve_qp_multi_rhs(P=np.eye(2), cs=[]) + + +def test_malformed_cone_partition_raises_valueerror(): + # An exp cone is always 3 rows; declaring it over a 2-row G is a usage + # error and must raise a catchable ValueError (not panic across FFI). + with pytest.raises(ValueError): + solve_socp(c=[1.0, 0.0], G=-np.eye(2), h=[0.0, 0.0], cones=[("exp", 2)]) diff --git a/python/tests/test_qp_jax.py b/python/tests/test_qp_jax.py new file mode 100644 index 00000000..fd64bc76 --- /dev/null +++ b/python/tests/test_qp_jax.py @@ -0,0 +1,299 @@ +"""Differentiable convex-QP layer (pounce.jax.solve_qp / QpLayer). + +Validates the OptNet implicit-differentiation backward against finite +differences for the linear/RHS parameters (c, b, h), and checks +jacrev / vmap / QpLayer compose. +""" + +import numpy as np +import pytest + +jax = pytest.importorskip("jax") +jax.config.update("jax_enable_x64", True) +import jax.numpy as jnp # noqa: E402 + +from pounce.jax import QpLayer, solve_qp, solve_qp_batch # noqa: E402 + + +def _fd(fn, x, eps=1e-6): + x = np.asarray(x, float) + g = np.zeros_like(x) + for i in range(len(x)): + xp = x.copy() + xp[i] += eps + xm = x.copy() + xm[i] -= eps + g[i] = (float(fn(jnp.array(xp))) - float(fn(jnp.array(xm)))) / (2 * eps) + return g + + +def _fd_mat(fn, M, eps=1e-6): + """Finite-difference gradient of a scalar ``fn`` over a dense matrix.""" + M = np.asarray(M, float) + g = np.zeros_like(M) + for i in range(M.shape[0]): + for j in range(M.shape[1]): + mp = M.copy() + mp[i, j] += eps + mm = M.copy() + mm[i, j] -= eps + g[i, j] = (float(fn(jnp.array(mp))) - float(fn(jnp.array(mm)))) / (2 * eps) + return g + + +def _fd_mat_sym(fn, M, eps=1e-6): + """Finite-difference gradient over a *symmetric* matrix: perturb the + (i, j) and (j, i) entries together so the symmetry is preserved. The + returned array matches the symmetrized analytic gradient.""" + M = np.asarray(M, float) + g = np.zeros_like(M) + for i in range(M.shape[0]): + for j in range(i, M.shape[1]): + mp = M.copy() + mm = M.copy() + mp[i, j] += eps + mm[i, j] -= eps + if i != j: + mp[j, i] += eps + mm[j, i] -= eps + d = (float(fn(jnp.array(mp))) - float(fn(jnp.array(mm)))) / (2 * eps) + # d is ∂/∂(symmetric pair); split across the two entries. + if i == j: + g[i, j] = d + else: + g[i, j] = d / 2 + g[j, i] = d / 2 + return g + + +P = jnp.array([[2.0, 0.0], [0.0, 2.0]]) + + +def test_grad_c_interior(): + # Interior inequalities: gradient flows only through c. + G = jnp.array([[1.0, 1.0], [-1.0, 0.0], [0.0, -1.0]]) + h = jnp.array([10.0, 0.0, 0.0]) + target = jnp.array([0.3, 0.4]) + + def loss(c): + return jnp.sum((solve_qp(P=P, c=c, G=G, h=h) - target) ** 2) + + c0 = jnp.array([-0.5, -0.7]) + g = jax.grad(loss)(c0) + np.testing.assert_allclose(np.asarray(g), _fd(loss, c0), atol=1e-4) + + +def test_grad_h_active_inequality(): + # Active inequality x0+x1 ≤ h: gradient flows through h. + G = jnp.array([[1.0, 1.0]]) + c0 = jnp.array([-4.0, -4.0]) # pulls past the constraint → active + + def loss(h): + return jnp.sum(solve_qp(P=P, c=c0, G=G, h=h) ** 2) + + h0 = jnp.array([1.0]) + g = jax.grad(loss)(h0) + np.testing.assert_allclose(np.asarray(g), _fd(loss, h0), atol=1e-4) + + +def test_grad_c_and_b_equality(): + A = jnp.array([[1.0, 1.0]]) + + def loss_c(c): + return jnp.sum(solve_qp(P=P, c=c, A=A, b=jnp.array([2.0])) ** 2) + + def loss_b(b): + return jnp.sum(solve_qp(P=P, c=jnp.array([-1.0, -3.0]), A=A, b=b) ** 2) + + c0 = jnp.array([-1.0, -3.0]) + b0 = jnp.array([2.0]) + np.testing.assert_allclose( + np.asarray(jax.grad(loss_c)(c0)), _fd(loss_c, c0), atol=1e-4 + ) + np.testing.assert_allclose( + np.asarray(jax.grad(loss_b)(b0)), _fd(loss_b, b0), atol=1e-4 + ) + + +def test_jacrev_of_solution(): + # Jacobian of x*(c) w.r.t. c via jacrev should be well-formed. + G = jnp.array([[1.0, 1.0], [-1.0, 0.0], [0.0, -1.0]]) + h = jnp.array([10.0, 0.0, 0.0]) + c0 = jnp.array([-0.5, -0.7]) + J = jax.jacrev(lambda c: solve_qp(P=P, c=c, G=G, h=h))(c0) + assert J.shape == (2, 2) + # For an interior solution of ½·2‖x‖²+cᵀx, x* = −c/2, so dx/dc = −½I. + np.testing.assert_allclose(np.asarray(J), -0.5 * np.eye(2), atol=1e-5) + + +def test_qp_layer_and_vmap(): + # QpLayer captures fixed structure; vmap over a batch of objectives. + G = jnp.array([[1.0, 1.0]]) + layer = QpLayer(P=P, G=G) + cs = jnp.array([[-1.0, -1.0], [-4.0, -4.0], [0.5, 0.5]]) + hs = jnp.array([[1.0], [1.0], [1.0]]) + xs = jax.vmap(lambda c, h: layer(c, h=h))(cs, hs) + assert xs.shape == (3, 2) + # Each row matches a direct solve. + for i in range(3): + xi = solve_qp(P=P, c=cs[i], G=G, h=hs[i]) + np.testing.assert_allclose(np.asarray(xs[i]), np.asarray(xi), atol=1e-5) + + +# --- Matrix gradients (P, G, A) --------------------------------------- + + +# Matrix-perturbation finite differences amplify the solver's residual +# tolerance (≈ noise/eps), so tighten the IPM tolerance for these checks. +_TIGHT = dict(tol=1e-11, max_iter=200) + + +def test_grad_P_symmetric(): + # ∇P on an active-inequality QP, checked with symmetric perturbations. + G = jnp.array([[1.0, 2.0]]) + h = jnp.array([1.0]) + c0 = jnp.array([-4.0, -1.0]) + target = jnp.array([0.2, 0.3]) + + def loss(Pm): + return jnp.sum((solve_qp(P=Pm, c=c0, G=G, h=h, **_TIGHT) - target) ** 2) + + P0 = jnp.array([[3.0, 0.5], [0.5, 2.0]]) + g = jax.grad(loss)(P0) + np.testing.assert_allclose(np.asarray(g), _fd_mat_sym(loss, P0), atol=1e-4) + + +def test_grad_G_active_inequality(): + # ∇G with an active inequality: gradient flows through the constraint + # matrix. + h = jnp.array([1.0]) + c0 = jnp.array([-4.0, -4.0]) + + def loss(Gm): + return jnp.sum(solve_qp(P=P, c=c0, G=Gm, h=h, **_TIGHT) ** 2) + + G0 = jnp.array([[1.0, 1.0]]) + g = jax.grad(loss)(G0) + np.testing.assert_allclose(np.asarray(g), _fd_mat(loss, G0), atol=1e-4) + + +def test_grad_A_equality(): + # ∇A with an equality constraint. + b = jnp.array([1.0]) + c0 = jnp.array([-1.0, -3.0]) + + def loss(Am): + return jnp.sum(solve_qp(P=P, c=c0, A=Am, b=b, **_TIGHT) ** 2) + + A0 = jnp.array([[1.0, 2.0]]) + g = jax.grad(loss)(A0) + np.testing.assert_allclose(np.asarray(g), _fd_mat(loss, A0), atol=1e-4) + + +# --- Parallel differentiable batch ------------------------------------ + + +def test_solve_qp_batch_matches_single(): + G = jnp.array([[1.0, 1.0]]) + cs = jnp.array([[-1.0, -1.0], [-4.0, -4.0], [0.5, 0.5]]) + hs = jnp.array([[5.0], [1.0], [5.0]]) + xs = solve_qp_batch(P=P, c=cs, G=G, h=hs) + assert xs.shape == (3, 2) + for i in range(3): + xi = solve_qp(P=P, c=cs[i], G=G, h=hs[i]) + np.testing.assert_allclose(np.asarray(xs[i]), np.asarray(xi), atol=1e-5) + + +def test_solve_qp_batch_grad_c_per_row(): + # Per-row gradient w.r.t. c matches summing each instance's grad. + G = jnp.array([[1.0, 1.0]]) + hs = jnp.array([[5.0], [5.0]]) # inactive → interior, dx/dc = -½I + + def loss(cs): + return jnp.sum(solve_qp_batch(P=P, c=cs, G=G, h=hs) ** 2) + + cs0 = jnp.array([[-0.5, -0.7], [0.3, -0.2]]) + g = jax.grad(loss)(cs0) + # Interior: x = -c/2, loss row = ‖c/2‖², dloss/dc = c/2. + np.testing.assert_allclose(np.asarray(g), np.asarray(cs0) / 2.0, atol=1e-5) + + +def test_warm_start_same_solution_and_grad(): + # A warm start must not change the solution or its gradient — only the + # iteration count (which we can't see from JAX). Check x and ∇c match. + G = jnp.array([[1.0, 1.0]]) + h = jnp.array([1.0]) + c0 = jnp.array([-4.0, -4.0]) + + cold = solve_qp(P=P, c=c0, G=G, h=h) + warm = solve_qp(P=P, c=c0, G=G, h=h, warm_start=cold) + np.testing.assert_allclose(np.asarray(cold), np.asarray(warm), atol=1e-7) + + def loss(c, ws=None): + return jnp.sum(solve_qp(P=P, c=c, G=G, h=h, warm_start=ws) ** 2) + + g_cold = jax.grad(lambda c: loss(c))(c0) + # Warm start passed as a plain primal array; gradient must be identical. + g_warm = jax.grad(lambda c: loss(c, ws=np.asarray(cold)))(c0) + np.testing.assert_allclose(np.asarray(g_cold), np.asarray(g_warm), atol=1e-6) + + +def test_solve_qp_batch_warm_same_solution_and_grad(): + # Batch warm start: same xs and same ∇c as cold; only iterations differ. + G = jnp.array([[1.0, 1.0]]) + cs = jnp.array([[-1.0, -1.0], [-4.0, -4.0], [0.5, 0.5]]) + hs = jnp.array([[5.0], [1.0], [5.0]]) + + cold = solve_qp_batch(P=P, c=cs, G=G, h=hs) + warm = solve_qp_batch(P=P, c=cs, G=G, h=hs, warm_start=cold) + np.testing.assert_allclose(np.asarray(cold), np.asarray(warm), atol=1e-6) + + def loss(cs_, ws=None): + return jnp.sum(solve_qp_batch(P=P, c=cs_, G=G, h=hs, warm_start=ws) ** 2) + + g_cold = jax.grad(lambda cs_: loss(cs_))(cs) + g_warm = jax.grad(lambda cs_: loss(cs_, ws=np.asarray(cold)))(cs) + np.testing.assert_allclose(np.asarray(g_cold), np.asarray(g_warm), atol=1e-6) + + +def test_solve_qp_batch_grad_shared_P_sums(): + # Gradient w.r.t. the shared P equals the sum of per-instance ∇P. + cs = jnp.array([[-1.0, -2.0], [-3.0, 0.5]]) + + def loss_batch(Pm): + return jnp.sum(solve_qp_batch(P=Pm, c=cs) ** 2) + + def loss_single(Pm, c): + return jnp.sum(solve_qp(P=Pm, c=c) ** 2) + + P0 = jnp.array([[3.0, 0.5], [0.5, 2.0]]) + g_batch = jax.grad(loss_batch)(P0) + g_sum = sum(jax.grad(lambda Pm, c=c: loss_single(Pm, c))(P0) for c in cs) + np.testing.assert_allclose(np.asarray(g_batch), np.asarray(g_sum), atol=1e-5) + + +def test_infeasible_forward_raises(): + """B3 regression: a non-optimal forward solve must raise, not return a + silent garbage iterate (which would feed meaningless gradients into a + downstream optimizer). Inconsistent equalities x0=1 and x0=2 are + primal-infeasible.""" + P = jnp.array([[2.0]]) + c = jnp.array([0.0]) + A = jnp.array([[1.0], [1.0]]) + b = jnp.array([1.0, 2.0]) + with pytest.raises(RuntimeError, match="status"): + solve_qp(P=P, c=c, A=A, b=b) + + +def test_infeasible_grad_raises(): + """The differentiation path must also surface the failure rather than + differentiate through a non-KKT point.""" + P = jnp.array([[2.0]]) + A = jnp.array([[1.0], [1.0]]) + b = jnp.array([1.0, 2.0]) + + def loss(c): + return jnp.sum(solve_qp(P=P, c=c, A=A, b=b) ** 2) + + with pytest.raises(RuntimeError, match="status"): + jax.grad(loss)(jnp.array([0.0])) diff --git a/python/tests/test_qp_sensitivity.py b/python/tests/test_qp_sensitivity.py new file mode 100644 index 00000000..a43d9494 --- /dev/null +++ b/python/tests/test_qp_sensitivity.py @@ -0,0 +1,176 @@ +"""Post-optimal QP sensitivity (the sIPOPT analog) — pounce.qp.QpSensitivity. + +The parametric step predicts how the optimum moves when an equality +constraint's right-hand side (the "pinned" parameter) changes, reusing one +active-set KKT factorization across queries. Each test cross-checks the +first-order predictor against an exact re-solve of the perturbed QP. +""" + +import numpy as np +import pytest + +import pounce +from pounce.qp import QpSensitivity, ReducedHessian, solve_qp + + +def test_top_level_export(): + assert pounce.QpSensitivity is QpSensitivity + + +def test_equality_rhs_matches_closed_form_and_resolve(): + # min ½‖x‖² s.t. x0 + x1 = b → x* = (b/2, b/2), dx/db = (½, ½). + s = QpSensitivity(P=np.eye(2), c=[0.0, 0.0], A=[[1.0, 1.0]], b=[2.0]) + np.testing.assert_allclose(s.x, [1.0, 1.0], atol=1e-7) + dx = s.parametric_step([0], [1.0]) + np.testing.assert_allclose(dx, [0.5, 0.5], atol=1e-6) + # Predictor lands on the exact re-solve at b = 3. + exact = solve_qp(P=np.eye(2), c=[0.0, 0.0], A=[[1.0, 1.0]], b=[3.0]) + np.testing.assert_allclose(s.x + dx, exact.x, atol=1e-6) + + +def test_step_with_active_inequality(): + # min ½‖x‖² s.t. x0 + x1 = 1, x0 ≥ 1. The bound binds: x* = (1, 0). + # Perturbing b slides along the active face: x = (1, b−1), dx/db = (0, 1). + s = QpSensitivity( + P=np.eye(2), c=[0.0, 0.0], + A=[[1.0, 1.0]], b=[1.0], + G=[[-1.0, 0.0]], h=[-1.0], # −x0 ≤ −1 ⇔ x0 ≥ 1 + ) + np.testing.assert_allclose(s.x, [1.0, 0.0], atol=1e-6) + dx = s.parametric_step([0], [0.5]) + np.testing.assert_allclose(dx, [0.0, 0.5], atol=1e-6) + exact = solve_qp( + P=np.eye(2), c=[0.0, 0.0], A=[[1.0, 1.0]], b=[1.5], + G=[[-1.0, 0.0]], h=[-1.0], + ) + np.testing.assert_allclose(s.x + dx, exact.x, atol=1e-6) + + +def test_step_with_active_variable_bound(): + # min ½‖x‖² s.t. x0 + x1 = 1, x0 ≥ 0.6 via a variable bound. + # x* = (0.6, 0.4); perturbing b moves x1: dx/db = (0, 1). + s = QpSensitivity( + P=np.eye(2), c=[0.0, 0.0], A=[[1.0, 1.0]], b=[1.0], lb=[0.6, -10.0] + ) + np.testing.assert_allclose(s.x, [0.6, 0.4], atol=1e-6) + dx = s.parametric_step([0], [0.2]) + np.testing.assert_allclose(dx, [0.0, 0.2], atol=1e-6) + + +def test_multiple_pins_and_factor_reuse(): + # Two equality constraints, both pinned; and repeated queries reuse the + # factorization (build-once / solve-many). + # min ½‖x‖² s.t. x0 = b0, x1 = b1 → x* = (b0, b1), dx = Δb. + s = QpSensitivity( + P=np.eye(3), c=[0.0, 0.0, 0.0], + A=[[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]], b=[1.0, 2.0], + ) + np.testing.assert_allclose(s.x[:2], [1.0, 2.0], atol=1e-6) + d1 = s.parametric_step([0, 1], [0.3, -0.5]) + np.testing.assert_allclose(d1, [0.3, -0.5, 0.0], atol=1e-6) + # A second, different query against the same cached factor. + d2 = s.parametric_step([1], [1.0]) + np.testing.assert_allclose(d2, [0.0, 1.0, 0.0], atol=1e-6) + + +def test_unbounded_qp_raises(): + with pytest.raises(ValueError): + QpSensitivity(c=[-1.0], G=[[-1.0]], h=[0.0]) # min −x, x ≥ 0 + + +def test_mismatched_pin_and_delta_lengths_raise(): + s = QpSensitivity(P=np.eye(2), c=[0.0, 0.0], A=[[1.0, 1.0]], b=[2.0]) + with pytest.raises(ValueError): + s.parametric_step([0], [1.0, 2.0]) + + +def test_pin_index_out_of_range_raises(): + s = QpSensitivity(P=np.eye(2), c=[0.0, 0.0], A=[[1.0, 1.0]], b=[2.0]) + with pytest.raises(ValueError): + s.parametric_step([5], [1.0]) # only 1 equality constraint + + +def test_top_level_reduced_hessian_export(): + assert pounce.ReducedHessian is ReducedHessian + + +def test_reduced_hessian_unconstrained_equals_P(): + # No active constraints: the null space is all of ℝⁿ, so H_R = P and its + # eigenvalues are P's diagonal {2, 3}. + s = QpSensitivity(P=np.diag([2.0, 3.0]), c=[0.0, 0.0]) + rh = s.reduced_hessian() + assert isinstance(rh, ReducedHessian) + assert rh.n_dof == 2 + np.testing.assert_allclose(rh.eigenvalues, [2.0, 3.0], atol=1e-9) + assert rh.is_positive_definite + + +def test_reduced_hessian_hand_value(): + # P = [[3,1],[1,2]], x0 + x1 = 0 ⇒ Z = (1,−1)/√2, zᵀPz = 3/2. + s = QpSensitivity(P=[[3.0, 1.0], [1.0, 2.0]], c=[0.0, 0.0], A=[[1.0, 1.0]], b=[0.0]) + rh = s.reduced_hessian() + assert rh.n_dof == 1 + np.testing.assert_allclose(rh.eigenvalues, [1.5], atol=1e-9) + np.testing.assert_allclose(rh.matrix, [[1.5]], atol=1e-9) + + +def test_reduced_hessian_matches_numpy_nullspace(): + # Cross-check the eigenvalues against an independent null-space + # projection computed with numpy (eigenvalues are basis-invariant). + P = np.array([[4.0, 1.0, 0.0], [1.0, 3.0, 1.0], [0.0, 1.0, 2.0]]) + A = np.array([[1.0, 1.0, 1.0]]) + s = QpSensitivity(P=P, c=[0.0, 0.0, 0.0], A=A, b=[1.0]) + rh = s.reduced_hessian() + assert rh.n_dof == 2 + + # Orthonormal null-space basis of A from the SVD (rank(A) = 1). + _, _, vt = np.linalg.svd(A) + Z = vt[1:].T # (3, 2), orthonormal columns spanning null(A) + expected = np.linalg.eigvalsh(Z.T @ P @ Z) # ascending + np.testing.assert_allclose(rh.eigenvalues, expected, atol=1e-7) + + # H_R should reconstruct from its own eigendecomposition. + recon = rh.eigenvectors @ np.diag(rh.eigenvalues) @ rh.eigenvectors.T + np.testing.assert_allclose(recon, rh.matrix, atol=1e-9) + + +def test_reduced_hessian_full_rank_active_set_has_zero_dof(): + # Two independent active constraints in 2 variables pin the point + # completely: zero degrees of freedom, so the reduced Hessian is 0×0. + s = QpSensitivity( + P=np.eye(2), c=[0.0, 0.0], A=[[1.0, 1.0]], b=[1.0], lb=[0.6, -10.0] + ) + rh = s.reduced_hessian() + assert rh.n_dof == 0 + assert rh.matrix.shape == (0, 0) + assert rh.is_positive_definite # vacuously true + + +def test_reduced_hessian_with_active_bound(): + # min ½‖x‖² s.t. x0+x1+x2 = 1, x0 ≥ 0.9. The bound binds (x0 = 0.9), + # leaving 1 DOF in the (x1, x2) plane along (0, 1, −1)/√2: H_R = 1. + s = QpSensitivity( + P=np.eye(3), c=[0.0, 0.0, 0.0], + A=[[1.0, 1.0, 1.0]], b=[1.0], lb=[0.9, -10.0, -10.0], + ) + np.testing.assert_allclose(s.x, [0.9, 0.05, 0.05], atol=1e-6) + rh = s.reduced_hessian() + assert rh.n_dof == 1 + np.testing.assert_allclose(rh.eigenvalues, [1.0], atol=1e-7) + + +def test_finite_difference_agreement(): + # The analytic step agrees with a central finite difference of the + # re-solve, on a non-trivial QP with an active inequality. + P = np.array([[2.0, 0.5], [0.5, 1.0]]) + A = [[1.0, 2.0]] + G = [[1.0, 0.0]] + base = dict(P=P, c=[-1.0, 0.5], A=A, b=[1.0], G=G, h=[0.4]) + s = QpSensitivity(**base) + dx = s.parametric_step([0], [1.0]) # d x / d b0 + + eps = 1e-5 + xp = solve_qp(**{**base, "b": [1.0 + eps]}).x + xm = solve_qp(**{**base, "b": [1.0 - eps]}).x + fd = (xp - xm) / (2 * eps) + np.testing.assert_allclose(dx, fd, atol=1e-5) diff --git a/python/tests/test_socp.py b/python/tests/test_socp.py new file mode 100644 index 00000000..e843c76b --- /dev/null +++ b/python/tests/test_socp.py @@ -0,0 +1,161 @@ +"""SOCP solving from Python (pounce.qp.solve_socp).""" + +import numpy as np + +from pounce.qp import solve_socp + + +def test_min_norm_to_point(): + # min t s.t. (t, x0-2, x1+1) in SOC(3) -> t=0, x=(2,-1). + r = solve_socp(c=[1.0, 0.0, 0.0], G=-np.eye(3), h=[0.0, -2.0, 1.0], cones=[("soc", 3)]) + assert r.status == "optimal" + np.testing.assert_allclose(r.x, [0.0, 2.0, -1.0], atol=1e-6) + + +def test_projection_onto_soc(): + # Euclidean projection of (1,2,0) onto the SOC: closed form (1.5,1.5,0). + r = solve_socp(P=np.eye(3), c=[-1.0, -2.0, 0.0], G=-np.eye(3), h=[0, 0, 0], cones=[3]) + assert r.status == "optimal" + np.testing.assert_allclose(r.x, [1.5, 1.5, 0.0], atol=1e-5) + + +def test_mixed_orthant_and_soc(): + # max x0 + x1 s.t. x0 <= 1 (nonneg), |x1| <= 1 (soc) -> (1, 1). + G = np.array([[1.0, 0.0], [0.0, 0.0], [0.0, -1.0]]) + r = solve_socp(c=[-1.0, -1.0], G=G, h=[1.0, 1.0, 0.0], cones=[("nonneg", 1), ("soc", 2)]) + assert r.status == "optimal" + np.testing.assert_allclose(r.x, [1.0, 1.0], atol=1e-5) + + +def test_int_shorthand_is_soc(): + r = solve_socp(c=[1.0, 0.0, 0.0], G=-np.eye(3), h=[0.0, -2.0, 1.0], cones=[3]) + assert r.status == "optimal" + np.testing.assert_allclose(r.x, [0.0, 2.0, -1.0], atol=1e-6) + + +def test_bad_cone_kind_raises(): + import pytest + + with pytest.raises(Exception): + solve_socp(c=[1.0], G=-np.eye(1), h=[0.0], cones=[("banana", 1)]) + + +def test_exp_cone_geometric_program(): + # Geometric program min x + 1/x = min_u e^u + e^{-u} (optimum 2), + # via two exponential cones: (u,1,t1)∈Kexp, (-u,1,t2)∈Kexp. + G = np.zeros((6, 3)) + G[0, 0] = -1.0 # s0 = u + G[2, 1] = -1.0 # s2 = t1 + G[3, 0] = 1.0 # s3 = -u + G[5, 2] = -1.0 # s5 = t2 + r = solve_socp( + c=[0.0, 1.0, 1.0], + G=G, + h=[0.0, 1.0, 0.0, 0.0, 1.0, 0.0], + cones=[("exp", 3), ("exp", 3)], + ) + assert r.status == "optimal" + assert abs(r.obj - 2.0) < 1e-5 + assert abs(r.x[0]) < 1e-4 # u ~ 0 + + +def test_exp_cone_log_sum_exp_mixed(): + # min t s.t. t >= log(e^0 + e^0) = log 2, via two exp cones plus an + # orthant row (u1 + u2 <= 1) -- exercises a mixed exp + nonneg product. + G = np.zeros((7, 3)) + G[0, 0] = 1.0 # s0 = -t + G[2, 1] = -1.0 # s2 = u1 + G[3, 0] = 1.0 # s3 = -t + G[5, 2] = -1.0 # s5 = u2 + G[6, 1] = 1.0 + G[6, 2] = 1.0 # s6 = 1 - u1 - u2 + r = solve_socp( + c=[1.0, 0.0, 0.0], + G=G, + h=[0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], + cones=[("exp", 3), ("exp", 3), ("nonneg", 1)], + ) + assert r.status == "optimal" + assert abs(r.obj - np.log(2.0)) < 1e-5 + + +def test_exp_cone_dim_must_be_three(): + import pytest + + with pytest.raises(Exception): + solve_socp(c=[1.0, 0.0], G=-np.eye(2), h=[0.0, 0.0], cones=[("exp", 2)]) + + +def test_soc_mixed_with_exp(): + # A SOC and an exp cone in one problem: + # min t + z s.t. (t, 3, 4) in SOC(3) -> t >= 5, + # (1, 1, z) in K_exp -> z >= e. + # Optimum t = 5, z = e. + G = np.zeros((6, 2)) + G[0, 0] = -1.0 # SOC s0 = t + G[5, 1] = -1.0 # exp s5 = z + r = solve_socp( + c=[1.0, 1.0], + G=G, + h=[0.0, 3.0, 4.0, 1.0, 1.0, 0.0], + cones=[("soc", 3), ("exp", 3)], + ) + assert r.status == "optimal" + assert abs(r.x[0] - 5.0) < 1e-5 + assert abs(r.x[1] - np.e) < 1e-5 + + +def test_power_cone_known_optimum(): + # max x s.t. (x, 2, 0.5) in K_alpha -> x = 2^alpha * 0.5^(1-alpha). + import numpy as np + + G = -np.eye(3) + for alpha in (0.5, 0.3, 0.75): + r = solve_socp( + c=[-1.0, 0.0, 0.0], + A=[[0, 1, 0], [0, 0, 1]], + b=[2.0, 0.5], + G=G, + h=[0.0, 0.0, 0.0], + cones=[("pow", alpha)], + ) + assert r.status == "optimal" + want = 2.0**alpha * 0.5 ** (1.0 - alpha) + assert abs(r.x[0] - want) < 1e-5 + + +def test_power_cone_bad_alpha_raises(): + import numpy as np + import pytest + + with pytest.raises(Exception): + solve_socp(c=[-1.0, 0.0, 0.0], G=-np.eye(3), h=[0, 0, 0], cones=[("pow", 1.5)]) + + +def test_psd_min_eigenvalue_diagonal(): + # max λ s.t. M − λI ⪰ 0 ⇒ λ = λ_min(M). M = diag(2, 5) → 2. + # x = (λ); G's column is svec(I) = [1, 0, 1], h = svec(M) = [2, 0, 5]. + r = solve_socp(c=[-1.0], G=[[1.0], [0.0], [1.0]], h=[2.0, 0.0, 5.0], + cones=[("psd", 2)]) + assert r.status == "optimal" + assert abs(r.x[0] - 2.0) < 1e-5 + assert abs(r.obj + 2.0) < 1e-5 + + +def test_psd_min_eigenvalue_offdiagonal(): + # M = [[2,1],[1,2]] → λ_min = 1; svec(M) = [2, √2, 2] exercises the + # off-diagonal of the dense W⊗ₛW scaling block. + r = solve_socp(c=[-1.0], G=[[1.0], [0.0], [1.0]], + h=[2.0, 2.0 ** 0.5, 2.0], cones=[("psd", 2)]) + assert r.status == "optimal" + assert abs(r.x[0] - 1.0) < 1e-5 + assert abs(r.obj + 1.0) < 1e-5 + + +def test_psd_cannot_mix_with_exp(): + import numpy as np + import pytest + + with pytest.raises(ValueError): + solve_socp(c=[1.0, 0.0, 0.0, 0.0], G=-np.eye(4), h=[0.0] * 4, + cones=[("psd", 2), ("exp", 3)]) diff --git a/python/tests/test_socp_jax.py b/python/tests/test_socp_jax.py new file mode 100644 index 00000000..689c6e36 --- /dev/null +++ b/python/tests/test_socp_jax.py @@ -0,0 +1,92 @@ +"""Differentiable SOCP layer (pounce.jax.solve_socp). + +Validates the cone-aware OptNet backward (arrow operators in the +complementarity row) against finite differences, for second-order and +mixed orthant+SOC cones. +""" + +import numpy as np +import pytest + +jax = pytest.importorskip("jax") +jax.config.update("jax_enable_x64", True) +import jax.numpy as jnp # noqa: E402 + +from pounce.jax import solve_socp # noqa: E402 + + +def _fd(fn, x, eps=1e-6): + x = np.asarray(x, float) + g = np.zeros_like(x) + for i in range(len(x)): + xp = x.copy() + xp[i] += eps + xm = x.copy() + xm[i] -= eps + g[i] = (float(fn(jnp.array(xp))) - float(fn(jnp.array(xm)))) / (2 * eps) + return g + + +P3 = jnp.eye(3) +G3 = -jnp.eye(3) # s = -G x = x ∈ SOC +H3 = jnp.zeros(3) + + +def test_grad_c_soc_projection(): + # min ½‖x‖² − cᵀx s.t. x ∈ SOC(3): projection-like, smooth in c. + def loss(c): + return jnp.sum(solve_socp(P=P3, c=c, G=G3, h=H3, cones=[("soc", 3)]) ** 2) + + c0 = jnp.array([-1.0, -2.0, 0.3]) + np.testing.assert_allclose(np.asarray(jax.grad(loss)(c0)), _fd(loss, c0), atol=1e-4) + + +def test_grad_h_soc(): + c0 = jnp.array([-1.0, -2.0, 0.3]) + + def loss(h): + return jnp.sum(solve_socp(P=P3, c=c0, G=G3, h=h, cones=[3]) ** 2) + + h0 = jnp.array([0.5, 0.0, 0.0]) + np.testing.assert_allclose(np.asarray(jax.grad(loss)(h0)), _fd(loss, h0), atol=1e-4) + + +def test_grad_c_and_b_soc_with_equality(): + A = jnp.array([[1.0, 0.0, 0.0]]) + + def loss_c(c): + return jnp.sum( + solve_socp(P=P3, c=c, G=G3, h=H3, A=A, b=jnp.array([0.5]), cones=[3]) ** 2 + ) + + def loss_b(b): + c0 = jnp.array([0.0, -1.0, 0.0]) + return jnp.sum(solve_socp(P=P3, c=c0, G=G3, h=H3, A=A, b=b, cones=[3]) ** 2) + + c0 = jnp.array([0.0, -1.0, 0.0]) + b0 = jnp.array([0.5]) + np.testing.assert_allclose(np.asarray(jax.grad(loss_c)(c0)), _fd(loss_c, c0), atol=1e-4) + np.testing.assert_allclose(np.asarray(jax.grad(loss_b)(b0)), _fd(loss_b, b0), atol=1e-4) + + +def test_grad_mixed_orthant_and_soc(): + # Composite cone: an orthant block and a second-order block. The + # backward must use diag on the orthant rows and the arrow operator on + # the SOC rows. + G = jnp.array([[1.0, 0.0], [0.0, 0.0], [0.0, -1.0]]) + h = jnp.array([1.0, 1.0, 0.0]) + + def loss(c): + return jnp.sum( + solve_socp(P=jnp.eye(2), c=c, G=G, h=h, cones=[("nonneg", 1), ("soc", 2)]) ** 2 + ) + + c0 = jnp.array([-0.5, -0.5]) + np.testing.assert_allclose(np.asarray(jax.grad(loss)(c0)), _fd(loss, c0), atol=1e-4) + + +def test_jacrev_of_soc_solution(): + # x*(c) for the projection is differentiable; jacrev is well-formed. + c0 = jnp.array([-1.0, -2.0, 0.3]) + J = jax.jacrev(lambda c: solve_socp(P=P3, c=c, G=G3, h=H3, cones=[3]))(c0) + assert J.shape == (3, 3) diff --git a/python/tests/test_sos.py b/python/tests/test_sos.py new file mode 100644 index 00000000..24020e21 --- /dev/null +++ b/python/tests/test_sos.py @@ -0,0 +1,103 @@ +"""Polynomial global optimization via SOS (pounce.sos.sos_minimize). + +Polynomials are dicts {exponent_tuple: coefficient}; the solver returns a +certified global lower bound and (when the moment matrix is flat) the global +minimizers extracted from the moment matrix. +""" + +import numpy as np +import pytest + +import pounce +from pounce.sos import SosResult, sos_minimize + + +def test_top_level_export(): + assert pounce.sos_minimize is sos_minimize + assert pounce.SosResult is SosResult + + +def test_univariate_quartic_two_minimizers(): + # x⁴ − 2x² + 3 → min 2 at x = ±1. + r = sos_minimize({(4,): 1.0, (2,): -2.0, (0,): 3.0}) + assert r.success + assert abs(r.lower_bound - 2.0) < 1e-5 + assert r.is_exact and r.num_minimizers == 2 + roots = sorted(float(m[0]) for m in r.minimizers) + assert abs(roots[0] + 1.0) < 1e-3 and abs(roots[1] - 1.0) < 1e-3 + + +def test_facial_reduction_nonunique_minimizers(): + # (x²−1)² + y² → min 0 at (±1, 0). Non-unique optimum: the interior-point + # solver's central moment matrix is rank-inflated, so flat truncation only + # succeeds via the facial-reduction (trace-penalty) re-solve. + p = {(4, 0): 1.0, (2, 0): -2.0, (0, 0): 1.0, (0, 2): 1.0} + r = sos_minimize(p) + assert r.success + assert abs(r.lower_bound) < 1e-5 + assert r.is_exact and r.num_minimizers == 2 + xs = sorted(float(m[0]) for m in r.minimizers) + assert abs(xs[0] + 1.0) < 1e-2 and abs(xs[1] - 1.0) < 1e-2 + assert all(abs(float(m[1])) < 1e-2 for m in r.minimizers) + + +def test_facial_reduction_four_minimizers_order_three(): + # (x²−1)² + (y²−1)² → four global minima (value 0) at (±1, ±1). Needs the + # order-3 relaxation, a larger degenerate SDP that the solver now carries to + # optimality (homogeneous self-dual embedding) so all four atoms come out. + p = { + (4, 0): 1.0, + (2, 0): -2.0, + (0, 4): 1.0, + (0, 2): -2.0, + (0, 0): 2.0, + } + r = sos_minimize(p, order=3) + assert r.success + assert abs(r.lower_bound) < 1e-5 + assert r.is_exact and r.num_minimizers == 4 + quads = {(float(m[0]) > 0, float(m[1]) > 0) for m in r.minimizers} + assert len(quads) == 4, f"expected all four quadrants, got {r.minimizers}" + for m in r.minimizers: + assert abs(abs(float(m[0])) - 1.0) < 2e-2 + assert abs(abs(float(m[1])) - 1.0) < 2e-2 + + +def test_unique_minimizer_2d(): + # (x−1)² + (y−2)² → min 0 at (1, 2). + p = {(2, 0): 1.0, (1, 0): -2.0, (0, 2): 1.0, (0, 1): -4.0, (0, 0): 5.0} + r = sos_minimize(p) + assert r.success and r.is_exact + assert r.num_minimizers == 1 + np.testing.assert_allclose(r.minimizers[0], [1.0, 2.0], atol=1e-3) + assert abs(r.lower_bound) < 1e-5 + + +def test_constrained_box_nonconvex(): + # min −x s.t. 1 − x² ≥ 0 (x ∈ [−1,1]) → −1 at x = 1. + r = sos_minimize({(1,): -1.0}, inequalities=[{(0,): 1.0, (2,): -1.0}]) + assert r.success + assert abs(r.lower_bound + 1.0) < 1e-5 + + +def test_equality_constraint(): + # min x² + y² s.t. x + y − 2 = 0 → 2 at (1,1). + r = sos_minimize( + {(2, 0): 1.0, (0, 2): 1.0}, + equalities=[{(1, 0): 1.0, (0, 1): 1.0, (0, 0): -2.0}], + ) + assert r.success + assert abs(r.lower_bound - 2.0) < 1e-5 + + +def test_explicit_n_vars_and_order(): + # A constant in 2 vars: n_vars can't be inferred from a single (0,0) term + # ambiguously, but order can be raised without changing the bound. + r = sos_minimize({(0, 0): 5.0}, n_vars=2, order=2) + assert r.success + assert abs(r.lower_bound - 5.0) < 1e-6 + + +def test_mismatched_exponent_length_raises(): + with pytest.raises(ValueError): + sos_minimize({(2, 0): 1.0, (1,): -2.0}) # inconsistent tuple lengths diff --git a/scripts/publish-crates.sh b/scripts/publish-crates.sh index ae906459..d5976602 100755 --- a/scripts/publish-crates.sh +++ b/scripts/publish-crates.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash # Publish POUNCE crates to crates.io in dependency order. # -# The first publish of all 18 crates will hit the crates.io rate limit +# The first publish of all 21 crates will hit the crates.io rate limit # for *new* crate names (5 burst then 1 per ~10 min). Before the initial # release email help@crates.io and ask for a temporary exemption for # this batch — they typically grant within a day. See @@ -42,10 +42,13 @@ CRATES=( pounce-l1penalty pounce-presolve pounce-qp + pounce-convex pounce-observability pounce-solve-report pounce-studio-core pounce-algorithm + pounce-simplex + pounce-global pounce-restoration pounce-sensitivity pounce-cinterface