From 7f81ed08e2552eff2135cc754cd37d8ae6a574aa Mon Sep 17 00:00:00 2001
From: Michael Tupek <mrtupek@gmail.com>
Date: Wed, 29 Apr 2026 12:00:56 -0600
Subject: [PATCH 01/27] Stub out new nonlinear PCG solver.

---
 src/smith/numerics/equation_solver.cpp | 150 ++++++++++++++++++++++++-
 src/smith/numerics/solver_config.hpp   |  49 ++++++++
 2 files changed, 198 insertions(+), 1 deletion(-)

diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp
index 0024cbafae..6edefb9328 100644
--- a/src/smith/numerics/equation_solver.cpp
+++ b/src/smith/numerics/equation_solver.cpp
@@ -889,6 +889,144 @@ class TrustRegion : public mfem::NewtonSolver {
   }
 };
 
+/**
+ * @brief Skeleton for a nonlinear preconditioned conjugate-gradient block solver.
+ *
+ * The full algorithm is added in a follow-on chunk. This class establishes the Smith/MFEM integration points used by
+ * that implementation: residual evaluation, Jacobian assembly, Hessian-vector products, preconditioning, counters, and
+ * standard nonlinear convergence bookkeeping.
+ */
+class PcgBlockSolver : public mfem::NewtonSolver {
+ protected:
+  /// Trial solution vector
+  mutable mfem::Vector x_trial;
+  /// Trial residual vector
+  mutable mfem::Vector r_trial;
+  /// Scratch vector
+  mutable mfem::Vector scratch;
+
+  /// Nonlinear solution options
+  NonlinearSolverOptions nonlinear_options;
+
+  /// Preconditioner used by the PCG-block recurrence
+  Solver& pcg_precond;
+
+  /// Reconstructed Smith print level
+  mutable size_t print_level = 0;
+
+ public:
+  /// Internal counter for hess-vecs
+  mutable size_t num_hess_vecs = 0;
+  /// Internal counter for preconditions
+  mutable size_t num_preconds = 0;
+  /// Internal counter for residuals
+  mutable size_t num_residuals = 0;
+  /// Internal counter for matrix assembles
+  mutable size_t num_jacobian_assembles = 0;
+
+#ifdef MFEM_USE_MPI
+  /// Constructor
+  PcgBlockSolver(MPI_Comm comm_, const NonlinearSolverOptions& nonlinear_opts, Solver& preconditioner)
+      : mfem::NewtonSolver(comm_), nonlinear_options(nonlinear_opts), pcg_precond(preconditioner)
+  {
+  }
+#endif
+
+  /// Assemble the Jacobian at x.
+  void assembleJacobian(const mfem::Vector& x) const
+  {
+    SMITH_MARK_FUNCTION;
+    ++num_jacobian_assembles;
+    grad = &oper->GetGradient(x);
+    if (nonlinear_options.force_monolithic) {
+      auto* grad_blocked = dynamic_cast<mfem::BlockOperator*>(grad);
+      if (grad_blocked) grad = buildMonolithicMatrix(*grad_blocked).release();
+    }
+  }
+
+  /// Evaluate the nonlinear residual.
+  mfem::real_t computeResidual(const mfem::Vector& x, mfem::Vector& residual) const
+  {
+    SMITH_MARK_FUNCTION;
+    ++num_residuals;
+    oper->Mult(x, residual);
+    return Norm(residual);
+  }
+
+  /// Apply the assembled Jacobian to a vector.
+  void hessVec(const mfem::Vector& x, mfem::Vector& v) const
+  {
+    SMITH_MARK_FUNCTION;
+    ++num_hess_vecs;
+    grad->Mult(x, v);
+  }
+
+  /// Apply the configured nonlinear PCG preconditioner.
+  void precond(const mfem::Vector& x, mfem::Vector& v) const
+  {
+    SMITH_MARK_FUNCTION;
+    ++num_preconds;
+    pcg_precond.Mult(x, v);
+  }
+
+  /// @overload
+  void Mult(const mfem::Vector&, mfem::Vector& X) const
+  {
+    MFEM_ASSERT(oper != NULL, "the Operator is not set (use SetOperator).");
+    MFEM_ASSERT(prec != NULL, "the Solver is not set (use SetSolver).");
+
+    print_level = print_options.iterations ? 1 : print_level;
+    print_level = print_options.summary ? 2 : print_level;
+
+    num_hess_vecs = 0;
+    num_preconds = 0;
+    num_residuals = 0;
+    num_jacobian_assembles = 0;
+
+    mfem::real_t norm = 0.0;
+    norm = initial_norm = computeResidual(X, r);
+    if (norm == 0.0) {
+      converged = true;
+      final_iter = 0;
+      final_norm = norm;
+      return;
+    }
+
+    const mfem::real_t norm_goal = std::max(rel_tol * initial_norm, abs_tol);
+
+    if (print_level == 1) {
+      mfem::out << "PcgBlock iteration " << std::setw(3) << 0 << " : ||r|| = " << std::setw(13) << norm << "\n";
+    }
+
+    pcg_precond.iterative_mode = false;
+
+    x_trial.SetSize(X.Size());
+    x_trial = 0.0;
+    r_trial.SetSize(X.Size());
+    r_trial = 0.0;
+    scratch.SetSize(X.Size());
+    scratch = 0.0;
+
+    if (print_level >= 2) {
+      mfem::out << "PcgBlock iteration " << std::setw(3) << 0 << " : ||r|| = " << std::setw(13) << norm
+                << ", norm goal = " << std::setw(13) << norm_goal << '\n';
+    }
+
+    if (norm <= norm_goal && nonlinear_options.min_iterations == 0) {
+      converged = true;
+    } else {
+      converged = false;
+    }
+
+    final_iter = 0;
+    final_norm = norm;
+
+    if (!converged && print_level >= 1) {
+      mfem::out << "PcgBlock: No convergence! Algorithm implementation pending.\n";
+    }
+  }
+};
+
 EquationSolver::EquationSolver(NonlinearSolverOptions nonlinear_opts, LinearSolverOptions lin_opts, MPI_Comm comm)
 {
   auto [lin_solver, preconditioner] = buildLinearSolverAndPreconditioner(lin_opts, comm);
@@ -1041,6 +1179,8 @@ std::unique_ptr<mfem::NewtonSolver> buildNonlinearSolver(NonlinearSolverOptions
     nonlinear_solver = std::make_unique<NewtonSolver>(comm, nonlinear_opts);
   } else if (nonlinear_opts.nonlin_solver == NonlinearSolver::TrustRegion) {
     nonlinear_solver = std::make_unique<TrustRegion>(comm, nonlinear_opts, linear_opts, prec);
+  } else if (nonlinear_opts.nonlin_solver == NonlinearSolver::PcgBlock) {
+    nonlinear_solver = std::make_unique<PcgBlockSolver>(comm, nonlinear_opts, prec);
 #ifdef SMITH_USE_PETSC
   } else if (nonlinear_opts.nonlin_solver == NonlinearSolver::PetscNewton) {
     nonlinear_solver = std::make_unique<mfem_ext::PetscNewtonSolver>(comm, nonlinear_opts);
@@ -1298,7 +1438,9 @@ void EquationSolver::defineInputFileSchema(axom::inlet::Container& container)
   nonlinear_container.addDouble("abs_tol", "Absolute tolerance for the Newton solve.").defaultValue(1.0e-4);
   nonlinear_container.addInt("max_iter", "Maximum iterations for the Newton solve.").defaultValue(500);
   nonlinear_container.addInt("print_level", "Nonlinear print level.").defaultValue(0);
-  nonlinear_container.addString("solver_type", "Solver type (Newton|KINFullStep|KINLineSearch)").defaultValue("Newton");
+  nonlinear_container
+      .addString("solver_type", "Solver type (Newton|NewtonLineSearch|TrustRegion|PcgBlock|KINFullStep|KINLineSearch)")
+      .defaultValue("Newton");
 }
 
 }  // namespace smith
@@ -1373,6 +1515,12 @@ smith::NonlinearSolverOptions FromInlet<smith::NonlinearSolverOptions>::operator
   const std::string solver_type = base["solver_type"];
   if (solver_type == "Newton") {
     options.nonlin_solver = smith::NonlinearSolver::Newton;
+  } else if (solver_type == "NewtonLineSearch") {
+    options.nonlin_solver = smith::NonlinearSolver::NewtonLineSearch;
+  } else if (solver_type == "TrustRegion") {
+    options.nonlin_solver = smith::NonlinearSolver::TrustRegion;
+  } else if (solver_type == "PcgBlock") {
+    options.nonlin_solver = smith::NonlinearSolver::PcgBlock;
   } else if (solver_type == "KINFullStep") {
     options.nonlin_solver = smith::NonlinearSolver::KINFullStep;
   } else if (solver_type == "KINLineSearch") {
diff --git a/src/smith/numerics/solver_config.hpp b/src/smith/numerics/solver_config.hpp
index aebf795305..e7c26bda35 100644
--- a/src/smith/numerics/solver_config.hpp
+++ b/src/smith/numerics/solver_config.hpp
@@ -152,6 +152,7 @@ enum class NonlinearSolver
   LBFGS,                     /**< MFEM-native Limited memory BFGS */
   NewtonLineSearch,          /**< Custom solver using preconditioned earch direction with backtracking line search */
   TrustRegion,               /**< Custom solver using a trust region solver */
+  PcgBlock,                  /**< Custom nonlinear preconditioned conjugate-gradient block solver */
   KINFullStep,               /**< KINSOL Full Newton (Sundials must be enabled) */
   KINBacktrackingLineSearch, /**< KINSOL Newton with Backtracking Line Search (Sundials must be enabled) */
   KINPicard,                 /**< KINSOL Picard (Sundials must be enabled) */
@@ -174,6 +175,8 @@ inline std::string nonlinearName(const NonlinearSolver& s)
       return "NewtonLineSearch";
     case NonlinearSolver::TrustRegion:
       return "TrustRegion";
+    case NonlinearSolver::PcgBlock:
+      return "PcgBlock";
     case NonlinearSolver::KINFullStep:
       return "KINFullStep";
     case NonlinearSolver::KINBacktrackingLineSearch:
@@ -202,6 +205,7 @@ inline std::map<std::string, NonlinearSolver> nonlinearSolverMap = {
     {"LBFGS", NonlinearSolver::LBFGS},
     {"NewtonLineSearch", NonlinearSolver::NewtonLineSearch},
     {"TrustRegion", NonlinearSolver::TrustRegion},
+    {"PcgBlock", NonlinearSolver::PcgBlock},
     {"KINFullStep", NonlinearSolver::KINFullStep},
     {"KINBacktrackingLineSearch", NonlinearSolver::KINBacktrackingLineSearch},
     {"KINPicard", NonlinearSolver::KINPicard},
@@ -469,6 +473,51 @@ struct NonlinearSolverOptions {
 
   /// Should the gradient be converted to a monolithic matrix
   bool force_monolithic = false;
+
+  /// Number of speculative nonlinear PCG steps per accepted/rejected block
+  int pcg_block_len = 10;
+
+  /// Powell restart threshold for nonlinear PCG residual orthogonality
+  double pcg_powell_eta = 0.2;
+
+  /// Trust-ratio threshold below which the PCG-block trust scale shrinks
+  double pcg_trust_eta_bad = 0.1;
+
+  /// Trust-ratio threshold above which the PCG-block trust scale grows
+  double pcg_trust_eta_good = 0.75;
+
+  /// PCG-block trust-scale shrink factor
+  double pcg_shrink = 0.5;
+
+  /// PCG-block trust-scale growth factor
+  double pcg_growth = 1.25;
+
+  /// Initial PCG-block trust scale
+  double pcg_h_scale_init = 1.0;
+
+  /// Minimum PCG-block trust scale before declaring failure
+  double pcg_min_h_scale = 1e-8;
+
+  /// Maximum number of rejected PCG blocks before declaring failure
+  int pcg_max_block_retries = 20;
+
+  /// Nonmonotone cumulative gradient-work acceptance window
+  int pcg_window = 5;
+
+  /// Armijo coefficient for PCG-block inner step backtracking
+  double pcg_ls_armijo_c = 1e-4;
+
+  /// Maximum number of PCG-block inner step backtracks
+  int pcg_ls_max_backtracks = 8;
+
+  /// PCG-block inner step backtracking shrink factor
+  double pcg_ls_shrink = 0.5;
+
+  /// Descent and model denominator tolerance for PCG-block guards
+  double pcg_eps_descent = 1e-12;
+
+  /// Running-mean window for successful PCG-block trust-radius reference steps
+  int pcg_delta_avg_window = 5;
 };
 // _nonlinear_options_end
 

From 70c55fd75e3ea79d23ae6d7b881d512b7a953afd Mon Sep 17 00:00:00 2001
From: Michael Tupek <mrtupek@gmail.com>
Date: Wed, 29 Apr 2026 12:19:37 -0600
Subject: [PATCH 02/27] Initial implementation of nonlinear PCG.

---
 src/smith/numerics/equation_solver.cpp | 292 ++++++++++++++++++++++++-
 1 file changed, 280 insertions(+), 12 deletions(-)

diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp
index 6edefb9328..03eff72240 100644
--- a/src/smith/numerics/equation_solver.cpp
+++ b/src/smith/numerics/equation_solver.cpp
@@ -983,8 +983,13 @@ class PcgBlockSolver : public mfem::NewtonSolver {
     num_residuals = 0;
     num_jacobian_assembles = 0;
 
-    mfem::real_t norm = 0.0;
-    norm = initial_norm = computeResidual(X, r);
+    SLIC_ERROR_ROOT_IF(nonlinear_options.pcg_block_len <= 0, "PcgBlock requires pcg_block_len > 0");
+    SLIC_ERROR_ROOT_IF(nonlinear_options.pcg_window <= 0, "PcgBlock requires pcg_window > 0");
+    SLIC_ERROR_ROOT_IF(nonlinear_options.pcg_ls_max_backtracks < 0, "PcgBlock requires pcg_ls_max_backtracks >= 0");
+    SLIC_ERROR_ROOT_IF(nonlinear_options.pcg_delta_avg_window <= 0, "PcgBlock requires pcg_delta_avg_window > 0");
+
+    mfem::real_t norm = computeResidual(X, r);
+    initial_norm = norm;
     if (norm == 0.0) {
       converged = true;
       final_iter = 0;
@@ -1007,22 +1012,285 @@ class PcgBlockSolver : public mfem::NewtonSolver {
     scratch.SetSize(X.Size());
     scratch = 0.0;
 
-    if (print_level >= 2) {
-      mfem::out << "PcgBlock iteration " << std::setw(3) << 0 << " : ||r|| = " << std::setw(13) << norm
-                << ", norm goal = " << std::setw(13) << norm_goal << '\n';
-    }
+    mfem::Vector r_block(X.Size());
+    mfem::Vector r_candidate(X.Size());
+    mfem::Vector force(X.Size());
+    mfem::Vector z(X.Size());
+    mfem::Vector z_old(X.Size());
+    mfem::Vector p(X.Size());
+    mfem::Vector p_old(X.Size());
+    mfem::Vector Hp(X.Size());
+    mfem::Vector step(X.Size());
+    mfem::Vector x_candidate(X.Size());
+
+    bool have_momentum = false;
+    double rho_old = 0.0;
+    double h_scale = nonlinear_options.pcg_h_scale_init;
+    int retries_remaining = nonlinear_options.pcg_max_block_retries;
+    int it = 0;
+    double cumulative_work = 0.0;
+    std::vector<double> work_history{cumulative_work};
+    std::vector<double> accepted_step_norms;
+
+    auto reset_momentum = [&]() {
+      have_momentum = false;
+      rho_old = 0.0;
+      p_old = 0.0;
+      z_old = 0.0;
+    };
+
+    auto window_max = [&](const std::vector<double>& history) {
+      const int window = nonlinear_options.pcg_window;
+      const auto begin = history.size() > static_cast<size_t>(window) ? history.end() - window : history.begin();
+      return *std::max_element(begin, history.end());
+    };
+
+    auto current_delta_ref = [&]() {
+      if (accepted_step_norms.empty()) {
+        return 0.0;
+      }
+      const int window = nonlinear_options.pcg_delta_avg_window;
+      const auto begin = accepted_step_norms.size() > static_cast<size_t>(window) ? accepted_step_norms.end() - window
+                                                                                  : accepted_step_norms.begin();
+      double sum = 0.0;
+      for (auto iter = begin; iter != accepted_step_norms.end(); ++iter) {
+        sum += *iter;
+      }
+      return sum / static_cast<double>(accepted_step_norms.end() - begin);
+    };
 
-    if (norm <= norm_goal && nonlinear_options.min_iterations == 0) {
-      converged = true;
-    } else {
-      converged = false;
+    for (; true;) {
+      MFEM_ASSERT(mfem::IsFinite(norm), "norm = " << norm);
+      if (print_level >= 2) {
+        mfem::out << "PcgBlock iteration " << std::setw(3) << it << " : ||r|| = " << std::setw(13) << norm;
+        if (it > 0) {
+          mfem::out << ", ||r||/||r_0|| = " << std::setw(13) << (initial_norm != 0.0 ? norm / initial_norm : norm);
+        } else {
+          mfem::out << ", norm goal = " << std::setw(13) << norm_goal;
+        }
+        mfem::out << '\n';
+      }
+
+      if (print_level >= 1 && (norm != norm)) {
+        mfem::out << "Initial residual for PCG-block iteration is undefined/nan." << std::endl;
+        mfem::out << "PcgBlock: No convergence!\n";
+        converged = false;
+        break;
+      }
+
+      if (norm <= norm_goal && it >= nonlinear_options.min_iterations) {
+        converged = true;
+        break;
+      } else if (it >= max_iter) {
+        converged = false;
+        break;
+      } else if (retries_remaining <= 0 || h_scale < nonlinear_options.pcg_min_h_scale) {
+        converged = false;
+        break;
+      }
+
+      assembleJacobian(X);
+      pcg_precond.SetOperator(*grad);
+
+      r_block = r;
+      const double norm_block = norm;
+      bool block_finished = false;
+
+      while (!block_finished) {
+        x_trial = X;
+        r = r_block;
+        norm = norm_block;
+
+        double block_predicted = 0.0;
+        double block_actual = 0.0;
+        double trial_cumulative_work = cumulative_work;
+        int trial_steps = 0;
+        bool trial_failed = false;
+        std::vector<double> trial_step_norms;
+        auto trial_work_history = work_history;
+
+        for (int block_it = 0; block_it < nonlinear_options.pcg_block_len && it + trial_steps < max_iter; ++block_it) {
+          force = r;
+          force *= -1.0;
+          precond(force, z);
+
+          const double rho = Dot(force, z);
+          if (!mfem::IsFinite(rho) || rho <= nonlinear_options.pcg_eps_descent) {
+            trial_failed = true;
+            break;
+          }
+
+          double beta = 0.0;
+          if (have_momentum) {
+            const double force_dot_z_old = Dot(force, z_old);
+            beta = std::max(0.0, (rho - force_dot_z_old) / rho_old);
+            if (std::abs(force_dot_z_old) > nonlinear_options.pcg_powell_eta * rho) {
+              beta = 0.0;
+            }
+          }
+
+          p = z;
+          if (have_momentum && beta != 0.0) {
+            p.Add(beta, p_old);
+          }
+
+          double force_dot_p = Dot(force, p);
+          if (force_dot_p <= nonlinear_options.pcg_eps_descent * rho) {
+            beta = 0.0;
+            p = z;
+            force_dot_p = rho;
+          }
+
+          hessVec(p, Hp);
+          const double pHp = Dot(p, Hp);
+
+          double alpha = 0.0;
+          double alpha_quad = std::numeric_limits<double>::quiet_NaN();
+          const bool positive_curvature = pHp > nonlinear_options.pcg_eps_descent && mfem::IsFinite(pHp);
+          if (positive_curvature) {
+            alpha_quad = force_dot_p / pHp;
+            alpha = alpha_quad;
+          }
+
+          const double p_norm = Norm(p);
+          double delta_ref = current_delta_ref();
+          if (delta_ref <= 0.0 && alpha > 0.0 && mfem::IsFinite(alpha) && p_norm > 0.0) {
+            delta_ref = alpha * p_norm;
+          } else if (delta_ref <= 0.0) {
+            delta_ref = 1.0;
+          }
+
+          const bool apply_trust_cap = !positive_curvature || h_scale < nonlinear_options.pcg_h_scale_init;
+          if (apply_trust_cap && p_norm > 0.0) {
+            const double alpha_cap = h_scale * delta_ref / p_norm;
+            if (alpha > 0.0 && mfem::IsFinite(alpha)) {
+              alpha = std::min(alpha, alpha_cap);
+            } else {
+              alpha = alpha_cap;
+            }
+          }
+
+          if (!(alpha > 0.0) || !mfem::IsFinite(alpha)) {
+            trial_failed = true;
+            break;
+          }
+
+          bool accepted_step = false;
+          double accepted_work = 0.0;
+          double accepted_predicted = 0.0;
+          double accepted_step_norm = 0.0;
+
+          for (int ls = 0; ls <= nonlinear_options.pcg_ls_max_backtracks; ++ls) {
+            step = p;
+            step *= alpha;
+            add(x_trial, step, x_candidate);
+
+            const double norm_candidate = computeResidual(x_candidate, r_candidate);
+            const double work = -0.5 * Dot(r, step) - 0.5 * Dot(r_candidate, step);
+            const double cumulative_candidate = trial_cumulative_work + work;
+            const double work_ref = window_max(trial_work_history);
+            const bool finite_candidate = mfem::IsFinite(norm_candidate) && mfem::IsFinite(work);
+            const bool sufficient_work =
+                cumulative_candidate >= work_ref - nonlinear_options.pcg_ls_armijo_c * alpha * force_dot_p;
+
+            if (finite_candidate && (sufficient_work || norm_candidate <= norm_goal)) {
+              const double predicted = alpha * force_dot_p - 0.5 * alpha * alpha * pHp;
+              accepted_predicted = std::max(predicted, 0.0);
+              accepted_work = work;
+              accepted_step_norm = Norm(step);
+              norm = norm_candidate;
+              accepted_step = true;
+              break;
+            }
+
+            alpha *= nonlinear_options.pcg_ls_shrink;
+          }
+
+          if (!accepted_step) {
+            trial_failed = true;
+            break;
+          }
+
+          x_trial = x_candidate;
+          r = r_candidate;
+          trial_cumulative_work += accepted_work;
+          trial_work_history.push_back(trial_cumulative_work);
+          trial_step_norms.push_back(accepted_step_norm);
+          block_predicted += accepted_predicted;
+          block_actual += accepted_work;
+
+          p_old = p;
+          z_old = z;
+          rho_old = rho;
+          have_momentum = true;
+          ++trial_steps;
+
+          if (norm <= norm_goal && it + trial_steps >= nonlinear_options.min_iterations) {
+            break;
+          }
+        }
+
+        double trust_ratio = 1.0;
+        if (block_predicted > nonlinear_options.pcg_eps_descent) {
+          trust_ratio = block_actual / block_predicted;
+        } else if (block_actual < 0.0) {
+          trust_ratio = -std::numeric_limits<double>::infinity();
+        }
+
+        const bool block_converged = norm <= norm_goal && it + trial_steps >= nonlinear_options.min_iterations;
+        const bool accept_block =
+            trial_steps > 0 && !trial_failed &&
+            (block_converged || (block_actual >= 0.0 && trust_ratio >= nonlinear_options.pcg_trust_eta_bad));
+
+        if (accept_block) {
+          X = x_trial;
+          cumulative_work = trial_cumulative_work;
+          work_history = std::move(trial_work_history);
+          accepted_step_norms.insert(accepted_step_norms.end(), trial_step_norms.begin(), trial_step_norms.end());
+          it += trial_steps;
+
+          if (trust_ratio < nonlinear_options.pcg_trust_eta_bad) {
+            h_scale = std::max(h_scale * nonlinear_options.pcg_shrink, nonlinear_options.pcg_min_h_scale);
+            reset_momentum();
+          } else if (trust_ratio >= nonlinear_options.pcg_trust_eta_good) {
+            h_scale = std::min(h_scale * nonlinear_options.pcg_growth, nonlinear_options.pcg_h_scale_init);
+          }
+
+          if (print_level >= 2) {
+            mfem::out << "PcgBlock block accepted: steps = " << trial_steps << ", rho = " << std::setw(13)
+                      << trust_ratio << ", h_scale = " << std::setw(13) << h_scale << '\n';
+          }
+
+          block_finished = true;
+        } else {
+          r = r_block;
+          norm = norm_block;
+          h_scale *= nonlinear_options.pcg_shrink;
+          reset_momentum();
+          --retries_remaining;
+
+          if (print_level >= 2) {
+            mfem::out << "PcgBlock block rejected: steps = " << trial_steps << ", rho = " << std::setw(13)
+                      << trust_ratio << ", h_scale = " << std::setw(13) << h_scale
+                      << ", retries left = " << retries_remaining << '\n';
+          }
+
+          if (retries_remaining <= 0 || h_scale < nonlinear_options.pcg_min_h_scale) {
+            block_finished = true;
+          }
+        }
+      }
     }
 
-    final_iter = 0;
+    final_iter = it;
     final_norm = norm;
 
+    if (print_level == 1) {
+      mfem::out << "PcgBlock iteration " << std::setw(3) << final_iter << " : ||r|| = " << std::setw(13) << norm
+                << '\n';
+    }
     if (!converged && print_level >= 1) {
-      mfem::out << "PcgBlock: No convergence! Algorithm implementation pending.\n";
+      mfem::out << "PcgBlock: No convergence!\n";
     }
   }
 };

From 079054cb82eec6c8a53bdcfe2325dda105751202 Mon Sep 17 00:00:00 2001
From: Michael Tupek <mrtupek@gmail.com>
Date: Wed, 29 Apr 2026 16:30:56 -0600
Subject: [PATCH 03/27] Start implementing a new potential solver.

---
 src/smith/numerics/equation_solver.cpp        | 158 ++++++++++++++++--
 src/smith/numerics/equation_solver.hpp        |  33 ++++
 src/smith/physics/solid_mechanics.hpp         |   6 +
 src/smith/physics/tests/CMakeLists.txt        |   1 +
 .../physics/tests/shallow_arch_buckling.cpp   | 128 ++++++++++++++
 src/smith/physics/tests/solid.cpp             |  60 +++++++
 .../physics/tests/solid_statics_patch.cpp     |  79 +++++++++
 7 files changed, 448 insertions(+), 17 deletions(-)
 create mode 100644 src/smith/physics/tests/shallow_arch_buckling.cpp

diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp
index 03eff72240..4840b0a1ef 100644
--- a/src/smith/numerics/equation_solver.cpp
+++ b/src/smith/numerics/equation_solver.cpp
@@ -92,8 +92,9 @@ class NewtonSolver : public mfem::NewtonSolver {
     MFEM_ASSERT(oper != NULL, "the Operator is not set (use SetOperator).");
     MFEM_ASSERT(prec != NULL, "the Solver is not set (use SetSolver).");
 
-    print_level = print_options.iterations ? 1 : print_level;
-    print_level = print_options.summary ? 2 : print_level;
+    print_level = static_cast<size_t>(std::max(nonlinear_options.print_level, 0));
+    print_level = print_options.iterations ? std::max<size_t>(1, print_level) : print_level;
+    print_level = print_options.summary ? std::max<size_t>(2, print_level) : print_level;
 
     using real_t = mfem::real_t;
 
@@ -636,8 +637,9 @@ class TrustRegion : public mfem::NewtonSolver {
     MFEM_ASSERT(oper != NULL, "the Operator is not set (use SetOperator).");
     MFEM_ASSERT(prec != NULL, "the Solver is not set (use SetSolver).");
 
-    print_level = print_options.iterations ? 1 : print_level;
-    print_level = print_options.summary ? 2 : print_level;
+    print_level = static_cast<size_t>(std::max(nonlinear_options.print_level, 0));
+    print_level = print_options.iterations ? std::max<size_t>(1, print_level) : print_level;
+    print_level = print_options.summary ? std::max<size_t>(2, print_level) : print_level;
 
     using real_t = mfem::real_t;
 
@@ -923,6 +925,28 @@ class PcgBlockSolver : public mfem::NewtonSolver {
   mutable size_t num_residuals = 0;
   /// Internal counter for matrix assembles
   mutable size_t num_jacobian_assembles = 0;
+  /// Internal counter for accepted blocks
+  mutable size_t num_blocks = 0;
+  /// Internal counter for rejected blocks
+  mutable size_t num_block_rejects = 0;
+  /// Internal counter for Powell restarts
+  mutable size_t num_powell_restarts = 0;
+  /// Internal counter for descent-guard restarts
+  mutable size_t num_descent_restarts = 0;
+  /// Internal counter for non-positive curvature directions
+  mutable size_t num_negative_curvature = 0;
+  /// Internal counter for line-search backtracks
+  mutable size_t num_line_search_backtracks = 0;
+  /// Internal counter for positive-curvature steps capped by the trust radius
+  mutable size_t num_trust_capped_steps = 0;
+  /// Internal counter for accepted inner PCG steps
+  mutable size_t num_accepted_steps = 0;
+  /// Internal counter for trial inner PCG steps
+  mutable size_t num_trial_steps = 0;
+  /// Last trust scale used by the solver
+  mutable double final_h_scale = 1.0;
+  /// Last accepted block trust ratio
+  mutable double last_trust_ratio = 0.0;
 
 #ifdef MFEM_USE_MPI
   /// Constructor
@@ -969,19 +993,47 @@ class PcgBlockSolver : public mfem::NewtonSolver {
     pcg_precond.Mult(x, v);
   }
 
+  /// Return solver diagnostic counters.
+  PcgBlockDiagnostics diagnostics() const
+  {
+    return {.num_blocks = num_blocks,
+            .num_block_rejects = num_block_rejects,
+            .num_powell_restarts = num_powell_restarts,
+            .num_descent_restarts = num_descent_restarts,
+            .num_negative_curvature = num_negative_curvature,
+            .num_line_search_backtracks = num_line_search_backtracks,
+            .num_trust_capped_steps = num_trust_capped_steps,
+            .num_accepted_steps = num_accepted_steps,
+            .num_trial_steps = num_trial_steps,
+            .final_h_scale = final_h_scale,
+            .last_trust_ratio = last_trust_ratio};
+  }
+
   /// @overload
   void Mult(const mfem::Vector&, mfem::Vector& X) const
   {
     MFEM_ASSERT(oper != NULL, "the Operator is not set (use SetOperator).");
     MFEM_ASSERT(prec != NULL, "the Solver is not set (use SetSolver).");
 
-    print_level = print_options.iterations ? 1 : print_level;
-    print_level = print_options.summary ? 2 : print_level;
+    print_level = static_cast<size_t>(std::max(nonlinear_options.print_level, 0));
+    print_level = print_options.iterations ? std::max<size_t>(1, print_level) : print_level;
+    print_level = print_options.summary ? std::max<size_t>(2, print_level) : print_level;
 
     num_hess_vecs = 0;
     num_preconds = 0;
     num_residuals = 0;
     num_jacobian_assembles = 0;
+    num_blocks = 0;
+    num_block_rejects = 0;
+    num_powell_restarts = 0;
+    num_descent_restarts = 0;
+    num_negative_curvature = 0;
+    num_line_search_backtracks = 0;
+    num_trust_capped_steps = 0;
+    num_accepted_steps = 0;
+    num_trial_steps = 0;
+    final_h_scale = nonlinear_options.pcg_h_scale_init;
+    last_trust_ratio = 0.0;
 
     SLIC_ERROR_ROOT_IF(nonlinear_options.pcg_block_len <= 0, "PcgBlock requires pcg_block_len > 0");
     SLIC_ERROR_ROOT_IF(nonlinear_options.pcg_window <= 0, "PcgBlock requires pcg_window > 0");
@@ -1032,6 +1084,15 @@ class PcgBlockSolver : public mfem::NewtonSolver {
     std::vector<double> work_history{cumulative_work};
     std::vector<double> accepted_step_norms;
 
+    auto append_bounded = [](std::vector<double>& history, double value, int max_size) {
+      history.push_back(value);
+      const auto bound = static_cast<size_t>(max_size);
+      if (history.size() > bound) {
+        const auto num_to_remove = static_cast<std::vector<double>::difference_type>(history.size() - bound);
+        history.erase(history.begin(), history.begin() + num_to_remove);
+      }
+    };
+
     auto reset_momentum = [&]() {
       have_momentum = false;
       rho_old = 0.0;
@@ -1103,9 +1164,12 @@ class PcgBlockSolver : public mfem::NewtonSolver {
 
         double block_predicted = 0.0;
         double block_actual = 0.0;
+        double block_delta_ref = current_delta_ref();
+        double block_trust_size = h_scale * (block_delta_ref > 0.0 ? block_delta_ref : 1.0);
         double trial_cumulative_work = cumulative_work;
         int trial_steps = 0;
         bool trial_failed = false;
+        bool trial_ended_after_inner_failure = false;
         std::vector<double> trial_step_norms;
         auto trial_work_history = work_history;
 
@@ -1113,10 +1177,12 @@ class PcgBlockSolver : public mfem::NewtonSolver {
           force = r;
           force *= -1.0;
           precond(force, z);
+          ++num_trial_steps;
 
           const double rho = Dot(force, z);
-          if (!mfem::IsFinite(rho) || rho <= nonlinear_options.pcg_eps_descent) {
-            trial_failed = true;
+          if (!mfem::IsFinite(rho) || rho <= 0.0) {
+            trial_ended_after_inner_failure = trial_steps > 0;
+            trial_failed = trial_steps == 0;
             break;
           }
 
@@ -1126,6 +1192,7 @@ class PcgBlockSolver : public mfem::NewtonSolver {
             beta = std::max(0.0, (rho - force_dot_z_old) / rho_old);
             if (std::abs(force_dot_z_old) > nonlinear_options.pcg_powell_eta * rho) {
               beta = 0.0;
+              ++num_powell_restarts;
             }
           }
 
@@ -1139,6 +1206,7 @@ class PcgBlockSolver : public mfem::NewtonSolver {
             beta = 0.0;
             p = z;
             force_dot_p = rho;
+            ++num_descent_restarts;
           }
 
           hessVec(p, Hp);
@@ -1146,10 +1214,12 @@ class PcgBlockSolver : public mfem::NewtonSolver {
 
           double alpha = 0.0;
           double alpha_quad = std::numeric_limits<double>::quiet_NaN();
-          const bool positive_curvature = pHp > nonlinear_options.pcg_eps_descent && mfem::IsFinite(pHp);
+          const bool positive_curvature = pHp > 0.0 && mfem::IsFinite(pHp);
           if (positive_curvature) {
             alpha_quad = force_dot_p / pHp;
             alpha = alpha_quad;
+          } else {
+            ++num_negative_curvature;
           }
 
           const double p_norm = Norm(p);
@@ -1159,19 +1229,28 @@ class PcgBlockSolver : public mfem::NewtonSolver {
           } else if (delta_ref <= 0.0) {
             delta_ref = 1.0;
           }
+          block_delta_ref = delta_ref;
+          block_trust_size = h_scale * delta_ref;
 
           const bool apply_trust_cap = !positive_curvature || h_scale < nonlinear_options.pcg_h_scale_init;
+          bool trust_capped = false;
           if (apply_trust_cap && p_norm > 0.0) {
             const double alpha_cap = h_scale * delta_ref / p_norm;
             if (alpha > 0.0 && mfem::IsFinite(alpha)) {
+              if (alpha_cap < alpha) {
+                ++num_trust_capped_steps;
+                trust_capped = true;
+              }
               alpha = std::min(alpha, alpha_cap);
             } else {
               alpha = alpha_cap;
+              trust_capped = true;
             }
           }
 
           if (!(alpha > 0.0) || !mfem::IsFinite(alpha)) {
-            trial_failed = true;
+            trial_ended_after_inner_failure = trial_steps > 0;
+            trial_failed = trial_steps == 0;
             break;
           }
 
@@ -1179,6 +1258,7 @@ class PcgBlockSolver : public mfem::NewtonSolver {
           double accepted_work = 0.0;
           double accepted_predicted = 0.0;
           double accepted_step_norm = 0.0;
+          int accepted_ls_count = 0;
 
           for (int ls = 0; ls <= nonlinear_options.pcg_ls_max_backtracks; ++ls) {
             step = p;
@@ -1198,6 +1278,7 @@ class PcgBlockSolver : public mfem::NewtonSolver {
               accepted_predicted = std::max(predicted, 0.0);
               accepted_work = work;
               accepted_step_norm = Norm(step);
+              accepted_ls_count = ls;
               norm = norm_candidate;
               accepted_step = true;
               break;
@@ -1207,23 +1288,33 @@ class PcgBlockSolver : public mfem::NewtonSolver {
           }
 
           if (!accepted_step) {
-            trial_failed = true;
+            trial_ended_after_inner_failure = trial_steps > 0;
+            trial_failed = trial_steps == 0;
             break;
           }
 
           x_trial = x_candidate;
           r = r_candidate;
           trial_cumulative_work += accepted_work;
-          trial_work_history.push_back(trial_cumulative_work);
-          trial_step_norms.push_back(accepted_step_norm);
+          append_bounded(trial_work_history, trial_cumulative_work, nonlinear_options.pcg_window);
+          append_bounded(trial_step_norms, accepted_step_norm, nonlinear_options.pcg_delta_avg_window);
           block_predicted += accepted_predicted;
           block_actual += accepted_work;
+          num_line_search_backtracks += static_cast<size_t>(accepted_ls_count);
+
+          if (print_level >= 2) {
+            mfem::out << "  PcgBlock step " << std::setw(3) << (it + trial_steps + 1) << " : alpha = " << std::setw(13)
+                      << alpha << ", approx work = " << std::setw(13) << accepted_predicted
+                      << ", achieved work = " << std::setw(13) << accepted_work << ", trust size = " << std::setw(13)
+                      << block_trust_size << ", capped = " << trust_capped << ", ls = " << accepted_ls_count << '\n';
+          }
 
           p_old = p;
           z_old = z;
           rho_old = rho;
           have_momentum = true;
           ++trial_steps;
+          ++num_accepted_steps;
 
           if (norm <= norm_goal && it + trial_steps >= nonlinear_options.min_iterations) {
             break;
@@ -1242,24 +1333,42 @@ class PcgBlockSolver : public mfem::NewtonSolver {
             trial_steps > 0 && !trial_failed &&
             (block_converged || (block_actual >= 0.0 && trust_ratio >= nonlinear_options.pcg_trust_eta_bad));
 
+        const double old_h_scale = h_scale;
+        const bool prefix_accept = accept_block && trial_ended_after_inner_failure;
+        bool reset_next_momentum = false;
         if (accept_block) {
           X = x_trial;
           cumulative_work = trial_cumulative_work;
           work_history = std::move(trial_work_history);
           accepted_step_norms.insert(accepted_step_norms.end(), trial_step_norms.begin(), trial_step_norms.end());
+          if (accepted_step_norms.size() > static_cast<size_t>(nonlinear_options.pcg_delta_avg_window)) {
+            accepted_step_norms.erase(accepted_step_norms.begin(),
+                                      accepted_step_norms.end() - nonlinear_options.pcg_delta_avg_window);
+          }
           it += trial_steps;
+          ++num_blocks;
 
           if (trust_ratio < nonlinear_options.pcg_trust_eta_bad) {
             h_scale = std::max(h_scale * nonlinear_options.pcg_shrink, nonlinear_options.pcg_min_h_scale);
             reset_momentum();
+            reset_next_momentum = true;
+          } else if (trial_ended_after_inner_failure) {
+            reset_momentum();
+            reset_next_momentum = true;
           } else if (trust_ratio >= nonlinear_options.pcg_trust_eta_good) {
             h_scale = std::min(h_scale * nonlinear_options.pcg_growth, nonlinear_options.pcg_h_scale_init);
           }
+          const double next_trust_size = h_scale * block_delta_ref;
 
           if (print_level >= 2) {
-            mfem::out << "PcgBlock block accepted: steps = " << trial_steps << ", rho = " << std::setw(13)
-                      << trust_ratio << ", h_scale = " << std::setw(13) << h_scale << '\n';
+            mfem::out << "PcgBlock block accepted: steps = " << std::setw(3) << trial_steps
+                      << ", prefix = " << prefix_accept << ", approx work = " << std::setw(13) << block_predicted
+                      << ", achieved work = " << std::setw(13) << block_actual << ", rho = " << std::setw(13)
+                      << trust_ratio << ", h_scale = " << std::setw(13) << old_h_scale << " -> " << std::setw(13)
+                      << h_scale << ", trust size = " << std::setw(13) << block_trust_size << " -> " << std::setw(13)
+                      << next_trust_size << ", reset momentum = " << reset_next_momentum << '\n';
           }
+          last_trust_ratio = trust_ratio;
 
           block_finished = true;
         } else {
@@ -1268,10 +1377,15 @@ class PcgBlockSolver : public mfem::NewtonSolver {
           h_scale *= nonlinear_options.pcg_shrink;
           reset_momentum();
           --retries_remaining;
+          ++num_block_rejects;
+          const double next_trust_size = h_scale * block_delta_ref;
 
           if (print_level >= 2) {
-            mfem::out << "PcgBlock block rejected: steps = " << trial_steps << ", rho = " << std::setw(13)
-                      << trust_ratio << ", h_scale = " << std::setw(13) << h_scale
+            mfem::out << "PcgBlock block rejected: steps = " << std::setw(3) << trial_steps
+                      << ", approx work = " << std::setw(13) << block_predicted << ", achieved work = " << std::setw(13)
+                      << block_actual << ", rho = " << std::setw(13) << trust_ratio << ", h_scale = " << std::setw(13)
+                      << old_h_scale << " -> " << std::setw(13) << h_scale << ", trust size = " << std::setw(13)
+                      << block_trust_size << " -> " << std::setw(13) << next_trust_size << ", reset momentum = 1"
                       << ", retries left = " << retries_remaining << '\n';
           }
 
@@ -1284,6 +1398,7 @@ class PcgBlockSolver : public mfem::NewtonSolver {
 
     final_iter = it;
     final_norm = norm;
+    final_h_scale = h_scale;
 
     if (print_level == 1) {
       mfem::out << "PcgBlock iteration " << std::setw(3) << final_iter << " : ||r|| = " << std::setw(13) << norm
@@ -1336,6 +1451,15 @@ void EquationSolver::solve(mfem::Vector& x) const
   nonlin_solver_->Mult(zero, x);
 }
 
+std::optional<PcgBlockDiagnostics> EquationSolver::pcgBlockDiagnostics() const
+{
+  auto* pcg_block = dynamic_cast<const PcgBlockSolver*>(nonlin_solver_.get());  // null if not a PcgBlockSolver
+  if (!pcg_block) {
+    return std::nullopt;  // other nonlinear solvers expose no PCG-block counters
+  }
+  return pcg_block->diagnostics();  // by-value copy of the solver's current counters
+}
+
 void SuperLUSolver::Mult(const mfem::Vector& input, mfem::Vector& output) const
 {
   SLIC_ERROR_ROOT_IF(!superlu_mat_, "Operator must be set prior to solving with SuperLU");
diff --git a/src/smith/numerics/equation_solver.hpp b/src/smith/numerics/equation_solver.hpp
index 33dcc42621..3700cad532 100644
--- a/src/smith/numerics/equation_solver.hpp
+++ b/src/smith/numerics/equation_solver.hpp
@@ -12,6 +12,7 @@
 
 #pragma once
 
+#include <cstddef>
 #include <memory>
 #include <optional>
 #include <variant>
@@ -26,6 +27,32 @@
 
 namespace smith {
 
+/// Diagnostic counters for the nonlinear PCG-block solver
+struct PcgBlockDiagnostics {
+  /// Number of accepted blocks
+  size_t num_blocks = 0;
+  /// Number of rejected blocks
+  size_t num_block_rejects = 0;
+  /// Number of Powell restarts (beta reset to zero by the pcg_powell_eta test)
+  size_t num_powell_restarts = 0;
+  /// Number of descent-guard restarts (search direction reset to the preconditioned force)
+  size_t num_descent_restarts = 0;
+  /// Number of search directions whose curvature p^T H p was non-positive or non-finite
+  size_t num_negative_curvature = 0;
+  /// Number of line-search backtracks
+  size_t num_line_search_backtracks = 0;
+  /// Number of positive-curvature steps capped by the trust radius
+  size_t num_trust_capped_steps = 0;
+  /// Number of inner PCG steps accepted by the line search within trial blocks
+  size_t num_accepted_steps = 0;
+  /// Number of trial inner PCG steps started (counted after each preconditioner application)
+  size_t num_trial_steps = 0;
+  /// Trust scale (h_scale) at the end of the most recent solve
+  double final_h_scale = 1.0;
+  /// Last accepted block trust ratio
+  double last_trust_ratio = 0.0;
+};
+
 /**
  * @brief This class manages the objects typically required to solve a nonlinear set of equations arising from
  * discretization of a PDE of the form F(x) = 0. Specifically, it has
@@ -94,6 +121,12 @@ class EquationSolver {
    */
   const mfem::NewtonSolver& nonlinearSolver() const { return *nonlin_solver_; }
 
+  /**
+   * Returns diagnostic counters when the nonlinear solver is PcgBlock.
+   * @return Optional PCG-block diagnostics; empty for other nonlinear solvers
+   */
+  std::optional<PcgBlockDiagnostics> pcgBlockDiagnostics() const;
+
   /**
    * Returns the underlying linear solver object
    * @return A non-owning reference to the underlying linear solver
diff --git a/src/smith/physics/solid_mechanics.hpp b/src/smith/physics/solid_mechanics.hpp
index f6867ee72b..e717272274 100644
--- a/src/smith/physics/solid_mechanics.hpp
+++ b/src/smith/physics/solid_mechanics.hpp
@@ -1383,6 +1383,12 @@ class SolidMechanics<order, dim, Parameters<parameter_space...>, std::integer_se
   /// @brief getter for nodal forces (before zeroing-out essential dofs)
   const smith::FiniteElementDual& reactions() const { return reactions_; };
 
+  /// @brief Get the equation solver used by this physics module
+  smith::EquationSolver& equationSolver() { return *nonlin_solver_; }
+
+  /// @overload
+  const smith::EquationSolver& equationSolver() const { return *nonlin_solver_; }
+
  protected:
   /// The compile-time finite element trial space for displacement and velocity (H1 of order p)
   using trial = H1<order, dim>;
diff --git a/src/smith/physics/tests/CMakeLists.txt b/src/smith/physics/tests/CMakeLists.txt
index a0ba90546e..292b9140fa 100644
--- a/src/smith/physics/tests/CMakeLists.txt
+++ b/src/smith/physics/tests/CMakeLists.txt
@@ -80,6 +80,7 @@ set(physics_parallel_test_sources
     dynamic_thermal_adjoint.cpp
     solid_reaction_adjoint.cpp
     thermal_nonlinear_solve.cpp
+    shallow_arch_buckling.cpp
     )
 set(physics_parallel_tribol_test_sources
     contact_patch.cpp
diff --git a/src/smith/physics/tests/shallow_arch_buckling.cpp b/src/smith/physics/tests/shallow_arch_buckling.cpp
new file mode 100644
index 0000000000..3c36ec1a66
--- /dev/null
+++ b/src/smith/physics/tests/shallow_arch_buckling.cpp
@@ -0,0 +1,128 @@
+// Copyright (c) Lawrence Livermore National Security, LLC and
+// other Smith Project Developers. See the top-level LICENSE file for
+// details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+
+#include <cmath>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "mpi.h"
+#include "mfem.hpp"
+
+#include "smith/infrastructure/application_manager.hpp"
+#include "smith/numerics/functional/domain.hpp"
+#include "smith/numerics/functional/tensor.hpp"
+#include "smith/numerics/solver_config.hpp"
+#include "smith/physics/materials/solid_material.hpp"
+#include "smith/physics/mesh.hpp"
+#include "smith/physics/solid_mechanics.hpp"
+#include "smith/physics/state/state_manager.hpp"
+
+namespace smith {
+namespace {
+
+constexpr double length = 10.0;
+constexpr double thickness = 0.25;
+constexpr double rise = 0.75;
+constexpr double end_tol = 1.0e-8;
+
+void warpToShallowArch(smith::Mesh& mesh)  // y += rise * (1 - xi^2), with xi = 2 x / length - 1
+{
+  auto& mfem_mesh = mesh.mfemParMesh();
+  for (int i = 0; i < mfem_mesh.GetNV(); ++i) {
+    auto* vertex = mfem_mesh.GetVertex(i);
+    const double xi = 2.0 * vertex[0] / length - 1.0;
+    vertex[1] += rise * (1.0 - xi * xi);
+  }
+  // Warp the nodal coordinates too; HostReadWrite() yields a host pointer even if an MFEM device is enabled.
+  mfem_mesh.DeleteGeometricFactors();
+  auto* nodes = mfem_mesh.GetNodes();  // NOTE(review): assumed non-null (mesh carries a nodal grid function) — confirm
+  auto* coords = nodes->HostReadWrite();
+  const int vdim = nodes->VectorDim();
+  const int scalar_size = nodes->Size() / vdim;
+  // NOTE(review): indexing below assumes byNODES ordering (all x entries, then all y entries) — confirm
+  for (int i = 0; i < scalar_size; ++i) {
+    const double x = coords[i];
+    const double y = coords[i + scalar_size];
+    const double xi = 2.0 * x / length - 1.0;
+    coords[i + scalar_size] = y + rise * (1.0 - xi * xi);
+  }
+}
+
+}  // namespace
+
+TEST(ShallowArchBuckling, NeoHookeanTractionControlled)
+{
+  MPI_Barrier(MPI_COMM_WORLD);
+  // Arch fixed at both ends under a ramped downward top-face traction, solved with the PcgBlock solver.
+  constexpr int p = 1;
+  constexpr int dim = 2;
+  constexpr int nx = 48;
+  constexpr int ny = 4;
+
+  axom::sidre::DataStore datastore;
+  smith::StateManager::initialize(datastore, "shallow_arch_buckling");
+
+  auto mesh = std::make_shared<smith::Mesh>(
+      mfem::Mesh::MakeCartesian2D(nx, ny, mfem::Element::QUADRILATERAL, true, length, thickness), "arch_mesh", 0, 0);
+  warpToShallowArch(*mesh);
+
+  mesh->addDomainOfBoundaryElements("left_end",
+                                    [](std::vector<vec2> vertices, int) { return average(vertices)[0] < end_tol; });
+  mesh->addDomainOfBoundaryElements(
+      "right_end", [](std::vector<vec2> vertices, int) { return average(vertices)[0] > length - end_tol; });
+  mesh->addDomainOfBoundaryElements("top_face", [](std::vector<vec2>, int attr) { return attr == 3; });
+  EXPECT_GT(mesh->domain("top_face").total_elements(), 0);
+
+  smith::LinearSolverOptions linear_options{.linear_solver = LinearSolver::CG,
+                                            .preconditioner = Preconditioner::HypreAMG,
+                                            .relative_tol = 1.0e-8,
+                                            .absolute_tol = 1.0e-14,
+                                            .max_iterations = 500,
+                                            .print_level = 0};
+
+  smith::NonlinearSolverOptions nonlinear_options{.nonlin_solver = NonlinearSolver::PcgBlock,
+                                                  .relative_tol = 1.0e-8,
+                                                  .absolute_tol = 1.0e-10,
+                                                  .max_iterations = 500,
+                                                  .print_level = 2,
+                                                  .pcg_block_len = 10,
+                                                  .pcg_max_block_retries = 40};
+
+  SolidMechanics<p, dim> solid(nonlinear_options, linear_options, solid_mechanics::default_quasistatic_options,
+                               "shallow_arch", mesh);
+
+  solid_mechanics::NeoHookean mat{.density = 1.0, .K = 100.0, .G = 10.0};
+  solid.setMaterial(mat, mesh->entireBody());
+  solid.setFixedBCs(mesh->domain("left_end"));
+  solid.setFixedBCs(mesh->domain("right_end"));
+
+  constexpr double final_traction = 0.2;
+  solid.setTraction([](auto, auto, double t) { return vec2{{0.0, -final_traction * t}}; }, mesh->domain("top_face"));
+
+  solid.completeSetup();
+  solid.outputStateToDisk("shallow_arch_buckling");
+
+  constexpr int num_steps = 40;
+  for (int step = 0; step < num_steps; ++step) {
+    EXPECT_NO_THROW(solid.advanceTimestep(1.0 / num_steps));
+    solid.outputStateToDisk("shallow_arch_buckling");
+  }
+
+  const auto diagnostics = solid.equationSolver().pcgBlockDiagnostics();
+  ASSERT_TRUE(diagnostics.has_value());
+  EXPECT_GT(diagnostics->num_accepted_steps, 0u);
+}
+
+}  // namespace smith
+
+int main(int argc, char* argv[])
+{
+  ::testing::InitGoogleTest(&argc, argv);  // lets GoogleTest consume its own command-line flags first
+  smith::ApplicationManager applicationManager(argc, argv);  // NOTE(review): presumably owns MPI/Smith init — confirm
+  return RUN_ALL_TESTS();
+}
diff --git a/src/smith/physics/tests/solid.cpp b/src/smith/physics/tests/solid.cpp
index e48bed601f..44a68c4240 100644
--- a/src/smith/physics/tests/solid.cpp
+++ b/src/smith/physics/tests/solid.cpp
@@ -236,6 +236,66 @@ TEST(SolidMechanics, 2DQuadParameterizedStatic) { functional_parameterized_solid
 
 TEST(SolidMechanics, 3DQuadStaticJ2) { functional_solid_test_static_J2(); }
 
+TEST(SolidMechanics, PcgBlockLinearElasticity)
+{
+  MPI_Barrier(MPI_COMM_WORLD);
+  // Linear-elastic body-force problem on the square mesh; checks convergence and the PcgBlock diagnostic counters.
+  constexpr int p = 1;
+  constexpr int dim = 2;
+  constexpr int serial_refinement = 1;
+  constexpr int parallel_refinement = 0;
+
+  axom::sidre::DataStore datastore;
+  smith::StateManager::initialize(datastore, "pcg_block_linear_elasticity");
+
+  std::string filename = SMITH_REPO_DIR "/data/meshes/square.mesh";
+  auto mesh =
+      std::make_shared<smith::Mesh>(buildMeshFromFile(filename), "mesh", serial_refinement, parallel_refinement);
+  mesh->addDomainOfBoundaryElements("fixed", by_attr<dim>(1));
+
+  smith::LinearSolverOptions linear_options{.linear_solver = LinearSolver::CG,
+                                            .preconditioner = Preconditioner::HypreL1Jacobi,
+                                            .relative_tol = 1.0e-14,
+                                            .absolute_tol = 1.0e-16,
+                                            .max_iterations = 500,
+                                            .print_level = 0};
+
+  smith::NonlinearSolverOptions nonlinear_options{.nonlin_solver = NonlinearSolver::PcgBlock,
+                                                  .relative_tol = 1.0e-12,
+                                                  .absolute_tol = 1.0e-14,
+                                                  .max_iterations = 200,
+                                                  .print_level = 0,
+                                                  .pcg_block_len = 10};
+
+  SolidMechanics<p, dim> solid(nonlinear_options, linear_options, solid_mechanics::default_quasistatic_options,
+                               "pcg_block_solid", mesh);
+
+  solid_mechanics::LinearIsotropic mat{.density = 1.0, .K = 0.5, .G = 1.0};
+  solid.setMaterial(mat, mesh->entireBody());
+  solid.setFixedBCs(mesh->domain("fixed"));
+
+  tensor<double, dim> constant_force{};
+  constant_force[0] = 0.1;
+  constant_force[1] = -0.05;
+  solid_mechanics::ConstantBodyForce<dim> force{constant_force};
+  solid.addBodyForce(force, mesh->entireBody());
+
+  solid.completeSetup();
+  solid.advanceTimestep(1.0);
+
+  const auto& nonlinear_solver = solid.equationSolver().nonlinearSolver();
+  const auto diagnostics = solid.equationSolver().pcgBlockDiagnostics();
+  // NOTE(review): zero rejects/restarts/backtracks presumably rely on the energy being quadratic — confirm
+  ASSERT_TRUE(diagnostics.has_value());
+  EXPECT_TRUE(nonlinear_solver.GetConverged());
+  EXPECT_LE(nonlinear_solver.GetNumIterations(), solid.displacement().space().GlobalTrueVSize());
+  EXPECT_LT(nonlinear_solver.GetFinalRelNorm(), 1.0e-10);
+  EXPECT_EQ(diagnostics->num_block_rejects, 0u);
+  EXPECT_EQ(diagnostics->num_powell_restarts, 0u);
+  EXPECT_EQ(diagnostics->num_negative_curvature, 0u);
+  EXPECT_EQ(diagnostics->num_line_search_backtracks, 0u);
+}
+
 TEST(SolidMechanics, TDofBoundaryCondition)
 {
   /*
diff --git a/src/smith/physics/tests/solid_statics_patch.cpp b/src/smith/physics/tests/solid_statics_patch.cpp
index 9ed9daa247..2d09ab2cff 100644
--- a/src/smith/physics/tests/solid_statics_patch.cpp
+++ b/src/smith/physics/tests/solid_statics_patch.cpp
@@ -241,6 +241,78 @@ double solution_error(PatchBoundaryCondition bc)
   return computeL2Error(solid.displacement(), exact_solution_coef);
 }
 
+template <typename element_type>
+double pcg_block_solution_error(PatchBoundaryCondition bc)
+{
+  MPI_Barrier(MPI_COMM_WORLD);
+  // Patch test driven by the PcgBlock nonlinear solver; returns the L2 error against the affine exact solution.
+  axom::sidre::DataStore datastore;
+  smith::StateManager::initialize(datastore, "solid_static_pcg_block_solve");
+
+  constexpr int p = element_type::order;
+  constexpr int dim = dimension_of(element_type::geometry);
+
+  static_assert(dim == 2 || dim == 3, "Dimension must be 2 or 3 for solid test");
+
+  AffineSolution<dim> exact_displacement;
+
+  std::string meshdir = std::string(SMITH_REPO_DIR) + "/data/meshes/";
+  std::string filename;
+  switch (element_type::geometry) {
+    case mfem::Geometry::TRIANGLE:
+      filename = meshdir + "patch2D_tris.mesh";
+      break;
+    case mfem::Geometry::SQUARE:
+      filename = meshdir + "patch2D_quads.mesh";
+      break;
+    case mfem::Geometry::TETRAHEDRON:
+      filename = meshdir + "patch3D_tets.mesh";
+      break;
+    case mfem::Geometry::CUBE:
+      filename = meshdir + "patch3D_hexes.mesh";
+      break;
+    default:
+      SLIC_ERROR_ROOT("unsupported element type for patch test");
+      break;
+  }
+
+  auto mesh = std::make_shared<smith::Mesh>(buildMeshFromFile(filename), "mesh_tag");
+
+  smith::NonlinearSolverOptions nonlin_solver_options{.nonlin_solver = NonlinearSolver::PcgBlock,
+                                                      .relative_tol = 0.0,
+                                                      .absolute_tol = 5.0e-14,
+                                                      .max_iterations = 200,
+                                                      .print_level = 0,
+                                                      .pcg_block_len = 10,
+                                                      .pcg_ls_max_backtracks = 8};
+
+  auto equation_solver = std::make_unique<EquationSolver>(
+      nonlin_solver_options, smith::solid_mechanics::default_linear_options, mesh->getComm());
+
+  SolidMechanics<p, dim> solid(std::move(equation_solver), solid_mechanics::default_quasistatic_options, "solid", mesh);
+
+  solid_mechanics::NeoHookean mat{.density = 1.0, .K = 1.0, .G = 1.0};
+  solid.setMaterial(mat, mesh->entireBody());
+
+  mesh->addDomainOfBoundaryElements("essential_boundary", by_attr<dim>(essentialBoundaryAttributes<dim>(bc)));
+  exact_displacement.applyLoads(mat, solid, mesh->domain("essential_boundary"));
+
+  solid.completeSetup();
+  solid.advanceTimestep(1.0);
+
+  const auto& nonlinear_solver = solid.equationSolver().nonlinearSolver();
+  const auto diagnostics = solid.equationSolver().pcgBlockDiagnostics();
+  EXPECT_TRUE(nonlinear_solver.GetConverged());
+  EXPECT_LT(nonlinear_solver.GetFinalRelNorm(), 1.0e-10);
+  EXPECT_TRUE(diagnostics.has_value());
+  if (diagnostics.has_value()) {
+    EXPECT_GT(diagnostics->num_blocks, 0u);
+  }
+
+  mfem::VectorFunctionCoefficient exact_solution_coef(dim, exact_displacement);
+  return computeL2Error(solid.displacement(), exact_solution_coef);
+}
+
 /**
  * @brief Solve pressure-driven problem with 10% uniaxial strain and compare numerical solution to exact answer
  *
@@ -464,6 +536,13 @@ TEST(SolidMechanics, PatchTest2dQ1EssentialAndNaturalBcs)
   EXPECT_LT(quad_error, tol);
 }
 
+TEST(SolidMechanics, PcgBlockPatchTest2dQ1EssentialAndNaturalBcs)
+{
+  using quadrilateral = finite_element<mfem::Geometry::SQUARE, H1<LINEAR> >;
+  double quad_error = pcg_block_solution_error<quadrilateral>(PatchBoundaryCondition::EssentialAndNatural);
+  EXPECT_LT(quad_error, 1.0e-6);  // fixed bound here; the neighboring patch test compares against `tol`
+}
+
 TEST(SolidMechanics, PatchTest3dQ1EssentialAndNaturalBcs)
 {
   using tetrahedron = finite_element<mfem::Geometry::TETRAHEDRON, H1<LINEAR> >;

From 3eda3a3c51d28745a395befdc57c80ccaaa8ffe7 Mon Sep 17 00:00:00 2001
From: Michael Tupek <mrtupek@gmail.com>
Date: Thu, 30 Apr 2026 10:35:27 -0600
Subject: [PATCH 04/27] Start implementing a more efficient interface with
 matrix-free operations that we can use with our new matrix-free nonlinear
 solver.

---
 src/smith/numerics/equation_solver.cpp        |  66 +++++++-
 src/smith/numerics/equation_solver.hpp        |  37 +++++
 src/smith/numerics/functional/functional.hpp  |  66 +++++++-
 .../tests/functional_comparisons.cpp          |  20 +++
 src/smith/numerics/solver_config.hpp          |   2 +-
 .../numerics/tests/test_equationsolver.cpp    |  81 ++++++++++
 src/smith/physics/solid_mechanics.hpp         |  23 +++
 .../physics/tests/shallow_arch_buckling.cpp   | 150 +++++++++++++-----
 8 files changed, 398 insertions(+), 47 deletions(-)

diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp
index 4840b0a1ef..d6ccbe5b36 100644
--- a/src/smith/numerics/equation_solver.cpp
+++ b/src/smith/numerics/equation_solver.cpp
@@ -7,6 +7,7 @@
 #include "smith/numerics/equation_solver.hpp"
 
 #include <cstdlib>
+#include <functional>
 #include <iomanip>
 #include <iostream>
 #include <algorithm>
@@ -925,6 +926,16 @@ class PcgBlockSolver : public mfem::NewtonSolver {
   mutable size_t num_residuals = 0;
   /// Internal counter for matrix assembles
   mutable size_t num_jacobian_assembles = 0;
+  /// Internal counter for preconditioner operator updates
+  mutable size_t num_preconditioner_updates = 0;
+  /// Internal counter for accepted prefix blocks
+  mutable size_t num_prefix_accepts = 0;
+  /// Internal counter for momentum resets
+  mutable size_t num_momentum_resets = 0;
+  /// Internal counter for nonzero PCG beta values
+  mutable size_t num_nonzero_beta = 0;
+  /// Internal counter for zero PCG beta values
+  mutable size_t num_zero_beta = 0;
   /// Internal counter for accepted blocks
   mutable size_t num_blocks = 0;
   /// Internal counter for rejected blocks
@@ -948,6 +959,9 @@ class PcgBlockSolver : public mfem::NewtonSolver {
   /// Last accepted block trust ratio
   mutable double last_trust_ratio = 0.0;
 
+  /// Optional matrix-free tangent action, y = J(x) dx
+  MatrixFreeTangentAction matrix_free_tangent_action;
+
 #ifdef MFEM_USE_MPI
   /// Constructor
   PcgBlockSolver(MPI_Comm comm_, const NonlinearSolverOptions& nonlinear_opts, Solver& preconditioner)
@@ -977,12 +991,22 @@ class PcgBlockSolver : public mfem::NewtonSolver {
     return Norm(residual);
   }
 
-  /// Apply the assembled Jacobian to a vector.
-  void hessVec(const mfem::Vector& x, mfem::Vector& v) const
+  /// Set an optional matrix-free tangent action; when set, hessVec() calls it instead of the assembled gradient.
+  void setMatrixFreeTangentAction(MatrixFreeTangentAction tangent_action)
+  {
+    matrix_free_tangent_action = std::move(tangent_action);
+  }
+
+  /// Apply the tangent at x to dx.
+  void hessVec(const mfem::Vector& x, const mfem::Vector& dx, mfem::Vector& y) const
   {
     SMITH_MARK_FUNCTION;
     ++num_hess_vecs;
-    grad->Mult(x, v);
+    if (matrix_free_tangent_action) {
+      matrix_free_tangent_action(x, dx, y);
+    } else {
+      grad->Mult(dx, y);
+    }
   }
 
   /// Apply the configured nonlinear PCG preconditioner.
@@ -996,7 +1020,16 @@ class PcgBlockSolver : public mfem::NewtonSolver {
   /// Return solver diagnostic counters.
   PcgBlockDiagnostics diagnostics() const
   {
-    return {.num_blocks = num_blocks,
+    return {.num_residuals = num_residuals,
+            .num_hess_vecs = num_hess_vecs,
+            .num_preconds = num_preconds,
+            .num_jacobian_assembles = num_jacobian_assembles,
+            .num_preconditioner_updates = num_preconditioner_updates,
+            .num_prefix_accepts = num_prefix_accepts,
+            .num_momentum_resets = num_momentum_resets,
+            .num_nonzero_beta = num_nonzero_beta,
+            .num_zero_beta = num_zero_beta,
+            .num_blocks = num_blocks,
             .num_block_rejects = num_block_rejects,
             .num_powell_restarts = num_powell_restarts,
             .num_descent_restarts = num_descent_restarts,
@@ -1023,6 +1056,11 @@ class PcgBlockSolver : public mfem::NewtonSolver {
     num_preconds = 0;
     num_residuals = 0;
     num_jacobian_assembles = 0;
+    num_preconditioner_updates = 0;
+    num_prefix_accepts = 0;
+    num_momentum_resets = 0;
+    num_nonzero_beta = 0;
+    num_zero_beta = 0;
     num_blocks = 0;
     num_block_rejects = 0;
     num_powell_restarts = 0;
@@ -1098,6 +1136,7 @@ class PcgBlockSolver : public mfem::NewtonSolver {
       rho_old = 0.0;
       p_old = 0.0;
       z_old = 0.0;
+      ++num_momentum_resets;
     };
 
     auto window_max = [&](const std::vector<double>& history) {
@@ -1151,6 +1190,7 @@ class PcgBlockSolver : public mfem::NewtonSolver {
       }
 
       assembleJacobian(X);
+      ++num_preconditioner_updates;
       pcg_precond.SetOperator(*grad);
 
       r_block = r;
@@ -1208,8 +1248,13 @@ class PcgBlockSolver : public mfem::NewtonSolver {
             force_dot_p = rho;
             ++num_descent_restarts;
           }
+          if (beta == 0.0) {
+            ++num_zero_beta;
+          } else {
+            ++num_nonzero_beta;
+          }
 
-          hessVec(p, Hp);
+          hessVec(X, p, Hp);
           const double pHp = Dot(p, Hp);
 
           double alpha = 0.0;
@@ -1337,6 +1382,9 @@ class PcgBlockSolver : public mfem::NewtonSolver {
         const bool prefix_accept = accept_block && trial_ended_after_inner_failure;
         bool reset_next_momentum = false;
         if (accept_block) {
+          if (prefix_accept) {
+            ++num_prefix_accepts;
+          }
           X = x_trial;
           cumulative_work = trial_cumulative_work;
           work_history = std::move(trial_work_history);
@@ -1442,6 +1490,14 @@ void EquationSolver::setOperator(const mfem::Operator& op)
   }
 }
 
+void EquationSolver::setMatrixFreeTangentAction(MatrixFreeTangentAction tangent_action)
+{
+  auto* pcg_block = dynamic_cast<PcgBlockSolver*>(nonlin_solver_.get());  // null if not a PcgBlockSolver
+  if (pcg_block) {
+    pcg_block->setMatrixFreeTangentAction(std::move(tangent_action));
+  }  // for any other solver the callback is dropped without a diagnostic
+}
+
 void EquationSolver::solve(mfem::Vector& x) const
 {
   mfem::Vector zero(x);
diff --git a/src/smith/numerics/equation_solver.hpp b/src/smith/numerics/equation_solver.hpp
index 3700cad532..7e5882a74a 100644
--- a/src/smith/numerics/equation_solver.hpp
+++ b/src/smith/numerics/equation_solver.hpp
@@ -13,6 +13,7 @@
 #pragma once
 
 #include <cstddef>
+#include <functional>
 #include <memory>
 #include <optional>
 #include <variant>
@@ -27,8 +28,34 @@
 
 namespace smith {
 
+/**
+ * @brief Matrix-free tangent action callback.
+ *
+ * The callback evaluates y = J(x) dx for the current nonlinear state x
+ * without requiring EquationSolver to assemble J.
+ */
+using MatrixFreeTangentAction = std::function<void(const mfem::Vector& x, const mfem::Vector& dx, mfem::Vector& y)>;
+
 /// Diagnostic counters for the nonlinear PCG-block solver
 struct PcgBlockDiagnostics {
+  /// Number of nonlinear residual evaluations
+  size_t num_residuals = 0;
+  /// Number of tangent-vector products (assembled Jacobian or matrix-free tangent action)
+  size_t num_hess_vecs = 0;
+  /// Number of preconditioner applications
+  size_t num_preconds = 0;
+  /// Number of assembled Jacobians
+  size_t num_jacobian_assembles = 0;
+  /// Number of preconditioner operator updates
+  size_t num_preconditioner_updates = 0;
+  /// Number of accepted prefix blocks
+  size_t num_prefix_accepts = 0;
+  /// Number of momentum resets
+  size_t num_momentum_resets = 0;
+  /// Number of steps with nonzero PCG beta
+  size_t num_nonzero_beta = 0;
+  /// Number of steps with zero PCG beta
+  size_t num_zero_beta = 0;
   /// Number of accepted blocks
   size_t num_blocks = 0;
   /// Number of rejected blocks
@@ -103,6 +130,16 @@ class EquationSolver {
    */
   void setOperator(const mfem::Operator& op);
 
+  /**
+   * @brief Sets an optional matrix-free tangent action for nonlinear solvers that can use J(x) dx directly.
+   *
+   * Solvers that do not support matrix-free tangent actions ignore this callback. Supported solvers retain their
+   * assembled-gradient fallback when no callback is set.
+   *
+   * @param[in] tangent_action Callback evaluating y = J(x) dx.
+   */
+  void setMatrixFreeTangentAction(MatrixFreeTangentAction tangent_action);
+
   /**
    * Solves the system F(x) = 0
    * @param[in,out] x Solution to the system of nonlinear equations
diff --git a/src/smith/numerics/functional/functional.hpp b/src/smith/numerics/functional/functional.hpp
index f5a40d8259..730d4570d6 100644
--- a/src/smith/numerics/functional/functional.hpp
+++ b/src/smith/numerics/functional/functional.hpp
@@ -828,7 +828,7 @@ class Functional<test(trials...), exec> {
       }
     };
 
-    uint64_t max_buffer_size()
+    uint64_t max_buffer_size() const
     {
       uint64_t max_entries = 0;
       for (auto& integral : form_.integrals_) {
@@ -849,6 +849,69 @@ class Functional<test(trials...), exec> {
       return max_entries;
     }
 
+    void AssembleDiagonal(mfem::Vector& diag) const override
+    {
+      SLIC_ERROR_ROOT_IF(form_.test_function_space_.family != Family::H1 ||
+                             form_.trial_function_spaces_[which_argument].family != Family::H1,
+                         "Functional gradient diagonal assembly currently supports H1 test/trial spaces only.");
+      SLIC_ERROR_ROOT_IF(test_space_ != trial_space_,
+                         "Functional gradient diagonal assembly currently requires the same test/trial FE space.");
+      SLIC_ERROR_ROOT_IF(form_.output_L_.Size() != form_.input_L_[which_argument].Size(),
+                         "Functional gradient diagonal assembly requires square local operators.");
+      // Build the diagonal from element matrices without calling assemble(); accumulate in local_diag first.
+      mfem::Vector local_diag(form_.output_L_.Size(), form_.mem_type);
+      local_diag = 0.0;
+      // Scratch storage that backs the element-matrix view K_e for every integral and geometry below.
+      std::vector<double> K_elem_buffer(max_buffer_size());
+
+      for (auto& integral : form_.integrals_) {
+        // if this integral's derivative isn't identically zero
+        if (integral.functional_to_integral_index_.count(which_argument) > 0) {
+          Domain& dom = integral.domain_;
+
+          uint32_t id = integral.functional_to_integral_index_.at(which_argument);
+          const auto& G_test = dom.get_restriction(form_.test_function_space_);
+          const auto& G_trial = dom.get_restriction(form_.trial_function_spaces_[which_argument]);
+          for (const auto& [geom, calculate_element_matrices_func] : integral.element_gradient_[id]) {
+            const auto& test_restriction = G_test.restrictions.at(geom);
+            const auto& trial_restriction = G_trial.restrictions.at(geom);
+            // View the scratch buffer as K_e(element, trial dof, test dof) and zero it before the callback runs.
+            CPUArrayView<double, 3> K_e(K_elem_buffer.data(), test_restriction.num_elements,
+                                        trial_restriction.nodes_per_elem * trial_restriction.components,
+                                        test_restriction.nodes_per_elem * test_restriction.components);
+            detail::zero_out(K_e);
+
+            calculate_element_matrices_func(K_e);
+
+            uint32_t rows_per_elem = uint32_t(test_restriction.nodes_per_elem * test_restriction.components);
+            uint32_t cols_per_elem = uint32_t(trial_restriction.nodes_per_elem * trial_restriction.components);
+
+            std::vector<DoF> test_vdofs(rows_per_elem);
+            std::vector<DoF> trial_vdofs(cols_per_elem);
+
+            for (uint32_t e = 0; e < test_restriction.num_elements; e++) {
+              test_restriction.GetElementVDofs(int(e), test_vdofs);
+              trial_restriction.GetElementVDofs(int(e), trial_vdofs);
+              // K_e is indexed (e, column, row); only entries whose row and column share a dof index are summed.
+              for (uint32_t i = 0; i < cols_per_elem; i++) {
+                int col = int(trial_vdofs[i].index());
+
+                for (uint32_t j = 0; j < rows_per_elem; j++) {
+                  int row = int(test_vdofs[j].index());
+                  if (row == col) {
+                    local_diag(row) += K_e(e, i, j);
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+      // Apply the transpose of P_test_ to map local_diag into diag, which is resized to Height().
+      diag.SetSize(Height(), form_.mem_type);
+      form_.P_test_->MultTranspose(local_diag, diag);
+    }
+
     std::unique_ptr<mfem::HypreParMatrix> assemble()
     {
       if (row_ptr.empty()) {
@@ -977,6 +1040,7 @@ class Functional<test(trials...), exec> {
     };
 
     friend auto assemble(Gradient& g) { return g.assemble(); }
+    friend void assemble_diagonal(const Gradient& g, mfem::Vector& diag) { g.AssembleDiagonal(diag); }
 
    private:
     /// @brief The "parent" @p Functional to calculate gradients with
diff --git a/src/smith/numerics/functional/tests/functional_comparisons.cpp b/src/smith/numerics/functional/tests/functional_comparisons.cpp
index 12314e45d0..48a8638fb6 100644
--- a/src/smith/numerics/functional/tests/functional_comparisons.cpp
+++ b/src/smith/numerics/functional/tests/functional_comparisons.cpp
@@ -184,6 +184,15 @@ void functional_test(mfem::ParMesh& mesh, H1<p> test, H1<p> trial, Dimension<dim
 
   std::unique_ptr<mfem::HypreParMatrix> J_func = assemble(drdU);
 
+  mfem::Vector diag_direct(U.Size());
+  drdU.AssembleDiagonal(diag_direct);
+
+  mfem::Vector diag_assembled(U.Size());
+  J_func->GetDiag(diag_assembled);
+
+  mfem::Vector diag_diff(U.Size());
+  subtract(diag_direct, diag_assembled, diag_diff);
+
   // Compute the gradient action using standard MFEM and functional
   // mfem::Vector g1 = (*J_mfem) * U;
   mfem::Vector g1(U.Size());
@@ -209,6 +218,7 @@ void functional_test(mfem::ParMesh& mesh, H1<p> test, H1<p> trial, Dimension<dim
   }
 
   // Ensure the two methods generate the same result
+  EXPECT_NEAR(0.0, diag_diff.Norml2() / diag_assembled.Norml2(), 1.e-14);
   EXPECT_NEAR(0.0, diff1.Norml2() / g1.Norml2(), 1.e-14);
   EXPECT_NEAR(0.0, diff2.Norml2() / g1.Norml2(), 1.e-14);
 }
@@ -300,6 +310,15 @@ void functional_test(mfem::ParMesh& mesh, H1<p, dim> test, H1<p, dim> trial, Dim
 
   std::unique_ptr<mfem::HypreParMatrix> J_func = assemble(drdU);
 
+  mfem::Vector diag_direct(U.Size());
+  drdU.AssembleDiagonal(diag_direct);
+
+  mfem::Vector diag_assembled(U.Size());
+  J_func->GetDiag(diag_assembled);
+
+  mfem::Vector diag_diff(U.Size());
+  subtract(diag_direct, diag_assembled, diag_diff);
+
   // mfem::Vector g1 = (*J_mfem) * U;
   mfem::Vector g1(U.Size());
   J_mfem->Mult(U, g1);
@@ -325,6 +344,7 @@ void functional_test(mfem::ParMesh& mesh, H1<p, dim> test, H1<p, dim> trial, Dim
     std::cout << "||g1-g3||/||g1||: " << diff2.Norml2() / g1.Norml2() << std::endl;
   }
 
+  EXPECT_NEAR(0., diag_diff.Norml2() / diag_assembled.Norml2(), 1.e-14);
   EXPECT_NEAR(0., diff1.Norml2() / g1.Norml2(), 1.e-14);
   EXPECT_NEAR(0., diff2.Norml2() / g1.Norml2(), 1.e-14);
 }
diff --git a/src/smith/numerics/solver_config.hpp b/src/smith/numerics/solver_config.hpp
index e7c26bda35..ecbfde4cd9 100644
--- a/src/smith/numerics/solver_config.hpp
+++ b/src/smith/numerics/solver_config.hpp
@@ -478,7 +478,7 @@ struct NonlinearSolverOptions {
   int pcg_block_len = 10;
 
   /// Powell restart threshold for nonlinear PCG residual orthogonality
-  double pcg_powell_eta = 0.2;
+  double pcg_powell_eta = 0.005;
 
   /// Trust-ratio threshold below which the PCG-block trust scale shrinks
   double pcg_trust_eta_bad = 0.1;
diff --git a/src/smith/numerics/tests/test_equationsolver.cpp b/src/smith/numerics/tests/test_equationsolver.cpp
index edab4fd012..6ee649716c 100644
--- a/src/smith/numerics/tests/test_equationsolver.cpp
+++ b/src/smith/numerics/tests/test_equationsolver.cpp
@@ -124,6 +124,87 @@ TEST_P(EquationSolverSuite, All)
   }
 }
 
+TEST(EquationSolver, PcgBlockUsesMatrixFreeTangentAction)
+{
+  auto mesh = mfem::Mesh::MakeCartesian2D(1, 1, mfem::Element::QUADRILATERAL);
+  auto pmesh = mfem::ParMesh(MPI_COMM_WORLD, mesh);
+  // Checks that the PcgBlock solver sends its Jacobian-vector products through a registered matrix-free callback.
+  pmesh.EnsureNodes();
+  pmesh.ExchangeFaceNbrData();
+
+  constexpr int p = 1;
+  constexpr int dim = 2;
+  using test_space = H1<p>;
+  using trial_space = H1<p>;
+
+  auto [fes, fec] = smith::generateParFiniteElementSpace<test_space>(&pmesh);
+
+  mfem::HypreParVector x_exact(fes.get());
+  mfem::HypreParVector x_computed(fes.get());
+  x_exact.Randomize(0);
+  x_computed = 0.0;
+
+  std::unique_ptr<mfem::HypreParMatrix> J;
+
+  Functional<test_space(trial_space)> residual(fes.get(), {fes.get()});
+  Domain domain = EntireDomain(pmesh);
+  residual.AddDomainIntegral(
+      Dimension<dim>{}, DependsOn<0>{},
+      [](double /*t*/, auto, auto scalar) {
+        auto [u, du_dx] = scalar;
+        return smith::tuple{u, du_dx};
+      },
+      domain);
+  // Subtracting residual(x_exact) makes x_exact a root of the operator handed to the solver.
+  StdFunctionOperator residual_opr(
+      fes->TrueVSize(),
+      [&x_exact, &residual](const mfem::Vector& x, mfem::Vector& r) {
+        constexpr double time = 0.0;
+        r = residual(time, x);
+        r -= residual(time, x_exact);
+      },
+      [&residual, &J](const mfem::Vector& x) -> mfem::Operator& {
+        constexpr double time = 0.0;
+        auto [val, grad] = residual(time, differentiate_wrt(x));
+        J = assemble(grad);
+        return *J;
+      });
+
+  const LinearSolverOptions lin_opts = {.linear_solver = LinearSolver::CG,
+                                        .preconditioner = Preconditioner::HypreJacobi,
+                                        .relative_tol = 1.0e-12,
+                                        .absolute_tol = 1.0e-14,
+                                        .max_iterations = 500,
+                                        .print_level = 0};
+
+  const NonlinearSolverOptions nonlin_opts = {.nonlin_solver = NonlinearSolver::PcgBlock,
+                                              .relative_tol = 1.0e-12,
+                                              .absolute_tol = 1.0e-14,
+                                              .max_iterations = 500,
+                                              .print_level = 0};
+
+  EquationSolver eq_solver(nonlin_opts, lin_opts);
+  eq_solver.setOperator(residual_opr);
+  // Count callback invocations; the total is compared with the solver's num_hess_vecs diagnostic below.
+  int num_tangent_actions = 0;
+  eq_solver.setMatrixFreeTangentAction(
+      [&residual, &num_tangent_actions](const mfem::Vector& x, const mfem::Vector& dx, mfem::Vector& y) {
+        constexpr double time = 0.0;
+        auto [val, grad] = residual(time, differentiate_wrt(x));
+        grad.Mult(dx, y);
+        ++num_tangent_actions;
+      });
+
+  eq_solver.solve(x_computed);
+  // All Jacobian-vector products must have used the callback, and the solve must still land on x_exact.
+  const auto diagnostics = eq_solver.pcgBlockDiagnostics();
+  ASSERT_TRUE(diagnostics.has_value());
+  EXPECT_GT(num_tangent_actions, 0);
+  EXPECT_EQ(diagnostics->num_hess_vecs, static_cast<size_t>(num_tangent_actions));
+  EXPECT_TRUE(eq_solver.nonlinearSolver().GetConverged());
+  EXPECT_LT(x_computed.DistanceTo(x_exact.GetData()), 1.0e-10);
+}
+
 /**
  * @brief Nonlinear solvers to test. Always includes NonlinearSolver::Newton and NonlinearSolver::LBFGS
  * If SMITH_USE_SUNDIALS is set, adds: NonlinearSolver::KINFullStep, NonlinearSolver::KINBacktrackingLineSearch, and
diff --git a/src/smith/physics/solid_mechanics.hpp b/src/smith/physics/solid_mechanics.hpp
index e717272274..c635f67f89 100644
--- a/src/smith/physics/solid_mechanics.hpp
+++ b/src/smith/physics/solid_mechanics.hpp
@@ -1061,6 +1061,25 @@ class SolidMechanics<order, dim, Parameters<parameter_space...>, std::integer_se
         });
   }
 
+  /// @brief Matrix-free action dr = J(u) du of the quasistatic tangent; essential dofs are treated as identity rows.
+  void quasistaticTangentAction(const mfem::Vector& u, const mfem::Vector& du, mfem::Vector& dr) const
+  {
+    SMITH_MARK_FUNCTION;
+
+    const auto& essential_dofs = bcs_.allEssentialTrueDofs();
+    mfem::Vector du_free(du);
+    du_free.SetSubVector(essential_dofs, 0.0);  // the direction is zeroed on essential dofs before the product
+
+    auto [r, drdu] = (*residual_)(time_, shapeDisplacement(), differentiate_wrt(u), acceleration_,
+                                  *parameters_[parameter_indices].state...);
+    drdu.Mult(du_free, dr);
+
+    // Essential-dof rows return the input direction unchanged.
+    for (int k = 0; k < essential_dofs.Size(); ++k) {
+      dr[essential_dofs[k]] = du[essential_dofs[k]];
+    }
+  }
+
   /**
    * @brief Return the assembled stiffness matrix
    *
@@ -1139,6 +1158,10 @@ class SolidMechanics<order, dim, Parameters<parameter_space...>, std::integer_se
 #endif
 
     nonlin_solver_->setOperator(*residual_with_bcs_);
+    if (is_quasistatic_) {
+      nonlin_solver_->setMatrixFreeTangentAction([this](const mfem::Vector& u, const mfem::Vector& du,
+                                                        mfem::Vector& dr) { quasistaticTangentAction(u, du, dr); });
+    }
 
     if (checkpoint_to_disk_) {
       outputStateToDisk();
diff --git a/src/smith/physics/tests/shallow_arch_buckling.cpp b/src/smith/physics/tests/shallow_arch_buckling.cpp
index 3c36ec1a66..b9514b74ed 100644
--- a/src/smith/physics/tests/shallow_arch_buckling.cpp
+++ b/src/smith/physics/tests/shallow_arch_buckling.cpp
@@ -4,8 +4,8 @@
 //
 // SPDX-License-Identifier: (BSD-3-Clause)
 
-#include <cmath>
 #include <memory>
+#include <stdexcept>
 #include <string>
 #include <vector>
 
@@ -27,101 +27,171 @@ namespace {
 
 constexpr double length = 10.0;
 constexpr double thickness = 0.25;
-constexpr double rise = 0.75;
 constexpr double end_tol = 1.0e-8;
-
-void warpToShallowArch(smith::Mesh& mesh)
+constexpr double top_tol = 1.0e-8;
+std::string solver_name = "TrustRegion";
+int print_level = 2;
+int pcg_block_len = 10;
+double pcg_powell_eta = 0.005;
+int nonlinear_max_iterations = 30000;
+
+NonlinearSolver selectedNonlinearSolver()
 {
-  auto& mfem_mesh = mesh.mfemParMesh();
-  for (int i = 0; i < mfem_mesh.GetNV(); ++i) {
-    auto* vertex = mfem_mesh.GetVertex(i);
-    const double xi = 2.0 * vertex[0] / length - 1.0;
-    vertex[1] += rise * (1.0 - xi * xi);
+  if (solver_name == "NewtonLineSearch") {
+    return NonlinearSolver::NewtonLineSearch;
+  }
+  if (solver_name == "TrustRegion") {
+    return NonlinearSolver::TrustRegion;
   }
+  if (solver_name == "PcgBlock") {
+    return NonlinearSolver::PcgBlock;
+  }
+
+  throw std::runtime_error("Unknown --solver value '" + solver_name +
+                           "'. Use NewtonLineSearch, TrustRegion, or PcgBlock.");
+}
 
-  mesh.mfemParMesh().DeleteGeometricFactors();
-  auto* nodes = mesh.mfemParMesh().GetNodes();
-  auto* coords = nodes->ReadWrite();
-  const int vdim = nodes->VectorDim();
-  const int scalar_size = nodes->Size() / vdim;
-
-  for (int i = 0; i < scalar_size; ++i) {
-    const double x = coords[i];
-    const double y = coords[i + scalar_size];
-    const double xi = 2.0 * x / length - 1.0;
-    coords[i + scalar_size] = y + rise * (1.0 - xi * xi);
+void parseCommandLine(int& argc, char** argv)
+{
+  int write_arg = 1;  // next argv slot for a pass-through argument; argv[0] always stays in place
+  for (int read_arg = 1; read_arg < argc; ++read_arg) {
+    const std::string arg = argv[read_arg];
+    if (arg.rfind("--solver=", 0) == 0) {
+      solver_name = arg.substr(std::string("--solver=").size());
+    } else if (arg.rfind("--print-level=", 0) == 0) {
+      print_level = std::stoi(arg.substr(std::string("--print-level=").size()));
+    } else if (arg.rfind("--pcg-block-len=", 0) == 0) {
+      pcg_block_len = std::stoi(arg.substr(std::string("--pcg-block-len=").size()));
+    } else if (arg.rfind("--pcg-powell-eta=", 0) == 0) {
+      pcg_powell_eta = std::stod(arg.substr(std::string("--pcg-powell-eta=").size()));
+    } else if (arg.rfind("--nonlinear-max-iterations=", 0) == 0) {
+      nonlinear_max_iterations = std::stoi(arg.substr(std::string("--nonlinear-max-iterations=").size()));
+    } else {
+      argv[write_arg++] = argv[read_arg];  // not one of ours: keep it for the callers that parse argv next
+    }
   }
+  argc = write_arg;
+  argv[argc] = nullptr;  // after compaction this slot may hold a stale pointer; restore the null terminator
 }
 
 }  // namespace
 
-TEST(ShallowArchBuckling, NeoHookeanTractionControlled)
+TEST(ShallowArchBuckling, CompressedThinBeamSnapThrough)
 {
   MPI_Barrier(MPI_COMM_WORLD);
 
   constexpr int p = 1;
   constexpr int dim = 2;
-  constexpr int nx = 48;
+  constexpr int nx = 96;
   constexpr int ny = 4;
 
   axom::sidre::DataStore datastore;
   smith::StateManager::initialize(datastore, "shallow_arch_buckling");
 
   auto mesh = std::make_shared<smith::Mesh>(
-      mfem::Mesh::MakeCartesian2D(nx, ny, mfem::Element::QUADRILATERAL, true, length, thickness), "arch_mesh", 0, 0);
-  warpToShallowArch(*mesh);
+      mfem::Mesh::MakeCartesian2D(nx, ny, mfem::Element::QUADRILATERAL, true, length, thickness),
+      "compressed_beam_mesh", 0, 0);
 
   mesh->addDomainOfBoundaryElements("left_end",
                                     [](std::vector<vec2> vertices, int) { return average(vertices)[0] < end_tol; });
   mesh->addDomainOfBoundaryElements(
       "right_end", [](std::vector<vec2> vertices, int) { return average(vertices)[0] > length - end_tol; });
-  mesh->addDomainOfBoundaryElements("top_face", [](std::vector<vec2>, int attr) { return attr == 3; });
+  mesh->addDomainOfBoundaryElements(
+      "top_face", [](std::vector<vec2> vertices, int) { return average(vertices)[1] > thickness - top_tol; });
+  EXPECT_GT(mesh->domain("left_end").total_elements(), 0);
+  EXPECT_GT(mesh->domain("right_end").total_elements(), 0);
   EXPECT_GT(mesh->domain("top_face").total_elements(), 0);
 
   smith::LinearSolverOptions linear_options{.linear_solver = LinearSolver::CG,
-                                            .preconditioner = Preconditioner::HypreAMG,
+                                            .preconditioner = Preconditioner::HypreJacobi,
                                             .relative_tol = 1.0e-8,
                                             .absolute_tol = 1.0e-14,
-                                            .max_iterations = 500,
+                                            .max_iterations = 10000,
                                             .print_level = 0};
 
-  smith::NonlinearSolverOptions nonlinear_options{.nonlin_solver = NonlinearSolver::PcgBlock,
+  smith::NonlinearSolverOptions nonlinear_options{.nonlin_solver = selectedNonlinearSolver(),
                                                   .relative_tol = 1.0e-8,
                                                   .absolute_tol = 1.0e-10,
-                                                  .max_iterations = 500,
-                                                  .print_level = 2,
-                                                  .pcg_block_len = 10,
+                                                  .max_iterations = nonlinear_max_iterations,
+                                                  .print_level = print_level,
+                                                  .pcg_block_len = pcg_block_len,
+                                                  .pcg_powell_eta = pcg_powell_eta,
                                                   .pcg_max_block_retries = 40};
 
   SolidMechanics<p, dim> solid(nonlinear_options, linear_options, solid_mechanics::default_quasistatic_options,
-                               "shallow_arch", mesh);
+                               "compressed_beam", mesh);
 
   solid_mechanics::NeoHookean mat{.density = 1.0, .K = 100.0, .G = 10.0};
   solid.setMaterial(mat, mesh->entireBody());
   solid.setFixedBCs(mesh->domain("left_end"));
-  solid.setFixedBCs(mesh->domain("right_end"));
 
-  constexpr double final_traction = 0.2;
-  solid.setTraction([](auto, auto, double t) { return vec2{{0.0, -final_traction * t}}; }, mesh->domain("top_face"));
+  constexpr double final_compression = 0.2;
+  constexpr double seed_down_traction = 1.0e-5;
+  constexpr double final_snap_up_traction = 0.02;
+  solid.setDisplacementBCs([](auto, double t) { return vec2{{-final_compression * t, 0.0}}; },
+                           mesh->domain("right_end"), Component::X);
+  solid.setFixedBCs(mesh->domain("right_end"), Component::Y);
+  solid.setTraction(
+      [](auto, auto, double t) {
+        if (t < 0.5) {
+          return vec2{{0.0, -seed_down_traction * (t / 0.5)}};
+        }
+        const double snap_ramp = (t - 0.5) / 0.5;
+        return vec2{{0.0, -seed_down_traction * (1.0 - snap_ramp) + final_snap_up_traction * snap_ramp}};
+      },
+      mesh->domain("top_face"));
 
   solid.completeSetup();
   solid.outputStateToDisk("shallow_arch_buckling");
 
-  constexpr int num_steps = 40;
+  mfem::out << "Compressed thin beam snap-through run: solver = " << solver_name << '\n';
+
+  constexpr int num_steps = 20;
+  int num_converged_steps = 0;
   for (int step = 0; step < num_steps; ++step) {
-    EXPECT_NO_THROW(solid.advanceTimestep(1.0 / num_steps));
+    solid.advanceTimestep(1.0 / num_steps);
+    const auto& nonlinear_solver = solid.equationSolver().nonlinearSolver();
+    if (nonlinear_solver.GetConverged()) {
+      ++num_converged_steps;
+    }
+    mfem::out << "Load step " << step + 1 << "/" << num_steps << ": converged = " << nonlinear_solver.GetConverged()
+              << ", nonlinear iterations = " << nonlinear_solver.GetNumIterations()
+              << ", final relative residual = " << nonlinear_solver.GetFinalRelNorm() << '\n';
     solid.outputStateToDisk("shallow_arch_buckling");
+    if (const auto diagnostics = solid.equationSolver().pcgBlockDiagnostics()) {
+      mfem::out << "  PCG diagnostics: residuals = " << diagnostics->num_residuals
+                << ", hess-vecs = " << diagnostics->num_hess_vecs
+                << ", preconditioner applications = " << diagnostics->num_preconds
+                << ", Jacobian assemblies = " << diagnostics->num_jacobian_assembles
+                << ", preconditioner updates = " << diagnostics->num_preconditioner_updates
+                << ", accepted blocks = " << diagnostics->num_blocks
+                << ", accepted steps = " << diagnostics->num_accepted_steps
+                << ", block rejects = " << diagnostics->num_block_rejects
+                << ", prefix accepts = " << diagnostics->num_prefix_accepts
+                << ", momentum resets = " << diagnostics->num_momentum_resets
+                << ", nonzero beta = " << diagnostics->num_nonzero_beta
+                << ", zero beta = " << diagnostics->num_zero_beta
+                << ", Powell restarts = " << diagnostics->num_powell_restarts
+                << ", descent restarts = " << diagnostics->num_descent_restarts
+                << ", negative curvature = " << diagnostics->num_negative_curvature
+                << ", trust capped steps = " << diagnostics->num_trust_capped_steps
+                << ", line-search backtracks = " << diagnostics->num_line_search_backtracks
+                << ", final h_scale = " << diagnostics->final_h_scale
+                << ", last trust ratio = " << diagnostics->last_trust_ratio << '\n';
+    }
+    if (!nonlinear_solver.GetConverged()) {
+      throw std::runtime_error("Nonlinear solve failed to converge at load step " + std::to_string(step + 1));
+    }
   }
 
-  const auto diagnostics = solid.equationSolver().pcgBlockDiagnostics();
-  ASSERT_TRUE(diagnostics.has_value());
-  EXPECT_GT(diagnostics->num_accepted_steps, 0);
+  mfem::out << "Converged load steps: " << num_converged_steps << "/" << num_steps << '\n';
 }
 
 }  // namespace smith
 
 int main(int argc, char* argv[])
 {
+  smith::parseCommandLine(argc, argv);
   ::testing::InitGoogleTest(&argc, argv);
   smith::ApplicationManager applicationManager(argc, argv);
   return RUN_ALL_TESTS();

From 0f438f0a6ffcd1803dd3d619a1b964e70187cfb9 Mon Sep 17 00:00:00 2001
From: Michael Tupek <mrtupek@gmail.com>
Date: Thu, 30 Apr 2026 12:33:42 -0600
Subject: [PATCH 05/27] Trying to implement JacobianOperator abstraction.

---
 src/smith/numerics/equation_solver.cpp        |  31 ++++-
 src/smith/numerics/equation_solver.hpp        |  72 +++++++++++
 src/smith/numerics/functional/functional.hpp  |   3 +-
 .../tests/functional_comparisons.cpp          | 100 ++++++++++++++-
 .../numerics/tests/test_equationsolver.cpp    | 116 ++++++++++++++++++
 5 files changed, 317 insertions(+), 5 deletions(-)

diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp
index d6ccbe5b36..923f94c182 100644
--- a/src/smith/numerics/equation_solver.cpp
+++ b/src/smith/numerics/equation_solver.cpp
@@ -926,6 +926,10 @@ class PcgBlockSolver : public mfem::NewtonSolver {
   mutable size_t num_residuals = 0;
   /// Internal counter for matrix assembles
   mutable size_t num_jacobian_assembles = 0;
+  /// Internal counter for JacobianOperator evaluations
+  mutable size_t num_jacobian_operator_evals = 0;
+  /// Internal counter for direct diagonal assemblies
+  mutable size_t num_diagonal_assembles = 0;
   /// Internal counter for preconditioner operator updates
   mutable size_t num_preconditioner_updates = 0;
   /// Internal counter for accepted prefix blocks
@@ -961,6 +965,8 @@ class PcgBlockSolver : public mfem::NewtonSolver {
 
   /// Optional matrix-free tangent action, y = J(x) dx
   MatrixFreeTangentAction matrix_free_tangent_action;
+  /// Optional JacobianOperator factory
+  JacobianOperatorFactory jacobian_operator_factory;
 
 #ifdef MFEM_USE_MPI
   /// Constructor
@@ -997,12 +1003,23 @@ class PcgBlockSolver : public mfem::NewtonSolver {
     matrix_free_tangent_action = std::move(tangent_action);
   }
 
+  /// Set an optional JacobianOperator factory; hessVec() prefers it over the matrix-free tangent action.
+  void setJacobianOperator(JacobianOperatorFactory jacobian_operator)
+  {
+    jacobian_operator_factory = std::move(jacobian_operator);
+  }
+
   /// Apply the tangent at x to dx.
   void hessVec(const mfem::Vector& x, const mfem::Vector& dx, mfem::Vector& y) const
   {
     SMITH_MARK_FUNCTION;
     ++num_hess_vecs;
-    if (matrix_free_tangent_action) {
+    if (jacobian_operator_factory) {
+      ++num_jacobian_operator_evals;
+      std::unique_ptr<JacobianOperator> jacobian_operator = jacobian_operator_factory(x);
+      SLIC_ERROR_ROOT_IF(!jacobian_operator, "JacobianOperator factory returned a null operator.");
+      jacobian_operator->Mult(dx, y);
+    } else if (matrix_free_tangent_action) {
       matrix_free_tangent_action(x, dx, y);
     } else {
       grad->Mult(dx, y);
@@ -1024,6 +1041,8 @@ class PcgBlockSolver : public mfem::NewtonSolver {
             .num_hess_vecs = num_hess_vecs,
             .num_preconds = num_preconds,
             .num_jacobian_assembles = num_jacobian_assembles,
+            .num_jacobian_operator_evals = num_jacobian_operator_evals,
+            .num_diagonal_assembles = num_diagonal_assembles,
             .num_preconditioner_updates = num_preconditioner_updates,
             .num_prefix_accepts = num_prefix_accepts,
             .num_momentum_resets = num_momentum_resets,
@@ -1056,6 +1075,8 @@ class PcgBlockSolver : public mfem::NewtonSolver {
     num_preconds = 0;
     num_residuals = 0;
     num_jacobian_assembles = 0;
+    num_jacobian_operator_evals = 0;
+    num_diagonal_assembles = 0;
     num_preconditioner_updates = 0;
     num_prefix_accepts = 0;
     num_momentum_resets = 0;
@@ -1498,6 +1519,14 @@ void EquationSolver::setMatrixFreeTangentAction(MatrixFreeTangentAction tangent_
   }
 }
 
+void EquationSolver::setJacobianOperator(JacobianOperatorFactory jacobian_operator)
+{
+  // Only the PCG-block solver consumes the factory; for any other nonlinear solver this call does nothing.
+  if (auto* pcg_block = dynamic_cast<PcgBlockSolver*>(nonlin_solver_.get())) {
+    pcg_block->setJacobianOperator(std::move(jacobian_operator));
+  }
+}
+
 void EquationSolver::solve(mfem::Vector& x) const
 {
   mfem::Vector zero(x);
diff --git a/src/smith/numerics/equation_solver.hpp b/src/smith/numerics/equation_solver.hpp
index 7e5882a74a..febc40754b 100644
--- a/src/smith/numerics/equation_solver.hpp
+++ b/src/smith/numerics/equation_solver.hpp
@@ -23,11 +23,63 @@
 #include "mfem.hpp"
 
 #include "smith/infrastructure/input.hpp"
+#include "smith/infrastructure/logger.hpp"
 #include "smith/numerics/solver_config.hpp"
 #include "smith/numerics/petsc_solvers.hpp"
 
 namespace smith {
 
+/**
+ * @brief Solver-facing interface for Jacobian operations.
+ *
+ * A JacobianOperator represents the operations available on J(x) after differentiating a residual but before
+ * necessarily assembling a sparse matrix. Concrete implementations may support matrix-free products, sparse assembly,
+ * diagonal extraction, or all of them. The defaults below report unsupported operations through SLIC_ERROR.
+ */
+class JacobianOperator : public mfem::Operator {
+ public:
+  using mfem::Operator::Operator;
+
+  /// Assemble the sparse Jacobian representation. The base implementation calls SLIC_ERROR, then returns nullptr.
+  virtual std::unique_ptr<mfem::HypreParMatrix> assemble()
+  {
+    SLIC_ERROR("This JacobianOperator does not support sparse assembly.");
+    return nullptr;
+  }
+
+  /// Assemble the scalar true-dof diagonal of the Jacobian. The base implementation calls SLIC_ERROR.
+  virtual void assembleDiagonal(mfem::Vector&) const
+  {
+    SLIC_ERROR("This JacobianOperator does not support diagonal assembly.");
+  }
+};
+
+/**
+ * @brief Non-owning adapter from a smith::functional Gradient object to the solver-facing JacobianOperator interface.
+ */
+template <typename Gradient>
+class FunctionalJacobianOperator : public JacobianOperator {
+ public:
+  explicit FunctionalJacobianOperator(Gradient& gradient)
+      : JacobianOperator(gradient.Height(), gradient.Width()), gradient_(gradient)
+  {
+  }
+  /// Forwards Mult to the wrapped gradient.
+  void Mult(const mfem::Vector& dx, mfem::Vector& y) const override { gradient_.Mult(dx, y); }
+  /// Forwards AddMult (including the scaling factor a) to the wrapped gradient.
+  void AddMult(const mfem::Vector& dx, mfem::Vector& y, const double a = 1.0) const override
+  {
+    gradient_.AddMult(dx, y, a);
+  }
+  /// Forwards sparse assembly to the wrapped gradient.
+  std::unique_ptr<mfem::HypreParMatrix> assemble() override { return gradient_.assemble(); }
+  /// Forwards diagonal assembly to the wrapped gradient.
+  void assembleDiagonal(mfem::Vector& diag) const override { gradient_.assembleDiagonal(diag); }
+
+ private:
+  Gradient& gradient_;  ///< Wrapped gradient; held by reference, so it must outlive this adapter.
+};
+
 /**
  * @brief Matrix-free tangent action callback.
  *
@@ -36,6 +88,11 @@ namespace smith {
  */
 using MatrixFreeTangentAction = std::function<void(const mfem::Vector& x, const mfem::Vector& dx, mfem::Vector& y)>;
 
+/**
+ * @brief Callback that evaluates and returns a JacobianOperator at the supplied nonlinear state.
+ */
+using JacobianOperatorFactory = std::function<std::unique_ptr<JacobianOperator>(const mfem::Vector& x)>;
+
 /// Diagnostic counters for the nonlinear PCG-block solver
 struct PcgBlockDiagnostics {
   /// Number of nonlinear residual evaluations
@@ -46,6 +103,10 @@ struct PcgBlockDiagnostics {
   size_t num_preconds = 0;
   /// Number of assembled Jacobians
   size_t num_jacobian_assembles = 0;
+  /// Number of solver-facing JacobianOperator evaluations
+  size_t num_jacobian_operator_evals = 0;
+  /// Number of direct diagonal assemblies
+  size_t num_diagonal_assembles = 0;
   /// Number of preconditioner operator updates
   size_t num_preconditioner_updates = 0;
   /// Number of accepted prefix blocks
@@ -140,6 +201,17 @@ class EquationSolver {
    */
   void setMatrixFreeTangentAction(MatrixFreeTangentAction tangent_action);
 
+  /**
+   * @brief Sets an optional JacobianOperator factory for nonlinear solvers that can use matrix-free Jacobian products.
+   *
+   * This is the preferred replacement for the narrower matrix-free tangent-action callback. During migration,
+   * PCG-block uses this callback first when it is registered and otherwise falls back to MatrixFreeTangentAction or
+   * assembled gradients.
+   *
+   * @param[in] jacobian_operator Callback evaluating and returning J(x).
+   */
+  void setJacobianOperator(JacobianOperatorFactory jacobian_operator);
+
   /**
    * Solves the system F(x) = 0
    * @param[in,out] x Solution to the system of nonlinear equations
diff --git a/src/smith/numerics/functional/functional.hpp b/src/smith/numerics/functional/functional.hpp
index 730d4570d6..7e611182b8 100644
--- a/src/smith/numerics/functional/functional.hpp
+++ b/src/smith/numerics/functional/functional.hpp
@@ -912,6 +912,8 @@ class Functional<test(trials...), exec> {
       form_.P_test_->MultTranspose(local_diag, diag);
     }
 
+    void assembleDiagonal(mfem::Vector& diag) const { AssembleDiagonal(diag); }  // used by FunctionalJacobianOperator
+
     std::unique_ptr<mfem::HypreParMatrix> assemble()
     {
       if (row_ptr.empty()) {
@@ -1040,7 +1042,6 @@ class Functional<test(trials...), exec> {
     };
 
     friend auto assemble(Gradient& g) { return g.assemble(); }
-    friend void assemble_diagonal(const Gradient& g, mfem::Vector& diag) { g.AssembleDiagonal(diag); }
 
    private:
     /// @brief The "parent" @p Functional to calculate gradients with
diff --git a/src/smith/numerics/functional/tests/functional_comparisons.cpp b/src/smith/numerics/functional/tests/functional_comparisons.cpp
index 48a8638fb6..95a7d95d42 100644
--- a/src/smith/numerics/functional/tests/functional_comparisons.cpp
+++ b/src/smith/numerics/functional/tests/functional_comparisons.cpp
@@ -4,9 +4,10 @@
 //
 // SPDX-License-Identifier: (BSD-3-Clause)
 
+#include <algorithm>
+#include <chrono>
 #include <cstdlib>
 #include <iostream>
-#include <algorithm>
 #include <map>
 #include <memory>
 #include <set>
@@ -32,6 +33,8 @@
 using namespace smith;
 
 int nsamples = 1;  // because mfem doesn't take in unsigned int
+bool run_diagonal_benchmark = false;
+int diagonal_benchmark_samples = 5;
 
 constexpr bool verbose = false;
 std::unique_ptr<mfem::ParMesh> mesh2D;
@@ -185,7 +188,7 @@ void functional_test(mfem::ParMesh& mesh, H1<p> test, H1<p> trial, Dimension<dim
   std::unique_ptr<mfem::HypreParMatrix> J_func = assemble(drdU);
 
   mfem::Vector diag_direct(U.Size());
-  drdU.AssembleDiagonal(diag_direct);
+  drdU.assembleDiagonal(diag_direct);
 
   mfem::Vector diag_assembled(U.Size());
   J_func->GetDiag(diag_assembled);
@@ -311,7 +314,7 @@ void functional_test(mfem::ParMesh& mesh, H1<p, dim> test, H1<p, dim> trial, Dim
   std::unique_ptr<mfem::HypreParMatrix> J_func = assemble(drdU);
 
   mfem::Vector diag_direct(U.Size());
-  drdU.AssembleDiagonal(diag_direct);
+  drdU.assembleDiagonal(diag_direct);
 
   mfem::Vector diag_assembled(U.Size());
   J_func->GetDiag(diag_assembled);
@@ -479,6 +482,93 @@ TEST(Elasticity, 3DLinear) { functional_test(*mesh3D, H1<1, 3>{}, H1<1, 3>{}, Di
 TEST(Elasticity, 3DQuadratic) { functional_test(*mesh3D, H1<2, 3>{}, H1<2, 3>{}, Dimension<3>{}); }
 TEST(Elasticity, 3DCubic) { functional_test(*mesh3D, H1<3, 3>{}, H1<3, 3>{}, Dimension<3>{}); }
 
+namespace {
+
+template <typename Function>
+double time_on_slowest_rank(Function&& function)
+{
+  auto [num_ranks, rank] = smith::getMPIInfo();
+  (void)rank;
+  if (num_ranks > 1) {
+    MPI_Barrier(MPI_COMM_WORLD);
+  }
+
+  auto start = std::chrono::steady_clock::now();
+  function();
+  auto stop = std::chrono::steady_clock::now();
+
+  double elapsed =
+      std::chrono::duration_cast<std::chrono::duration<double>>(stop - start).count();
+  double max_elapsed = elapsed;
+  if (num_ranks > 1) {
+    MPI_Allreduce(&elapsed, &max_elapsed, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
+  }
+  return max_elapsed;
+}
+
+}  // namespace
+
+TEST(Elasticity, DiagonalAssemblyBenchmark)
+{
+  if (!run_diagonal_benchmark) {
+    GTEST_SKIP() << "Set --run-diagonal-benchmark to time direct diagonal assembly.";
+  }
+
+  static constexpr int dim = 3;
+  using test_space = H1<2, dim>;
+  using trial_space = H1<2, dim>;
+
+  auto [fespace, fec] = smith::generateParFiniteElementSpace<test_space>(mesh3D.get());
+  (void)fec;
+
+  mfem::ParGridFunction u_global(fespace.get());
+  int seed = 9;
+  u_global.Randomize(seed);
+
+  mfem::Vector U(fespace->TrueVSize());
+  u_global.GetTrueDofs(U);
+
+  Functional<test_space(trial_space), exec_space> residual(fespace.get(), {fespace.get()});
+  Domain domain = EntireDomain(*mesh3D);
+  residual.AddDomainIntegral(Dimension<dim>{}, DependsOn<0>{}, StressFunctor<dim>{}, domain);
+
+  auto [r, drdU] = residual(0.0, differentiate_wrt(U));
+
+  mfem::Vector diag_direct(U.Size());
+  mfem::Vector diag_assembled(U.Size());
+  drdU.assembleDiagonal(diag_direct);
+  std::unique_ptr<mfem::HypreParMatrix> J_warmup = assemble(drdU);
+  J_warmup->GetDiag(diag_assembled);
+
+  const int samples = std::max(diagonal_benchmark_samples, 1);
+  double direct_time = time_on_slowest_rank([&]() {
+    for (int sample = 0; sample < samples; sample++) {
+      drdU.assembleDiagonal(diag_direct);
+    }
+  });
+
+  double sparse_time = time_on_slowest_rank([&]() {
+    for (int sample = 0; sample < samples; sample++) {
+      std::unique_ptr<mfem::HypreParMatrix> J = assemble(drdU);
+      J->GetDiag(diag_assembled);
+    }
+  });
+
+  mfem::Vector diag_diff(U.Size());
+  subtract(diag_direct, diag_assembled, diag_diff);
+  EXPECT_NEAR(0.0, diag_diff.Norml2() / diag_assembled.Norml2(), 1.e-14);
+
+  auto [num_ranks, rank] = smith::getMPIInfo();
+  (void)num_ranks;
+  if (rank == 0) {
+    std::cout << "DiagonalAssemblyBenchmark direct_seconds=" << direct_time / samples
+              << " sparse_getdiag_seconds=" << sparse_time / samples
+              << " speedup=" << sparse_time / direct_time << std::endl;
+  }
+
+  EXPECT_GT(sparse_time / direct_time, 5.0);
+}
+
 // TODO: reenable these once hcurl implements of simplex elements is finished
 // TEST(Hcurl, 2DLinear) { functional_test(*mesh2D, Hcurl<1>{}, Hcurl<1>{}, Dimension<2>{}); }
 // TEST(Hcurl, 2DQuadratic) { functional_test(*mesh2D, Hcurl<2>{}, Hcurl<2>{}, Dimension<2>{}); }
@@ -501,6 +591,10 @@ int main(int argc, char* argv[])
   args.AddOption(&serial_refinement, "-r", "--ref", "");
   args.AddOption(&parallel_refinement, "-pr", "--pref", "");
   args.AddOption(&nsamples, "-n", "--n-samples", "Samples per test");
+  args.AddOption(&run_diagonal_benchmark, "-rdb", "--run-diagonal-benchmark", "-sdb", "--skip-diagonal-benchmark",
+                 "Run direct diagonal vs sparse assemble+GetDiag timing benchmark.");
+  args.AddOption(&diagonal_benchmark_samples, "-dbs", "--diagonal-benchmark-samples",
+                 "Samples for the diagonal assembly benchmark.");
 
   args.Parse();
   if (!args.Good()) {
diff --git a/src/smith/numerics/tests/test_equationsolver.cpp b/src/smith/numerics/tests/test_equationsolver.cpp
index 6ee649716c..f0b73bf53e 100644
--- a/src/smith/numerics/tests/test_equationsolver.cpp
+++ b/src/smith/numerics/tests/test_equationsolver.cpp
@@ -138,6 +138,7 @@ TEST(EquationSolver, PcgBlockUsesMatrixFreeTangentAction)
   using trial_space = H1<p>;
 
   auto [fes, fec] = smith::generateParFiniteElementSpace<test_space>(&pmesh);
+  (void)fec;
 
   mfem::HypreParVector x_exact(fes.get());
   mfem::HypreParVector x_computed(fes.get());
@@ -205,6 +206,121 @@ TEST(EquationSolver, PcgBlockUsesMatrixFreeTangentAction)
   EXPECT_LT(x_computed.DistanceTo(x_exact.GetData()), 1.0e-10);
 }
 
+TEST(EquationSolver, PcgBlockUsesJacobianOperator)
+{
+  class MatrixJacobianOperator : public JacobianOperator {
+   public:
+    explicit MatrixJacobianOperator(std::unique_ptr<mfem::HypreParMatrix> matrix)
+        : JacobianOperator(matrix->Height(), matrix->Width()), matrix_(std::move(matrix))
+    {
+    }
+    void Mult(const mfem::Vector& dx, mfem::Vector& y) const override { matrix_->Mult(dx, y); }
+    std::unique_ptr<mfem::HypreParMatrix> assemble() override
+    {
+      return std::make_unique<mfem::HypreParMatrix>(*matrix_);  // deep copy: Mult()/assembleDiagonal() stay valid
+    }
+    void assembleDiagonal(mfem::Vector& diag) const override { matrix_->GetDiag(diag); }
+
+   private:
+    std::unique_ptr<mfem::HypreParMatrix> matrix_;
+  };
+
+  auto mesh = mfem::Mesh::MakeCartesian2D(1, 1, mfem::Element::QUADRILATERAL);
+  auto pmesh = mfem::ParMesh(MPI_COMM_WORLD, mesh);
+
+  pmesh.EnsureNodes();
+  pmesh.ExchangeFaceNbrData();
+
+  constexpr int p = 1;
+  constexpr int dim = 2;
+  using test_space = H1<p>;
+  using trial_space = H1<p>;
+
+  auto [fes, fec] = smith::generateParFiniteElementSpace<test_space>(&pmesh);
+  (void)fec;
+
+  mfem::HypreParVector x_exact(fes.get());
+  mfem::HypreParVector x_computed(fes.get());
+  x_exact.Randomize(0);
+  x_computed = 0.0;
+
+  std::unique_ptr<mfem::HypreParMatrix> J;
+
+  Functional<test_space(trial_space)> residual(fes.get(), {fes.get()});
+  Domain domain = EntireDomain(pmesh);
+  residual.AddDomainIntegral(
+      Dimension<dim>{}, DependsOn<0>{},
+      [](double /*t*/, auto, auto scalar) {
+        auto [u, du_dx] = scalar;
+        return smith::tuple{u, du_dx};
+      },
+      domain);
+
+  {
+    constexpr double time = 0.0;
+    auto [val, grad] = residual(time, differentiate_wrt(x_exact));
+    FunctionalJacobianOperator<decltype(grad)> jacobian_operator(grad);
+
+    mfem::Vector dx(x_exact.Size());
+    mfem::Vector y_grad(x_exact.Size());
+    mfem::Vector y_operator(x_exact.Size());
+    dx.Randomize(1);
+    grad.Mult(dx, y_grad);
+    jacobian_operator.Mult(dx, y_operator);
+
+    EXPECT_LT(y_operator.DistanceTo(y_grad.GetData()), 1.0e-14);
+  }
+
+  StdFunctionOperator residual_opr(
+      fes->TrueVSize(),
+      [&x_exact, &residual](const mfem::Vector& x, mfem::Vector& r) {
+        constexpr double time = 0.0;
+        r = residual(time, x);
+        r -= residual(time, x_exact);
+      },
+      [&residual, &J](const mfem::Vector& x) -> mfem::Operator& {
+        constexpr double time = 0.0;
+        auto [val, grad] = residual(time, differentiate_wrt(x));
+        J = assemble(grad);
+        return *J;
+      });
+
+  const LinearSolverOptions lin_opts = {.linear_solver = LinearSolver::CG,
+                                        .preconditioner = Preconditioner::HypreJacobi,
+                                        .relative_tol = 1.0e-12,
+                                        .absolute_tol = 1.0e-14,
+                                        .max_iterations = 500,
+                                        .print_level = 0};
+
+  const NonlinearSolverOptions nonlin_opts = {.nonlin_solver = NonlinearSolver::PcgBlock,
+                                              .relative_tol = 1.0e-12,
+                                              .absolute_tol = 1.0e-14,
+                                              .max_iterations = 500,
+                                              .print_level = 0};
+
+  EquationSolver eq_solver(nonlin_opts, lin_opts);
+  eq_solver.setOperator(residual_opr);
+
+  int num_operator_evals = 0;
+  eq_solver.setJacobianOperator([&residual, &num_operator_evals](const mfem::Vector& x) {
+    constexpr double time = 0.0;
+    auto [val, grad] = residual(time, differentiate_wrt(x));
+    ++num_operator_evals;
+    return std::make_unique<MatrixJacobianOperator>(assemble(grad));
+  });
+
+  eq_solver.solve(x_computed);
+
+  const auto diagnostics = eq_solver.pcgBlockDiagnostics();
+  ASSERT_TRUE(diagnostics.has_value());
+  EXPECT_GT(num_operator_evals, 0);
+  EXPECT_EQ(diagnostics->num_hess_vecs, static_cast<size_t>(num_operator_evals));
+  EXPECT_EQ(diagnostics->num_jacobian_operator_evals, static_cast<size_t>(num_operator_evals));
+  EXPECT_EQ(diagnostics->num_diagonal_assembles, 0u);
+  EXPECT_TRUE(eq_solver.nonlinearSolver().GetConverged());
+  EXPECT_LT(x_computed.DistanceTo(x_exact.GetData()), 1.0e-10);
+}
+
 /**
  * @brief Nonlinear solvers to test. Always includes NonlinearSolver::Newton and NonlinearSolver::LBFGS
  * If SMITH_USE_SUNDIALS is set, adds: NonlinearSolver::KINFullStep, NonlinearSolver::KINBacktrackingLineSearch, and

From d94f4b3d1e1d9c276f856bcc60f3b8ec49f09bb8 Mon Sep 17 00:00:00 2001
From: Michael Tupek <mrtupek@gmail.com>
Date: Thu, 30 Apr 2026 12:53:07 -0600
Subject: [PATCH 06/27] Implement JacobianOperator for weak form.

---
 src/smith/numerics/equation_solver.hpp        | 25 +++++++++++----
 src/smith/physics/dfem_weak_form.hpp          | 12 +++++++
 src/smith/physics/functional_weak_form.hpp    | 21 ++++++++++++
 .../tests/test_functional_weak_form.cpp       | 32 +++++++++++++++++++
 src/smith/physics/weak_form.hpp               | 18 +++++++++++
 5 files changed, 101 insertions(+), 7 deletions(-)

diff --git a/src/smith/numerics/equation_solver.hpp b/src/smith/numerics/equation_solver.hpp
index febc40754b..8d67cc64a5 100644
--- a/src/smith/numerics/equation_solver.hpp
+++ b/src/smith/numerics/equation_solver.hpp
@@ -16,6 +16,7 @@
 #include <functional>
 #include <memory>
 #include <optional>
+#include <type_traits>
 #include <variant>
 #include <utility>
 
@@ -59,25 +60,35 @@ class JacobianOperator : public mfem::Operator {
  */
 template <typename Gradient>
 class FunctionalJacobianOperator : public JacobianOperator {
+  using GradientT = std::remove_reference_t<Gradient>;
+
  public:
-  explicit FunctionalJacobianOperator(Gradient& gradient)
-      : JacobianOperator(gradient.Height(), gradient.Width()), gradient_(gradient)
+  explicit FunctionalJacobianOperator(GradientT& gradient)
+      : JacobianOperator(gradient.Height(), gradient.Width()), gradient_(&gradient)
+  {
+  }
+
+  explicit FunctionalJacobianOperator(GradientT&& gradient)
+      : JacobianOperator(gradient.Height(), gradient.Width()),
+        owned_gradient_(std::make_unique<GradientT>(std::move(gradient))),
+        gradient_(owned_gradient_.get())
   {
   }
 
-  void Mult(const mfem::Vector& dx, mfem::Vector& y) const override { gradient_.Mult(dx, y); }
+  void Mult(const mfem::Vector& dx, mfem::Vector& y) const override { gradient_->Mult(dx, y); }
 
   void AddMult(const mfem::Vector& dx, mfem::Vector& y, const double a = 1.0) const override
   {
-    gradient_.AddMult(dx, y, a);
+    gradient_->AddMult(dx, y, a);
   }
 
-  std::unique_ptr<mfem::HypreParMatrix> assemble() override { return gradient_.assemble(); }
+  std::unique_ptr<mfem::HypreParMatrix> assemble() override { return gradient_->assemble(); }
 
-  void assembleDiagonal(mfem::Vector& diag) const override { gradient_.assembleDiagonal(diag); }
+  void assembleDiagonal(mfem::Vector& diag) const override { gradient_->assembleDiagonal(diag); }
 
  private:
-  Gradient& gradient_;
+  std::unique_ptr<GradientT> owned_gradient_;
+  GradientT* gradient_;
 };
 
 /**
diff --git a/src/smith/physics/dfem_weak_form.hpp b/src/smith/physics/dfem_weak_form.hpp
index 83a55d6ddd..f55598039c 100644
--- a/src/smith/physics/dfem_weak_form.hpp
+++ b/src/smith/physics/dfem_weak_form.hpp
@@ -213,6 +213,18 @@ class DfemWeakForm : public WeakForm {
     return std::make_unique<mfem::HypreParMatrix>();
   }
 
+  /// @overload
+  std::unique_ptr<JacobianOperator> jacobianOperator(
+      TimeInfo time_info, ConstFieldPtr /*shape_disp*/, const std::vector<ConstFieldPtr>& /*fields*/,
+      size_t /*input_col*/, const std::vector<ConstQuadratureFieldPtr>& /*quad_fields*/ = {}) const override
+  {
+    SLIC_ERROR_ROOT("DfemWeakForm does not support JacobianOperator construction");
+    dt_ = time_info.dt();
+    cycle_ = time_info.cycle();
+
+    return nullptr;
+  }
+
   /// @overload
   void jvp(TimeInfo time_info, ConstFieldPtr /*shape_disp*/, const std::vector<ConstFieldPtr>& /*fields*/,
            const std::vector<ConstQuadratureFieldPtr>& /*quad_fields*/, ConstFieldPtr /*v_shape_disp*/,
diff --git a/src/smith/physics/functional_weak_form.hpp b/src/smith/physics/functional_weak_form.hpp
index 18c292dcab..8e99e71afc 100644
--- a/src/smith/physics/functional_weak_form.hpp
+++ b/src/smith/physics/functional_weak_form.hpp
@@ -15,6 +15,7 @@
 
 #include "smith/physics/weak_form.hpp"
 #include "smith/physics/mesh.hpp"
+#include "smith/numerics/equation_solver.hpp"
 #include "smith/numerics/functional/shape_aware_functional.hpp"
 #include "smith/physics/state/finite_element_state.hpp"
 #include "smith/physics/state/finite_element_dual.hpp"
@@ -331,6 +332,26 @@ class FunctionalWeakForm<spatial_dim, OutputSpace, Parameters<InputSpaces...>,
     return J;
   }
 
+  /// @overload
+  std::unique_ptr<JacobianOperator> jacobianOperator(
+      TimeInfo time_info, ConstFieldPtr shape_disp, const std::vector<ConstFieldPtr>& fields, size_t input_col,
+      [[maybe_unused]] const std::vector<ConstQuadratureFieldPtr>& quad_fields = {}) const override
+  {
+    SLIC_ERROR_IF(input_col >= fields.size(), "Invalid JacobianOperator input column.");
+
+    dt_ = time_info.dt();
+    cycle_ = time_info.cycle();
+
+    auto jacs = jacobianFunctions(std::make_integer_sequence<int, sizeof...(input_indices)>{}, time_info.time(),
+                                  shape_disp, fields);
+    auto K = smith::get<DERIVATIVE>(jacs[input_col](time_info.time(), shape_disp, fields));
+
+    SLIC_ERROR_IF(K.Height() != K.Width(),
+                  "WeakForm::jacobianOperator currently supports square one-field derivatives only.");
+
+    return std::make_unique<FunctionalJacobianOperator<decltype(K)>>(std::move(K));
+  }
+
   /// @overload
   void jvp(TimeInfo time_info, ConstFieldPtr shape_disp, const std::vector<ConstFieldPtr>& fields,
            [[maybe_unused]] const std::vector<ConstQuadratureFieldPtr>& quad_fields,
diff --git a/src/smith/physics/tests/test_functional_weak_form.cpp b/src/smith/physics/tests/test_functional_weak_form.cpp
index 61ea04e68d..0dc318ab82 100644
--- a/src/smith/physics/tests/test_functional_weak_form.cpp
+++ b/src/smith/physics/tests/test_functional_weak_form.cpp
@@ -247,6 +247,38 @@ TEST_F(WeakFormFixture, JvpConsistency)
   }
 }
 
+TEST_F(WeakFormFixture, JacobianOperatorConsistency)
+{
+  auto input_fields = getConstFieldPointers(states, params);
+  auto field_tangents = getConstFieldPointers(state_tangents, param_tangents);
+  // Weight only the DISP argument so the assembled Jacobian is the same derivative column as J_op.
+  std::vector<double> jacobian_weights(input_fields.size());
+  jacobian_weights[DISP] = 1.0;
+
+  auto J = weak_form->jacobian(time_info, shape_disp.get(), input_fields, jacobian_weights);
+  auto J_op = weak_form->jacobianOperator(time_info, shape_disp.get(), input_fields, DISP);
+  // Compare the products entrywise: equal norms alone would not catch a wrong sign or misplaced entries.
+  smith::FiniteElementDual jvp_slow(states[DISP].space(), "jvp_slow");
+  smith::FiniteElementDual jvp_op(states[DISP].space(), "jvp_op");
+  J->Mult(*field_tangents[DISP], jvp_slow);
+  J_op->Mult(*field_tangents[DISP], jvp_op);
+  EXPECT_LE(jvp_op.DistanceTo(jvp_slow.GetData()), 1e-12 * jvp_slow.Norml2());
+  // The matrix assembled through the operator must reproduce the same product.
+  std::unique_ptr<mfem::HypreParMatrix> J_op_assembled = J_op->assemble();
+  smith::FiniteElementDual jvp_op_assembled(states[DISP].space(), "jvp_op_assembled");
+  J_op_assembled->Mult(*field_tangents[DISP], jvp_op_assembled);
+  EXPECT_LE(jvp_op_assembled.DistanceTo(jvp_slow.GetData()), 1e-12 * jvp_slow.Norml2());
+  // Direct diagonal extraction must agree with the diagonal of the assembled Jacobian.
+  mfem::Vector diag_direct(J_op->Height());
+  mfem::Vector diag_assembled(J->Height());
+  J_op->assembleDiagonal(diag_direct);
+  J->GetDiag(diag_assembled);
+
+  mfem::Vector diag_diff(diag_direct.Size());
+  subtract(diag_direct, diag_assembled, diag_diff);
+  EXPECT_NEAR(0.0, diag_diff.Norml2() / diag_assembled.Norml2(), 1.e-14);
+}
+
 int main(int argc, char* argv[])
 {
   ::testing::InitGoogleTest(&argc, argv);
diff --git a/src/smith/physics/weak_form.hpp b/src/smith/physics/weak_form.hpp
index 2e766de4b3..1675545ba2 100644
--- a/src/smith/physics/weak_form.hpp
+++ b/src/smith/physics/weak_form.hpp
@@ -12,6 +12,7 @@
 
 #pragma once
 
+#include <cstddef>
 #include <vector>
 #include <string>
 #include <memory>
@@ -25,6 +26,7 @@ class HypreParMatrix;
 
 namespace smith {
 
+class JacobianOperator;
 class FiniteElementState;
 class FiniteElementDual;
 
@@ -69,6 +71,22 @@ class WeakForm {
       const std::vector<double>& field_argument_tangents,
       const std::vector<ConstQuadratureFieldPtr>& quad_fields = {}) const = 0;
 
+  /** @brief Derivative of the residual with respect to one field argument as a solver-facing JacobianOperator.
+   *
+   * The returned operator represents one derivative column, d{r}/d{fields}_field_argument_index. The first supported
+   * use case is the square solved-field derivative used by PCG-block tangent products and diagonal extraction.
+   *
+   * @param time_info time and timestep information
+   * @param shape_disp smith::FiniteElementState*, change in model coordinates relative to the initially read in mesh
+   * @param fields vector of smith::FiniteElementState*
+   * @param field_argument_index field argument to differentiate with respect to
+   * @param quad_fields vector of ConstQuadratureFieldPtr
+   * @return std::unique_ptr<JacobianOperator> returns d{r}/d{fields}_field_argument_index
+   */
+  virtual std::unique_ptr<JacobianOperator> jacobianOperator(
+      TimeInfo time_info, ConstFieldPtr shape_disp, const std::vector<ConstFieldPtr>& fields,
+      size_t field_argument_index, const std::vector<ConstQuadratureFieldPtr>& quad_fields = {}) const = 0;
+
   /**
    * @brief Jacobian-vector product, will overwrite any existing values in jvp_reactions
    * @param time_info time and timestep information

From 3b6a6adf2b066d72b93f0d8bf2940455c3d4740c Mon Sep 17 00:00:00 2001
From: Michael Tupek <mrtupek@gmail.com>
Date: Thu, 30 Apr 2026 12:54:01 -0600
Subject: [PATCH 07/27] style.

---
 .../numerics/functional/tests/functional_comparisons.cpp   | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/smith/numerics/functional/tests/functional_comparisons.cpp b/src/smith/numerics/functional/tests/functional_comparisons.cpp
index 95a7d95d42..031bb56ee5 100644
--- a/src/smith/numerics/functional/tests/functional_comparisons.cpp
+++ b/src/smith/numerics/functional/tests/functional_comparisons.cpp
@@ -497,8 +497,7 @@ double time_on_slowest_rank(Function&& function)
   function();
   auto stop = std::chrono::steady_clock::now();
 
-  double elapsed =
-      std::chrono::duration_cast<std::chrono::duration<double>>(stop - start).count();
+  double elapsed = std::chrono::duration_cast<std::chrono::duration<double>>(stop - start).count();
   double max_elapsed = elapsed;
   if (num_ranks > 1) {
     MPI_Allreduce(&elapsed, &max_elapsed, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
@@ -562,8 +561,8 @@ TEST(Elasticity, DiagonalAssemblyBenchmark)
   (void)num_ranks;
   if (rank == 0) {
     std::cout << "DiagonalAssemblyBenchmark direct_seconds=" << direct_time / samples
-              << " sparse_getdiag_seconds=" << sparse_time / samples
-              << " speedup=" << sparse_time / direct_time << std::endl;
+              << " sparse_getdiag_seconds=" << sparse_time / samples << " speedup=" << sparse_time / direct_time
+              << std::endl;
   }
 
   EXPECT_GT(sparse_time / direct_time, 5.0);

From 538cc489e10f9e8148a96f67288d015cdafc2175 Mon Sep 17 00:00:00 2001
From: Michael Tupek <mrtupek@gmail.com>
Date: Fri, 8 May 2026 09:13:08 -0600
Subject: [PATCH 08/27] Working on various potential improvements to the
 trust-region solver.

---
 src/smith/numerics/CMakeLists.txt             |    4 +-
 src/smith/numerics/equation_solver.cpp        | 1205 +++++++++++++++--
 src/smith/numerics/equation_solver.hpp        |  186 +++
 .../numerics/mfem_trust_region_subspace.cpp   |  589 ++++++++
 ...er.cpp => petsc_trust_region_subspace.cpp} |  150 +-
 src/smith/numerics/solver_config.hpp          |   24 +
 src/smith/numerics/tests/CMakeLists.txt       |    3 +-
 .../numerics/tests/test_equationsolver.cpp    |    7 +-
 .../tests/test_trust_region_solver_mfem.cpp   |  500 +++++++
 ...cpp => test_trust_region_solver_petsc.cpp} |   78 +-
 .../numerics/trust_region_cubic_subspace.cpp  |  461 +++++++
 src/smith/numerics/trust_region_solver.hpp    |   71 +-
 src/smith/physics/solid_mechanics.hpp         |  115 ++
 .../physics/tests/shallow_arch_buckling.cpp   |  253 +++-
 14 files changed, 3344 insertions(+), 302 deletions(-)
 create mode 100644 src/smith/numerics/mfem_trust_region_subspace.cpp
 rename src/smith/numerics/{trust_region_solver.cpp => petsc_trust_region_subspace.cpp} (64%)
 create mode 100644 src/smith/numerics/tests/test_trust_region_solver_mfem.cpp
 rename src/smith/numerics/tests/{test_trust_region_solver.cpp => test_trust_region_solver_petsc.cpp} (62%)
 create mode 100644 src/smith/numerics/trust_region_cubic_subspace.cpp

diff --git a/src/smith/numerics/CMakeLists.txt b/src/smith/numerics/CMakeLists.txt
index 6df0f7eb12..e8c767394d 100644
--- a/src/smith/numerics/CMakeLists.txt
+++ b/src/smith/numerics/CMakeLists.txt
@@ -19,7 +19,9 @@ set(numerics_headers
 
 set(numerics_sources
     equation_solver.cpp
-    trust_region_solver.cpp
+    petsc_trust_region_subspace.cpp
+    mfem_trust_region_subspace.cpp
+    trust_region_cubic_subspace.cpp
     odes.cpp
     petsc_solvers.cpp
     block_preconditioner.cpp
diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp
index 923f94c182..29d9af1e3f 100644
--- a/src/smith/numerics/equation_solver.cpp
+++ b/src/smith/numerics/equation_solver.cpp
@@ -6,6 +6,7 @@
 
 #include "smith/numerics/equation_solver.hpp"
 
+#include <chrono>
 #include <cstdlib>
 #include <functional>
 #include <iomanip>
@@ -16,6 +17,7 @@
 #include <limits>
 #include <string>
 #include <tuple>
+#include <utility>
 
 #include "smith/smith_config.hpp"
 #include "smith/infrastructure/profiling.hpp"
@@ -24,6 +26,17 @@
 
 namespace smith {
 
+namespace {
+
+using Clock = std::chrono::steady_clock;
+
+double secondsSince(Clock::time_point start)
+{
+  return std::chrono::duration_cast<std::chrono::duration<double>>(Clock::now() - start).count();
+}
+
+}  // namespace
+
 /// Newton solver with a 2-way line-search.  Reverts to regular Newton if max_line_search_iterations is set to 0.
 class NewtonSolver : public mfem::NewtonSolver {
  protected:
@@ -251,11 +264,22 @@ struct TrustRegionResults {
     H_z.SetSize(size);
     d_old.SetSize(size);
     H_d_old.SetSize(size);
+    H_d_old_at_accept.SetSize(size);
     d.SetSize(size);
     H_d.SetSize(size);
     Pr.SetSize(size);
     cauchy_point.SetSize(size);
     H_cauchy_point.SetSize(size);
+    z = 0.0;
+    H_z = 0.0;
+    d_old = 0.0;
+    H_d_old = 0.0;
+    H_d_old_at_accept = 0.0;
+    d = 0.0;
+    H_d = 0.0;
+    Pr = 0.0;
+    cauchy_point = 0.0;
+    H_cauchy_point = 0.0;
   }
 
   /// resets trust region results for a new outer iteration
@@ -282,6 +306,10 @@ struct TrustRegionResults {
   mfem::Vector d_old;
   /// action of hessian on previous step z_old
   mfem::Vector H_d_old;
+  /// action of previous accepted hessian on previous step z_old
+  mfem::Vector H_d_old_at_accept;
+  /// true after at least one accepted line-search step has populated d_old
+  bool has_d_old = false;
   /// incrementalCG direction
   mfem::Vector d;
   /// action of hessian on direction d
@@ -299,9 +327,9 @@ struct TrustRegionResults {
 };
 
 /// trust region printing utility function
-void printTrustRegionInfo(double realObjective, double modelObjective, size_t cgIters, double trSize, bool willAccept)
+void printTrustRegionInfo(double realWork, double modelObjective, size_t cgIters, double trSize, bool willAccept)
 {
-  mfem::out << "real energy = " << std::setw(13) << realObjective << ", model energy = " << std::setw(13)
+  mfem::out << "real work = " << std::setw(13) << realWork << ", model energy = " << std::setw(13)
             << modelObjective << ", cg iter = " << std::setw(7) << cgIters << ", next tr size = " << std::setw(8)
             << trSize << ", accepting = " << willAccept << std::endl;
 }
@@ -327,6 +355,14 @@ class TrustRegion : public mfem::NewtonSolver {
   mutable std::vector<std::shared_ptr<mfem::Vector>> left_mosts;
   /// the action of the stiffness/hessian (H) on the left most eigenvectors
   mutable std::vector<std::shared_ptr<mfem::Vector>> H_left_mosts;
+  /// previous accepted-iteration Hessian actions on the retained left most eigenvectors
+  mutable std::vector<std::shared_ptr<mfem::Vector>> previous_H_left_mosts;
+  /// accepted TrustRegion steps, newest first
+  mutable std::vector<std::shared_ptr<mfem::Vector>> accepted_step_history;
+  /// initial state for this nonlinear solve, used as an optional history direction
+  mutable mfem::Vector solve_start_x;
+  mutable mfem::Vector min_residual_x;
+  mutable double min_residual_norm = -1.0;
 
   /// nonlinear solution options
   NonlinearSolverOptions nonlinear_options;
@@ -343,14 +379,148 @@ class TrustRegion : public mfem::NewtonSolver {
  public:
   /// internal counter for hess-vecs
   mutable size_t num_hess_vecs = 0;
+  /// internal counter for model CG hess-vecs
+  mutable size_t num_model_hess_vecs = 0;
+  /// internal counter for Cauchy-point hess-vecs
+  mutable size_t num_cauchy_hess_vecs = 0;
+  /// internal counter for line-search hess-vecs
+  mutable size_t num_line_search_hess_vecs = 0;
   /// internal counter for preconditions
   mutable size_t num_preconds = 0;
   /// internal counter for residuals
   mutable size_t num_residuals = 0;
   /// internal counter for subspace solves
   mutable size_t num_subspace_solves = 0;
+  /// internal counter for retained-leftmost Hessian-vector products used by subspace solves
+  mutable size_t num_subspace_leftmost_hess_vecs = 0;
+  /// internal counter for batched Hessian-vector groups used by subspace solves
+  mutable size_t num_subspace_hess_vec_batches = 0;
+  /// internal counter for Hessian-vector products inside subspace batches
+  mutable size_t num_subspace_batched_hess_vecs = 0;
+  /// internal counter for accepted-step history vectors added to subspace solves
+  mutable size_t num_subspace_past_step_vectors = 0;
+  /// internal counter for accepted-step history Hessian-vector products
+  mutable size_t num_subspace_past_step_hess_vecs = 0;
+  /// internal counter for nonlinear-solve-start directions added to subspace solves
+  mutable size_t num_subspace_solve_start_vectors = 0;
+  /// internal counter for nonlinear-solve-start Hessian-vector products
+  mutable size_t num_subspace_solve_start_hess_vecs = 0;
+  /// internal counter for quadratic subspace backend solves
+  mutable size_t num_quadratic_subspace_solves = 0;
+  /// internal counter for cubic subspace backend attempts
+  mutable size_t num_cubic_subspace_attempts = 0;
+  /// internal counter for cubic subspace candidates used
+  mutable size_t num_cubic_subspace_uses = 0;
+  /// internal counter for cubic attempts that returned quadratic candidate
+  mutable size_t num_cubic_subspace_quadratic_fallbacks = 0;
   /// internal counter for matrix assembles
   mutable size_t num_jacobian_assembles = 0;
+  /// internal counter for JacobianOperator evaluations
+  mutable size_t num_jacobian_operator_evals = 0;
+  /// internal counter for direct diagonal assemblies
+  mutable size_t num_diagonal_assembles = 0;
+  /// internal counter for model CG iterations
+  mutable size_t num_cg_iterations = 0;
+  /// internal counter for preconditioner operator updates
+  mutable size_t num_preconditioner_updates = 0;
+  /// internal counter for nonmonotone accepted steps
+  mutable size_t num_nonmonotone_work_accepts = 0;
+  /// internal counter for accepted steps that monotone acceptance would reject
+  mutable size_t num_monotone_work_would_reject = 0;
+  /// time spent evaluating residuals
+  mutable double residual_seconds = 0.0;
+  /// time spent applying Hessian-vector products
+  mutable double hess_vec_seconds = 0.0;
+  /// time spent applying model CG Hessian-vector products
+  mutable double model_hess_vec_seconds = 0.0;
+  /// time spent applying Cauchy-point Hessian-vector products
+  mutable double cauchy_hess_vec_seconds = 0.0;
+  /// time spent applying line-search Hessian-vector products
+  mutable double line_search_hess_vec_seconds = 0.0;
+  /// time spent applying JacobianOperator Hessian-vector products
+  mutable double jacobian_operator_hess_vec_seconds = 0.0;
+  /// time spent evaluating JacobianOperator factories
+  mutable double jacobian_operator_eval_seconds = 0.0;
+  /// time spent directly assembling diagonals
+  mutable double diagonal_assembly_seconds = 0.0;
+  /// time spent inverting direct diagonals
+  mutable double diagonal_invert_seconds = 0.0;
+  /// time spent applying preconditioners
+  mutable double preconditioner_seconds = 0.0;
+  /// total time spent in the nonlinear solve
+  mutable double total_seconds = 0.0;
+  /// time spent solving trust-region model problems
+  mutable double model_solve_seconds = 0.0;
+  /// total time spent in trust-region subspace solves
+  mutable double subspace_seconds = 0.0;
+  /// time spent building retained leftmost subspace directions
+  mutable double subspace_leftmost_seconds = 0.0;
+  /// time spent in subspace Hessian-vector batches
+  mutable double subspace_hess_vec_batch_seconds = 0.0;
+  /// time spent removing dependent directions for subspace solves
+  mutable double subspace_filter_seconds = 0.0;
+  /// time spent in dense subspace backend assembly/solve work
+  mutable double subspace_backend_seconds = 0.0;
+  /// time spent in subspace postprocessing and model-energy comparison
+  mutable double subspace_finalize_seconds = 0.0;
+  /// time spent building the Cauchy point
+  mutable double cauchy_point_seconds = 0.0;
+  /// time spent constructing dogleg steps
+  mutable double dogleg_seconds = 0.0;
+  /// time spent in line-search and trust-radius acceptance logic
+  mutable double line_search_seconds = 0.0;
+  /// time spent in dot products
+  mutable double dot_seconds = 0.0;
+  /// number of dot products
+  mutable size_t num_dot_products = 0;
+  /// number of dot product batches/reductions
+  mutable size_t num_dot_reductions = 0;
+  /// number of dot products in trust-region model solves
+  mutable size_t num_model_dot_products = 0;
+  /// number of dot products in Cauchy-point construction
+  mutable size_t num_cauchy_dot_products = 0;
+  /// number of dot products in dogleg construction
+  mutable size_t num_dogleg_dot_products = 0;
+  /// number of dot products in line-search and acceptance logic
+  mutable size_t num_line_search_dot_products = 0;
+  /// number of setup dot products outside the main per-step kernels
+  mutable size_t num_setup_dot_products = 0;
+  /// time spent in trust-region model-solve dot products
+  mutable double model_dot_seconds = 0.0;
+  /// time spent in Cauchy-point dot products
+  mutable double cauchy_dot_seconds = 0.0;
+  /// time spent in dogleg dot products
+  mutable double dogleg_dot_seconds = 0.0;
+  /// time spent in line-search dot products
+  mutable double line_search_dot_seconds = 0.0;
+  /// time spent in setup dot products
+  mutable double setup_dot_seconds = 0.0;
+  /// time spent in vector add/update operations
+  mutable double vector_update_seconds = 0.0;
+  /// time spent in vector copies and scaling operations
+  mutable double vector_copy_scale_seconds = 0.0;
+  /// time spent in boundary projection operations
+  mutable double projection_seconds = 0.0;
+  /// time spent assembling Jacobians
+  mutable double jacobian_assembly_seconds = 0.0;
+  /// time spent refreshing preconditioners
+  mutable double preconditioner_update_seconds = 0.0;
+  /// time spent in preconditioner SetOperator calls
+  mutable double preconditioner_setup_seconds = 0.0;
+  /// current accumulated actual work-surrogate level for nonmonotone acceptance
+  mutable double current_work_objective = 0.0;
+  /// last nonmonotone reference work surrogate
+  mutable double last_nonmonotone_work_reference = 0.0;
+  /// Optional JacobianOperator factory
+  JacobianOperatorFactory jacobian_operator_factory;
+  /// Cached JacobianOperator for current TrustRegion iteration
+  mutable std::unique_ptr<JacobianOperator> current_jacobian_operator;
+  /// Inverted scalar diagonal preconditioner for JacobianOperator mode
+  mutable mfem::Vector inverse_diagonal_preconditioner;
+  /// Current assembled Hessian clone used to preserve a valid previous Hessian
+  mutable std::unique_ptr<mfem::Operator> current_hessian;
+  /// Previous assembled Hessian used for cubic finite-difference subspace models
+  mutable std::unique_ptr<mfem::Operator> previous_hessian;
 
 #ifdef MFEM_USE_MPI
   /// constructor
@@ -361,15 +531,211 @@ class TrustRegion : public mfem::NewtonSolver {
   }
 #endif
 
+  /// Timed Dot(a, b): charges one product, one reduction and the elapsed seconds to the global and group tallies.
+  double timedDot(const mfem::Vector& a, const mfem::Vector& b, size_t& group_count, double& group_seconds) const
+  {
+    auto start = Clock::now();
+    const double value = Dot(a, b);
+    const double seconds = secondsSince(start);
+    ++num_dot_products;
+    ++num_dot_reductions;  // a single dot is always tallied as one reduction
+    ++group_count;         // caller-selected category counter
+    dot_seconds += seconds;
+    group_seconds += seconds;  // caller-selected category timer
+    return value;
+  }
+
+  /// Timed pair (a0.b0, a1.b1): fused local pass plus one Allreduce, or two timedDot calls when dot_oper is set.
+  std::pair<double, double> timedDot2(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1,
+                                      const mfem::Vector& b1, size_t& group_count, double& group_seconds) const
+  {
+    if (dot_oper) {
+      return {timedDot(a0, b0, group_count, group_seconds), timedDot(a1, b1, group_count, group_seconds)};
+    }
+
+    MFEM_ASSERT(a0.Size() == b0.Size(), "Incompatible vector sizes.");
+    MFEM_ASSERT(a1.Size() == b1.Size(), "Incompatible vector sizes.");
+    // NOTE(review): operator[] below is plain element access; confirm the vectors are host-readable on device builds.
+    auto start = Clock::now();
+    mfem::real_t products[2] = {0.0, 0.0};
+    if (a0.Size() == a1.Size()) {  // equal lengths: accumulate both sums in a single pass
+      for (int i = 0; i < a0.Size(); ++i) {
+        products[0] += a0[i] * b0[i];
+        products[1] += a1[i] * b1[i];
+      }
+    } else {  // different lengths: one pass per pair
+      for (int i = 0; i < a0.Size(); ++i) {
+        products[0] += a0[i] * b0[i];
+      }
+      for (int i = 0; i < a1.Size(); ++i) {
+        products[1] += a1[i] * b1[i];
+      }
+    }
+
+#ifdef MFEM_USE_MPI
+    const MPI_Comm dot_comm = GetComm();
+    if (dot_comm != MPI_COMM_NULL) {  // without a communicator the local sums are returned unchanged
+      mfem::real_t global_products[2] = {0.0, 0.0};
+      MPI_Allreduce(products, global_products, 2, MFEM_MPI_REAL_T, MPI_SUM, dot_comm);
+      products[0] = global_products[0];
+      products[1] = global_products[1];
+    }
+#endif
+
+    const double seconds = secondsSince(start);
+    num_dot_products += 2;
+    ++num_dot_reductions;  // both products share one reduction on this path
+    group_count += 2;
+    dot_seconds += seconds;
+    group_seconds += seconds;
+    return {products[0], products[1]};
+  }
+
+  struct Dot4Result {  // the four values returned by timedDot4, in argument order
+    double v0 = 0.0;  ///< a0 . b0
+    double v1 = 0.0;  ///< a1 . b1
+    double v2 = 0.0;  ///< a2 . b2
+    double v3 = 0.0;  ///< a3 . b3
+  };
+
+  /// Timed four-dot batch (a0.b0, a1.b1, a2.b2, a3.b3): one fused pass and one MPI reduction when possible.
+  Dot4Result timedDot4(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1, const mfem::Vector& b1,
+                       const mfem::Vector& a2, const mfem::Vector& b2, const mfem::Vector& a3,
+                       const mfem::Vector& b3, size_t& group_count, double& group_seconds) const
+  {
+    const bool same_size = a0.Size() == a1.Size() && a0.Size() == a2.Size() && a0.Size() == a3.Size();
+    // The fused loop indexes every pair with one bound, so mismatched lengths (like a custom dot_oper) use plain dots.
+    if (dot_oper || !same_size) {
+      return {.v0 = timedDot(a0, b0, group_count, group_seconds),
+              .v1 = timedDot(a1, b1, group_count, group_seconds),
+              .v2 = timedDot(a2, b2, group_count, group_seconds),
+              .v3 = timedDot(a3, b3, group_count, group_seconds)};
+    }
+
+    MFEM_ASSERT(a0.Size() == b0.Size(), "Incompatible vector sizes.");
+    MFEM_ASSERT(a1.Size() == b1.Size(), "Incompatible vector sizes.");
+    MFEM_ASSERT(a2.Size() == b2.Size(), "Incompatible vector sizes.");
+    MFEM_ASSERT(a3.Size() == b3.Size(), "Incompatible vector sizes.");
+
+    auto start = Clock::now();
+    mfem::real_t products[4] = {0.0, 0.0, 0.0, 0.0};
+    for (int i = 0; i < a0.Size(); ++i) {
+      products[0] += a0[i] * b0[i];
+      products[1] += a1[i] * b1[i];
+      products[2] += a2[i] * b2[i];
+      products[3] += a3[i] * b3[i];
+    }
+
+#ifdef MFEM_USE_MPI
+    const MPI_Comm dot_comm = GetComm();
+    if (dot_comm != MPI_COMM_NULL) {  // without a communicator the local sums are returned unchanged
+      mfem::real_t global_products[4] = {0.0, 0.0, 0.0, 0.0};
+      MPI_Allreduce(products, global_products, 4, MFEM_MPI_REAL_T, MPI_SUM, dot_comm);
+      for (int i = 0; i < 4; ++i) {
+        products[i] = global_products[i];
+      }
+    }
+#endif
+
+    const double seconds = secondsSince(start);
+    num_dot_products += 4;
+    ++num_dot_reductions;  // all four products share one reduction on this path
+    group_count += 4;
+    dot_seconds += seconds;
+    group_seconds += seconds;
+    return {.v0 = products[0], .v1 = products[1], .v2 = products[2], .v3 = products[3]};
+  }
+
+  template <typename HessVecFunc>
+  void batchedSubspaceHessVec(HessVecFunc hess_vec_func, const std::vector<const mfem::Vector*>& inputs,
+                              const std::vector<mfem::Vector*>& outputs) const
+  {
+    MFEM_VERIFY(inputs.size() == outputs.size(), "Subspace Hessian-vector batch input/output size mismatch");
+    if (inputs.empty()) {
+      return;  // an empty batch is neither counted nor timed
+    }
+    // Calls hess_vec_func once per input/output pair, in order; the whole loop is timed and counted as one batch.
+    auto start = Clock::now();
+    ++num_subspace_hess_vec_batches;
+    num_subspace_batched_hess_vecs += inputs.size();
+    for (size_t i = 0; i < inputs.size(); ++i) {
+      hess_vec_func(*inputs[i], *outputs[i]);  // pointers are dereferenced without a null check
+    }
+    subspace_hess_vec_batch_seconds += secondsSince(start);
+  }
+
+  template <typename HessVecFunc>
+  void timedModelHessVec(HessVecFunc hess_vec_func, const mfem::Vector& input, mfem::Vector& output) const
+  {
+    auto start = Clock::now();
+    hess_vec_func(input, output);  // forwarded unchanged to the supplied callable
+    model_hess_vec_seconds += secondsSince(start);  // time attributed to the model-solve category
+    ++num_model_hess_vecs;
+  }
+
+  template <typename HessVecFunc>
+  void timedCauchyHessVec(HessVecFunc hess_vec_func, const mfem::Vector& input, mfem::Vector& output) const
+  {
+    auto start = Clock::now();
+    hess_vec_func(input, output);  // forwarded unchanged to the supplied callable
+    cauchy_hess_vec_seconds += secondsSince(start);  // time attributed to the Cauchy-point category
+    ++num_cauchy_hess_vecs;
+  }
+
+  template <typename HessVecFunc>
+  void timedLineSearchHessVec(HessVecFunc hess_vec_func, const mfem::Vector& input, mfem::Vector& output) const
+  {
+    auto start = Clock::now();
+    hess_vec_func(input, output);  // forwarded unchanged to the supplied callable
+    line_search_hess_vec_seconds += secondsSince(start);  // time attributed to the line-search category
+    ++num_line_search_hess_vecs;
+  }
+
+  double nonmonotoneWorkReference(const std::vector<double>& work_objective_history) const
+  {
+    if (work_objective_history.empty()) {
+      return current_work_objective;  // no stored history: use the current work objective itself
+    }
+    return *std::max_element(work_objective_history.begin(), work_objective_history.end());  // largest stored value
+  }
+
+  void pushWorkObjectiveHistory(std::vector<double>& work_objective_history, double objective) const
+  {
+    // Append `objective`, then keep only the newest trust_nonmonotone_window entries (no-op when the window is <= 0).
+    const int max_entries = nonlinear_options.trust_nonmonotone_window;
+    if (max_entries > 0) {
+      work_objective_history.push_back(objective);
+      if (work_objective_history.size() > static_cast<size_t>(max_entries)) {
+        work_objective_history.erase(work_objective_history.begin(), work_objective_history.end() - max_entries);
+      }
+    }
+  }
+
+  void pushAcceptedStepHistory(const mfem::Vector& step) const
+  {
+    if (nonlinear_options.trust_num_past_steps <= 0) {
+      accepted_step_history.clear();  // history disabled: drop anything stored earlier
+      return;
+    }
+    // A copy of the newest step goes to the front; the history is then capped at trust_num_past_steps + 1 entries.
+    accepted_step_history.insert(accepted_step_history.begin(), std::make_shared<mfem::Vector>(step));
+    const size_t max_size = static_cast<size_t>(nonlinear_options.trust_num_past_steps) + 1;
+    while (accepted_step_history.size() > max_size) {
+      accepted_step_history.pop_back();  // oldest entries sit at the back
+    }
+  }
+
   /// finds tau s.t. (z + tau*d)^2 = trSize^2
   void projectToBoundaryWithCoefs(mfem::Vector& z, const mfem::Vector& d, double delta, double zz, double zd,
                                   double dd) const
   {
+    auto start = Clock::now();  // timer for the projection tally
     // find z + tau d
     double deltadelta_m_zz = delta * delta - zz;
     if (deltadelta_m_zz == 0) return;  // already on boundary
     double tau = (std::sqrt(deltadelta_m_zz * dd + zd * zd) - zd) / dd;
     z.Add(tau, d);
+    projection_seconds += secondsSince(start);  // not reached when the early return above is taken
   }
 
   /// solve the exact trust-region subspace problem with directions ds, and the leftmosts
@@ -378,10 +744,14 @@ class TrustRegion : public mfem::NewtonSolver {
                                [[maybe_unused]] const std::vector<const mfem::Vector*> ds,
                                [[maybe_unused]] const std::vector<const mfem::Vector*> Hds,
                                [[maybe_unused]] const mfem::Vector& g, [[maybe_unused]] double delta,
-                               [[maybe_unused]] int num_leftmost) const
+                               [[maybe_unused]] int num_leftmost,
+                               [[maybe_unused]] std::vector<std::shared_ptr<mfem::Vector>>& candidate_left_mosts,
+                               [[maybe_unused]] const mfem::Vector& previous_step,
+                               [[maybe_unused]] const mfem::Vector* previous_H_previous_step,
+                               [[maybe_unused]] bool allow_cubic_subspace) const
   {
-#ifdef SMITH_USE_SLEPC
     SMITH_MARK_FUNCTION;
+    auto subspace_start = Clock::now();
     ++num_subspace_solves;
 
     std::vector<const mfem::Vector*> directions;
@@ -400,15 +770,6 @@ class TrustRegion : public mfem::NewtonSolver {
       H_directions.emplace_back(H_left.get());
     }
 
-    try {
-      std::tie(directions, H_directions) = removeDependentDirections(directions, H_directions);
-    } catch (const std::exception& e) {
-      if (print_level >= 2) {
-        mfem::out << "remove dependent directions failed with " << e.what() << std::endl;
-      }
-      return;
-    }
-
     mfem::Vector b(g);
     b *= -1;
 
@@ -418,18 +779,45 @@ class TrustRegion : public mfem::NewtonSolver {
     double energy_change;
 
     try {
-      std::tie(sol, leftvecs, leftvals, energy_change) =
-          solveSubspaceProblem(directions, H_directions, b, delta, num_leftmost);
+      auto backend_start = Clock::now();
+      if (nonlinear_options.trust_use_cubic_subspace && allow_cubic_subspace && previous_hessian) {
+        std::vector<mfem::Vector> previous_H_vectors;
+        std::vector<const mfem::Vector*> previous_H_directions;
+        previous_H_vectors.reserve(directions.size());
+        previous_H_directions.reserve(directions.size());
+        for (const auto* direction : directions) {
+          previous_H_vectors.emplace_back(direction->Size());
+          previous_hessian->Mult(*direction, previous_H_vectors.back());
+          previous_H_directions.emplace_back(&previous_H_vectors.back());
+        }
+        ++num_cubic_subspace_attempts;
+        bool used_cubic = false;
+        std::tie(sol, leftvecs, leftvals, energy_change) = solveCubicSubspaceProblemMfem(
+            directions, H_directions, previous_H_directions, previous_step, b, delta, num_leftmost, &used_cubic);
+        if (used_cubic) {
+          ++num_cubic_subspace_uses;
+        } else {
+          ++num_cubic_subspace_quadratic_fallbacks;
+          ++num_quadratic_subspace_solves;
+        }
+      } else {
+        ++num_quadratic_subspace_solves;
+        std::tie(sol, leftvecs, leftvals, energy_change) =
+            solveSubspaceProblem(directions, H_directions, b, delta, num_leftmost);
+      }
+      subspace_backend_seconds += secondsSince(backend_start);
     } catch (const std::exception& e) {
-      if (print_level == 1) {
+      if (print_level >= 1) {
         mfem::out << "subspace solve failed with " << e.what() << std::endl;
       }
+      subspace_seconds += secondsSince(subspace_start);
       return;
     }
 
-    left_mosts.clear();
+    auto finalize_start = Clock::now();
+    candidate_left_mosts.clear();
     for (auto& lv : leftvecs) {
-      left_mosts.emplace_back(std::move(lv));
+      candidate_left_mosts.emplace_back(std::move(lv));
     }
 
     double base_energy = computeEnergy(g, hess_vec_func, z);
@@ -444,43 +832,54 @@ class TrustRegion : public mfem::NewtonSolver {
     if (subspace_energy < base_energy) {
       z = sol;
     }
-#endif
+    subspace_finalize_seconds += secondsSince(finalize_start);
+    subspace_seconds += secondsSince(subspace_start);
   }
 
   /// finds tau s.t. (z + tau*(y-z))^2 = trSize^2
   void projectToBoundaryBetweenWithCoefs(mfem::Vector& z, const mfem::Vector& y, double trSize, double zz, double zy,
                                          double yy) const
   {
+    auto start = Clock::now();
     double dd = yy - 2 * zy + zz;
     double zd = zy - zz;
     double tau = (std::sqrt((trSize * trSize - zz) * dd + zd * zd) - zd) / dd;
     z.Add(-tau, z);
     z.Add(tau, y);
+    projection_seconds += secondsSince(start);
   }
 
   /// take a dogleg step in direction s, solution norm must be within trSize
   void doglegStep(const mfem::Vector& cp, const mfem::Vector& newtonP, double trSize, mfem::Vector& s) const
   {
     SMITH_MARK_FUNCTION;
-    // MRT, could optimize some of these eventually, compute on the outside and save
-    double cc = Dot(cp, cp);
-    double nn = Dot(newtonP, newtonP);
+    auto [cc, nn] = timedDot2(cp, cp, newtonP, newtonP, num_dogleg_dot_products, dogleg_dot_seconds);
     double tt = trSize * trSize;
 
+    auto update_start = Clock::now();
     s = 0.0;
+    vector_copy_scale_seconds += secondsSince(update_start);
     if (cc >= tt) {
+      update_start = Clock::now();
       add(s, std::sqrt(tt / cc), cp, s);
+      vector_update_seconds += secondsSince(update_start);
     } else if (cc > nn) {
       if (print_level >= 2) {
         mfem::out << "cp outside newton, preconditioner likely inaccurate\n";
       }
+      update_start = Clock::now();
       add(s, 1.0, cp, s);
+      vector_update_seconds += secondsSince(update_start);
     } else if (nn > tt) {  // on the dogleg (we have nn >= cc, and tt >= cc)
+      update_start = Clock::now();
       add(s, 1.0, cp, s);
-      double cn = Dot(cp, newtonP);
+      vector_update_seconds += secondsSince(update_start);
+      double cn = timedDot(cp, newtonP, num_dogleg_dot_products, dogleg_dot_seconds);
       projectToBoundaryBetweenWithCoefs(s, newtonP, trSize, cc, cn, nn);
     } else {
+      update_start = Clock::now();
       s = newtonP;
+      vector_copy_scale_seconds += secondsSince(update_start);
     }
   }
 
@@ -489,18 +888,18 @@ class TrustRegion : public mfem::NewtonSolver {
   double computeEnergy(const mfem::Vector& r_local, const HessVecFunc& H, const mfem::Vector& z) const
   {
     SMITH_MARK_FUNCTION;
-    double rz = Dot(r_local, z);
+    double rz = timedDot(r_local, z, num_line_search_dot_products, line_search_dot_seconds);
     mfem::Vector tmp(r_local);
     tmp = 0.0;
     H(z, tmp);
-    return rz + 0.5 * Dot(z, tmp);
+    return rz + 0.5 * timedDot(z, tmp, num_line_search_dot_products, line_search_dot_seconds);
   }
 
   /// Minimize quadratic sub-problem given residual vector, the action of the stiffness and a preconditioner
   template <typename HessVecFunc, typename PrecondFunc>
   void solveTrustRegionModelProblem(const mfem::Vector& r0, mfem::Vector& rCurrent, HessVecFunc hess_vec_func,
                                     PrecondFunc precond, const TrustRegionSettings& settings, double& trSize,
-                                    TrustRegionResults& results) const
+                                    TrustRegionResults& results, double r0_norm_squared) const
   {
     SMITH_MARK_FUNCTION;
     // minimize r0@z + 0.5*z@J@z
@@ -515,7 +914,7 @@ class TrustRegion : public mfem::NewtonSolver {
 
     const double cg_tol_squared = settings.cg_tol * settings.cg_tol;
 
-    if (Dot(r0, r0) <= cg_tol_squared && settings.min_cg_iterations == 0) {
+    if (r0_norm_squared <= cg_tol_squared && settings.min_cg_iterations == 0) {
       if (print_level >= 2) {
         mfem::out << "Trust region solution state within tolerance on first iteration."
                   << "\n";
@@ -523,37 +922,43 @@ class TrustRegion : public mfem::NewtonSolver {
       return;
     }
 
+    auto copy_start = Clock::now();
     rCurrent = r0;
+    vector_copy_scale_seconds += secondsSince(copy_start);
     precond(rCurrent, Pr);
 
     // d = -Pr
+    copy_start = Clock::now();
     d = Pr;
     d *= -1.0;
 
     z = 0.0;
+    vector_copy_scale_seconds += secondsSince(copy_start);
     double zz = 0.;
-    double rPr = Dot(rCurrent, Pr);
-    double zd = 0.0;
-    double dd = Dot(d, d);
+    double rPr = timedDot(rCurrent, Pr, num_model_dot_products, model_dot_seconds);
 
     // std::cout << "initial energy = " << computeEnergy(r0, hess_vec_func, z) << std::endl;
 
     for (cgIter = 1; cgIter <= settings.max_cg_iterations; ++cgIter) {
-      // check if this is a descent direction
-      if (Dot(d, rCurrent) > 0) {
+      hess_vec_func(d, Hd);
+      const auto dots = timedDot4(d, rCurrent, d, Hd, z, d, d, d, num_model_dot_products, model_dot_seconds);
+      double descent_check = dots.v0;  // d . rCurrent
+      double curvature = dots.v1;      // d . Hd
+      double zd = dots.v2;             // z . d
+      double dd = dots.v3;             // d . d
+      if (descent_check > 0) {
+        copy_start = Clock::now();
         d *= -1;
+        Hd *= -1;
+        vector_copy_scale_seconds += secondsSince(copy_start);
         results.interior_status = TrustRegionResults::Status::NonDescentDirection;
+        descent_check *= -1.0;
+        // d and Hd flip together, so d.Hd (and d.d) keep their sign; only the odd terms d.r and z.d change.
+        zd *= -1.0;
       }
 
-      hess_vec_func(d, Hd);
-      const double curvature = Dot(d, Hd);
       const double alphaCg = curvature != 0.0 ? rPr / curvature : 0.0;
-
-      auto& zPred = Pr;  // re-use Pr memory.
-                         // This predicted step will no longer be used by the time Pr is, so we can avoid an extra
-                         // vector floating around
-      add(z, alphaCg, d, zPred);
-      double zzNp1 = Dot(zPred, zPred);
+      const double zzNp1 = zz + 2.0 * alphaCg * zd + alphaCg * alphaCg * dd;
 
       const bool go_to_boundary = curvature <= 0 || zzNp1 >= trSize * trSize;
       if (go_to_boundary) {
@@ -566,7 +971,16 @@ class TrustRegion : public mfem::NewtonSolver {
         return;
       }
 
+      auto& zPred = Pr;  // re-use Pr memory.
+                         // This predicted step will no longer be used by the time Pr is, so we can avoid an extra
+                         // vector floating around
+      auto update_start = Clock::now();
+      add(z, alphaCg, d, zPred);
+      vector_update_seconds += secondsSince(update_start);
+
+      copy_start = Clock::now();
       z = zPred;
+      vector_copy_scale_seconds += secondsSince(copy_start);
 
       if (results.interior_status == TrustRegionResults::Status::NonDescentDirection) {
         if (print_level >= 2) {
@@ -575,68 +989,241 @@ class TrustRegion : public mfem::NewtonSolver {
         return;
       }
 
+      update_start = Clock::now();
       add(rCurrent, alphaCg, Hd, rCurrent);
+      vector_update_seconds += secondsSince(update_start);
 
       precond(rCurrent, Pr);
-      double rPrNp1 = Dot(rCurrent, Pr);
-
-      if (Dot(rCurrent, rCurrent) <= cg_tol_squared && cgIter >= settings.min_cg_iterations) {
+      auto [rPrNp1, r_current_norm_squared] =
+          timedDot2(rCurrent, Pr, rCurrent, rCurrent, num_model_dot_products, model_dot_seconds);
+      if (r_current_norm_squared <= cg_tol_squared && cgIter >= settings.min_cg_iterations) {
         return;
       }
 
       double beta = rPrNp1 / rPr;
       rPr = rPrNp1;
+      update_start = Clock::now();
       add(-1.0, Pr, beta, d, d);
+      vector_update_seconds += secondsSince(update_start);
 
       zz = zzNp1;
-      zd = Dot(z, d);
-      dd = Dot(d, d);
     }
     cgIter--;  // if all cg iterations are taken, correct for output
   }
 
+  std::unique_ptr<mfem::Operator> cloneAssembledOperator(const mfem::Operator& op) const
+  {
+    if (const auto* hypre_matrix = dynamic_cast<const mfem::HypreParMatrix*>(&op)) {
+      return std::make_unique<mfem::HypreParMatrix>(*hypre_matrix);  // copy-constructed
+    }
+    if (const auto* sparse_matrix = dynamic_cast<const mfem::SparseMatrix*>(&op)) {
+      return std::make_unique<mfem::SparseMatrix>(*sparse_matrix);  // copy-constructed
+    }
+    if (const auto* block_operator = dynamic_cast<const mfem::BlockOperator*>(&op)) {
+      return buildMonolithicMatrix(*block_operator);  // block operators are converted rather than copied
+    }
+    return nullptr;  // any other operator type: no clone is produced
+  }
+
   /// assemble the jacobian
   void assembleJacobian(const mfem::Vector& x) const
   {
     SMITH_MARK_FUNCTION;
+    auto start = Clock::now();
     ++num_jacobian_assembles;
+    if (nonlinear_options.trust_use_cubic_subspace) {
+      previous_hessian = std::move(current_hessian);  // the clone from the last assembly becomes the previous Hessian
+    }
     grad = &oper->GetGradient(x);
     if (nonlinear_options.force_monolithic) {
       auto* grad_blocked = dynamic_cast<mfem::BlockOperator*>(grad);
       if (grad_blocked) grad = buildMonolithicMatrix(*grad_blocked).release();
     }
+    if (nonlinear_options.trust_use_cubic_subspace) {
+      current_hessian = cloneAssembledOperator(*grad);  // may be null when the operator type cannot be cloned
+    }
+    jacobian_assembly_seconds += secondsSince(start);
+  }
+
+  /// Register the factory that updateJacobianOperator() calls to build a JacobianOperator at a given state.
+  void setJacobianOperator(JacobianOperatorFactory jacobian_operator)
+  {
+    jacobian_operator_factory = std::move(jacobian_operator);
+  }
+
+  /// Build the JacobianOperator at x through the registered factory and cache it; a missing factory or null result errors.
+  void updateJacobianOperator(const mfem::Vector& x) const
+  {
+    SMITH_MARK_FUNCTION;
+    SLIC_ERROR_ROOT_IF(!jacobian_operator_factory, "No JacobianOperator factory is registered.");
+    auto start = Clock::now();
+    ++num_jacobian_operator_evals;
+    current_jacobian_operator = jacobian_operator_factory(x);  // replaces any previously cached operator
+    SLIC_ERROR_ROOT_IF(!current_jacobian_operator, "JacobianOperator factory returned a null operator.");
+    jacobian_operator_eval_seconds += secondsSince(start);
+  }
+
+  /// Assemble the JacobianOperator diagonal and store 1 / max(|diag_i|, floor), with one floor shared by all ranks.
+  void updateDiagonalPreconditioner() const
+  {
+    SMITH_MARK_FUNCTION;
+    SLIC_ERROR_ROOT_IF(!current_jacobian_operator, "Cannot build diagonal preconditioner without a JacobianOperator.");
+
+    auto diagonal_start = Clock::now();
+    current_jacobian_operator->assembleDiagonal(inverse_diagonal_preconditioner);
+    diagonal_assembly_seconds += secondsSince(diagonal_start);
+    ++num_diagonal_assembles;
+
+    auto invert_start = Clock::now();
+    // The floor must be identical on every rank, so reduce the largest |diagonal| entry over the communicator.
+    mfem::real_t max_abs_diag = inverse_diagonal_preconditioner.Normlinf();
+#ifdef MFEM_USE_MPI
+    if (GetComm() != MPI_COMM_NULL) MPI_Allreduce(MPI_IN_PLACE, &max_abs_diag, 1, MFEM_MPI_REAL_T, MPI_MAX, GetComm());
+#endif
+    const double floor = nonlinear_options.pcg_diagonal_floor * max_abs_diag;
+    SLIC_ERROR_ROOT_IF(!(floor > 0.0), "Cannot invert a zero Jacobian diagonal for TrustRegion preconditioning.");
+    for (int i = 0; i < inverse_diagonal_preconditioner.Size(); ++i) {
+      inverse_diagonal_preconditioner[i] = 1.0 / std::max(std::abs(inverse_diagonal_preconditioner[i]), floor);
+    }
+    diagonal_invert_seconds += secondsSince(invert_start);
   }
 
   /// evaluate the nonlinear residual
   mfem::real_t computeResidual(const mfem::Vector& x_, mfem::Vector& r_) const
   {
     SMITH_MARK_FUNCTION;
+    auto start = Clock::now();
     ++num_residuals;
     oper->Mult(x_, r_);
-    return Norm(r_);
+    const auto norm = Norm(r_);
+    residual_seconds += secondsSince(start);  // covers both the operator evaluation and the norm
+    return norm;
   }
 
-  /// apply the action of the assembled Jacobian matrix to a vector
+  /// apply the current Jacobian to x_, writing v_: the cached JacobianOperator in operator mode, otherwise grad
   void hessVec(const mfem::Vector& x_, mfem::Vector& v_) const
   {
     SMITH_MARK_FUNCTION;
+    auto start = Clock::now();
     ++num_hess_vecs;
-    grad->Mult(x_, v_);
+    if (nonlinear_options.trust_use_jacobian_operator) {
+      SLIC_ERROR_ROOT_IF(!current_jacobian_operator, "TrustRegion JacobianOperator mode has no current operator.");
+      current_jacobian_operator->Mult(x_, v_);
+      const double seconds = secondsSince(start);
+      hess_vec_seconds += seconds;
+      jacobian_operator_hess_vec_seconds += seconds;  // operator-mode time is additionally tracked on its own
+    } else {
+      grad->Mult(x_, v_);  // assembled-operator path
+      hess_vec_seconds += secondsSince(start);
+    }
   }
 
   /// apply trust region specific preconditioner
   void precond(const mfem::Vector& x_, mfem::Vector& v_) const
   {
     SMITH_MARK_FUNCTION;
+    auto start = Clock::now();
     ++num_preconds;
-    tr_precond.Mult(x_, v_);
+    if (nonlinear_options.trust_use_jacobian_operator) {
+      SLIC_ERROR_ROOT_IF(inverse_diagonal_preconditioner.Size() != x_.Size(),
+                         "TrustRegion JacobianOperator diagonal preconditioner is not initialized.");
+      v_.SetSize(x_.Size());
+      for (int i = 0; i < x_.Size(); ++i) {
+        v_[i] = inverse_diagonal_preconditioner[i] * x_[i];  // entrywise scaling by the stored inverse diagonal
+      }
+    } else {
+      tr_precond.Mult(x_, v_);  // assembled mode: delegate to the trust-region preconditioner
+    }
+    preconditioner_seconds += secondsSince(start);  // both paths are charged to the same timer
   };
 
+  /// Return a snapshot of the solver counters and timers (Mult() resets them at the start of each solve).
+  TrustRegionDiagnostics diagnostics() const
+  {
+    return {.num_residuals = num_residuals,
+            .num_hess_vecs = num_hess_vecs,
+            .num_model_hess_vecs = num_model_hess_vecs,
+            .num_cauchy_hess_vecs = num_cauchy_hess_vecs,
+            .num_line_search_hess_vecs = num_line_search_hess_vecs,
+            .num_preconds = num_preconds,
+            .num_jacobian_assembles = num_jacobian_assembles,
+            .num_jacobian_operator_evals = num_jacobian_operator_evals,
+            .num_diagonal_assembles = num_diagonal_assembles,
+            .num_cg_iterations = num_cg_iterations,
+            .num_subspace_solves = num_subspace_solves,
+            .num_subspace_leftmost_hess_vecs = num_subspace_leftmost_hess_vecs,
+            .num_subspace_hess_vec_batches = num_subspace_hess_vec_batches,
+            .num_subspace_batched_hess_vecs = num_subspace_batched_hess_vecs,
+            .num_subspace_past_step_vectors = num_subspace_past_step_vectors,
+            .num_subspace_past_step_hess_vecs = num_subspace_past_step_hess_vecs,
+            .num_subspace_solve_start_vectors = num_subspace_solve_start_vectors,
+            .num_subspace_solve_start_hess_vecs = num_subspace_solve_start_hess_vecs,
+            .num_quadratic_subspace_solves = num_quadratic_subspace_solves,
+            .num_cubic_subspace_attempts = num_cubic_subspace_attempts,
+            .num_cubic_subspace_uses = num_cubic_subspace_uses,
+            .num_cubic_subspace_quadratic_fallbacks = num_cubic_subspace_quadratic_fallbacks,
+            .num_preconditioner_updates = num_preconditioner_updates,
+            .num_nonmonotone_work_accepts = num_nonmonotone_work_accepts,
+            .num_monotone_work_would_reject = num_monotone_work_would_reject,
+            .residual_seconds = residual_seconds,
+            .hess_vec_seconds = hess_vec_seconds,
+            .model_hess_vec_seconds = model_hess_vec_seconds,
+            .cauchy_hess_vec_seconds = cauchy_hess_vec_seconds,
+            .line_search_hess_vec_seconds = line_search_hess_vec_seconds,
+            .jacobian_operator_hess_vec_seconds = jacobian_operator_hess_vec_seconds,
+            .jacobian_operator_eval_seconds = jacobian_operator_eval_seconds,
+            .diagonal_assembly_seconds = diagonal_assembly_seconds,
+            .diagonal_invert_seconds = diagonal_invert_seconds,
+            .preconditioner_seconds = preconditioner_seconds,
+            .total_seconds = total_seconds,
+            .model_solve_seconds = model_solve_seconds,
+            .subspace_seconds = subspace_seconds,
+            .subspace_leftmost_seconds = subspace_leftmost_seconds,
+            .subspace_hess_vec_batch_seconds = subspace_hess_vec_batch_seconds,
+            .subspace_filter_seconds = subspace_filter_seconds,
+            .subspace_backend_seconds = subspace_backend_seconds,
+            .subspace_project_A_seconds = trustRegionSubspaceTimings().project_A_seconds,
+            .subspace_project_gram_seconds = trustRegionSubspaceTimings().project_gram_seconds,
+            .subspace_project_b_seconds = trustRegionSubspaceTimings().project_b_seconds,
+            .subspace_basis_seconds = trustRegionSubspaceTimings().basis_seconds,
+            .subspace_reduced_A_seconds = trustRegionSubspaceTimings().reduced_A_seconds,
+            .subspace_dense_eigensystem_seconds = trustRegionSubspaceTimings().dense_eigensystem_seconds,
+            .subspace_dense_trust_solve_seconds = trustRegionSubspaceTimings().dense_trust_solve_seconds,
+            .subspace_reconstruct_solution_seconds = trustRegionSubspaceTimings().reconstruct_solution_seconds,
+            .subspace_reconstruct_leftmost_seconds = trustRegionSubspaceTimings().reconstruct_leftmost_seconds,
+            .subspace_finalize_seconds = subspace_finalize_seconds,
+            .cauchy_point_seconds = cauchy_point_seconds,
+            .dogleg_seconds = dogleg_seconds,
+            .line_search_seconds = line_search_seconds,
+            .dot_seconds = dot_seconds,
+            .num_dot_products = num_dot_products,
+            .num_dot_reductions = num_dot_reductions,
+            .num_model_dot_products = num_model_dot_products,
+            .num_cauchy_dot_products = num_cauchy_dot_products,
+            .num_dogleg_dot_products = num_dogleg_dot_products,
+            .num_line_search_dot_products = num_line_search_dot_products,
+            .num_setup_dot_products = num_setup_dot_products,
+            .model_dot_seconds = model_dot_seconds,
+            .cauchy_dot_seconds = cauchy_dot_seconds,
+            .dogleg_dot_seconds = dogleg_dot_seconds,
+            .line_search_dot_seconds = line_search_dot_seconds,
+            .setup_dot_seconds = setup_dot_seconds,
+            .vector_update_seconds = vector_update_seconds,
+            .vector_copy_scale_seconds = vector_copy_scale_seconds,
+            .projection_seconds = projection_seconds,
+            .jacobian_assembly_seconds = jacobian_assembly_seconds,
+            .preconditioner_update_seconds = preconditioner_update_seconds,
+            .preconditioner_setup_seconds = preconditioner_setup_seconds,
+            .last_work_objective = current_work_objective,
+            .last_nonmonotone_work_reference = last_nonmonotone_work_reference};
+  }
+
   /// @overload
   void Mult(const mfem::Vector&, mfem::Vector& X) const
   {
     MFEM_ASSERT(oper != NULL, "the Operator is not set (use SetOperator).");
     MFEM_ASSERT(prec != NULL, "the Solver is not set (use SetSolver).");
+    auto total_start = Clock::now();
 
     print_level = static_cast<size_t>(std::max(nonlinear_options.print_level, 0));
     print_level = print_options.iterations ? std::max<size_t>(1, print_level) : print_level;
@@ -645,13 +1232,87 @@ class TrustRegion : public mfem::NewtonSolver {
     using real_t = mfem::real_t;
 
     num_hess_vecs = 0;
+    num_model_hess_vecs = 0;
+    num_cauchy_hess_vecs = 0;
+    num_line_search_hess_vecs = 0;
     num_preconds = 0;
     num_residuals = 0;
     num_subspace_solves = 0;
+    num_subspace_leftmost_hess_vecs = 0;
+    num_subspace_hess_vec_batches = 0;
+    num_subspace_batched_hess_vecs = 0;
+    num_subspace_past_step_vectors = 0;
+    num_subspace_past_step_hess_vecs = 0;
+    num_subspace_solve_start_vectors = 0;
+    num_subspace_solve_start_hess_vecs = 0;
+    num_quadratic_subspace_solves = 0;
+    num_cubic_subspace_attempts = 0;
+    num_cubic_subspace_uses = 0;
+    num_cubic_subspace_quadratic_fallbacks = 0;
     num_jacobian_assembles = 0;
+    num_jacobian_operator_evals = 0;
+    num_diagonal_assembles = 0;
+    num_cg_iterations = 0;
+    num_preconditioner_updates = 0;
+    num_nonmonotone_work_accepts = 0;
+    num_monotone_work_would_reject = 0;
+    residual_seconds = 0.0;
+    hess_vec_seconds = 0.0;
+    model_hess_vec_seconds = 0.0;
+    cauchy_hess_vec_seconds = 0.0;
+    line_search_hess_vec_seconds = 0.0;
+    jacobian_operator_hess_vec_seconds = 0.0;
+    jacobian_operator_eval_seconds = 0.0;
+    diagonal_assembly_seconds = 0.0;
+    diagonal_invert_seconds = 0.0;
+    preconditioner_seconds = 0.0;
+    total_seconds = 0.0;
+    model_solve_seconds = 0.0;
+    subspace_seconds = 0.0;
+    subspace_leftmost_seconds = 0.0;
+    subspace_hess_vec_batch_seconds = 0.0;
+    subspace_filter_seconds = 0.0;
+    subspace_backend_seconds = 0.0;
+    subspace_finalize_seconds = 0.0;
+    cauchy_point_seconds = 0.0;
+    dogleg_seconds = 0.0;
+    line_search_seconds = 0.0;
+    dot_seconds = 0.0;
+    num_dot_products = 0;
+    num_dot_reductions = 0;
+    num_model_dot_products = 0;
+    num_cauchy_dot_products = 0;
+    num_dogleg_dot_products = 0;
+    num_line_search_dot_products = 0;
+    num_setup_dot_products = 0;
+    model_dot_seconds = 0.0;
+    cauchy_dot_seconds = 0.0;
+    dogleg_dot_seconds = 0.0;
+    line_search_dot_seconds = 0.0;
+    setup_dot_seconds = 0.0;
+    vector_update_seconds = 0.0;
+    vector_copy_scale_seconds = 0.0;
+    projection_seconds = 0.0;
+    jacobian_assembly_seconds = 0.0;
+    preconditioner_update_seconds = 0.0;
+    preconditioner_setup_seconds = 0.0;
+    current_work_objective = 0.0;
+    last_nonmonotone_work_reference = 0.0;
+    accepted_step_history.clear();
+    resetTrustRegionSubspaceTimings();
+    solve_start_x.SetSize(X.Size());
+    solve_start_x = X;
+    min_residual_x.SetSize(X.Size());
+    min_residual_x = X;
+    current_jacobian_operator.reset();
+    inverse_diagonal_preconditioner.SetSize(0);
+    previous_H_left_mosts.clear();
+    current_hessian.reset();
+    previous_hessian.reset();
 
     real_t norm, norm_goal = 0.0;
     norm = initial_norm = computeResidual(X, r);
+    min_residual_norm = initial_norm;
     if (norm == 0.0) return;
 
     norm_goal = std::max(rel_tol * initial_norm, abs_tol);
@@ -660,6 +1321,11 @@ class TrustRegion : public mfem::NewtonSolver {
       mfem::out << "TrustRegion iteration " << std::setw(3) << 0 << " : ||r|| = " << std::setw(13) << norm << "\n";
     }
 
+    SLIC_ERROR_ROOT_IF(nonlinear_options.trust_nonmonotone_window < 0,
+                       "TrustRegion requires trust_nonmonotone_window >= 0");
+    std::vector<double> work_objective_history;
+    pushWorkObjectiveHistory(work_objective_history, current_work_objective);
+
     prec->iterative_mode = false;
     tr_precond.iterative_mode = false;
 
@@ -680,8 +1346,11 @@ class TrustRegion : public mfem::NewtonSolver {
     int subspace_option = nonlinear_options.subspace_option;
     int num_leftmost = nonlinear_options.num_leftmost;
 
+    auto copy_start = Clock::now();
     scratch = 1.0;
-    double tr_size = nonlinear_options.trust_region_scaling * std::sqrt(Dot(scratch, scratch));
+    vector_copy_scale_seconds += secondsSince(copy_start);
+    double tr_size = nonlinear_options.trust_region_scaling *
+                     std::sqrt(timedDot(scratch, scratch, num_setup_dot_products, setup_dot_seconds));
     size_t cumulative_cg_iters_from_last_precond_update = 0;
 
     int it = 0;
@@ -712,12 +1381,26 @@ class TrustRegion : public mfem::NewtonSolver {
         break;
       }
 
-      assembleJacobian(X);
-
-      if (it == 0 || (trResults.cg_iterations_count >= settings.max_cg_iterations ||
-                      cumulative_cg_iters_from_last_precond_update >= settings.max_cumulative_iteration)) {
-        tr_precond.SetOperator(*grad);
+      if (nonlinear_options.trust_use_jacobian_operator) {
+        SLIC_ERROR_ROOT_IF(!jacobian_operator_factory,
+                           "TrustRegion JacobianOperator mode requires a registered JacobianOperator factory.");
+        updateJacobianOperator(X);
+        updateDiagonalPreconditioner();
+        ++num_preconditioner_updates;
         cumulative_cg_iters_from_last_precond_update = 0;
+      } else {
+        assembleJacobian(X);
+
+        if (it == 0 || (trResults.cg_iterations_count >= settings.max_cg_iterations ||
+                        cumulative_cg_iters_from_last_precond_update >= settings.max_cumulative_iteration)) {
+          auto preconditioner_update_start = Clock::now();
+          auto preconditioner_setup_start = Clock::now();
+          tr_precond.SetOperator(*grad);
+          preconditioner_setup_seconds += secondsSince(preconditioner_setup_start);
+          preconditioner_update_seconds += secondsSince(preconditioner_update_start);
+          ++num_preconditioner_updates;
+          cumulative_cg_iters_from_last_precond_update = 0;
+        }
       }
 
       auto hess_vec_func = [&](const mfem::Vector& x_, mfem::Vector& v_) { hessVec(x_, v_); };
@@ -726,19 +1409,29 @@ class TrustRegion : public mfem::NewtonSolver {
       double cauchyPointNormSquared = tr_size * tr_size;
       trResults.reset();
 
-      hess_vec_func(r, trResults.H_d);
-      const double gKg = Dot(r, trResults.H_d);
-      if (gKg > 0) {
-        const double alphaCp = -Dot(r, r) / gKg;
-        add(trResults.cauchy_point, alphaCp, r, trResults.cauchy_point);
-        cauchyPointNormSquared = Dot(trResults.cauchy_point, trResults.cauchy_point);
-      } else {
-        const double alphaTr = -tr_size / std::sqrt(Dot(r, r));
-        add(trResults.cauchy_point, alphaTr, r, trResults.cauchy_point);
-        if (print_level >= 2) {
-          mfem::out << "Negative curvature un-preconditioned cauchy point direction found."
-                    << "\n";
+      {
+        auto cauchy_start = Clock::now();
+        timedCauchyHessVec(hess_vec_func, r, trResults.H_d);
+        const double gKg = timedDot(r, trResults.H_d, num_cauchy_dot_products, cauchy_dot_seconds);
+        const double residual_norm_squared = norm * norm;
+        if (gKg > 0) {
+          const double alphaCp = -residual_norm_squared / gKg;
+          auto update_start = Clock::now();
+          add(trResults.cauchy_point, alphaCp, r, trResults.cauchy_point);
+          vector_update_seconds += secondsSince(update_start);
+          cauchyPointNormSquared =
+              timedDot(trResults.cauchy_point, trResults.cauchy_point, num_cauchy_dot_products, cauchy_dot_seconds);
+        } else {
+          const double alphaTr = -tr_size / norm;
+          auto update_start = Clock::now();
+          add(trResults.cauchy_point, alphaTr, r, trResults.cauchy_point);
+          vector_update_seconds += secondsSince(update_start);
+          if (print_level >= 2) {
+            mfem::out << "Negative curvature un-preconditioned cauchy point direction found."
+                      << "\n";
+          }
         }
+        cauchy_point_seconds += secondsSince(cauchy_start);
       }
 
       if (cauchyPointNormSquared >= tr_size * tr_size) {
@@ -753,68 +1446,193 @@ class TrustRegion : public mfem::NewtonSolver {
         trResults.interior_status = TrustRegionResults::Status::OnBoundary;
       } else {
         settings.cg_tol = std::max(0.5 * norm_goal, 5e-5 * norm);
-        solveTrustRegionModelProblem(r, scratch, hess_vec_func, precond_func, settings, tr_size, trResults);
+        auto model_start = Clock::now();
+        auto model_hess_vec_func = [&](const mfem::Vector& x_, mfem::Vector& v_) {
+          timedModelHessVec(hess_vec_func, x_, v_);
+        };
+        solveTrustRegionModelProblem(r, scratch, model_hess_vec_func, precond_func, settings, tr_size, trResults,
+                                     norm * norm);
+        model_solve_seconds += secondsSince(model_start);
       }
       cumulative_cg_iters_from_last_precond_update += trResults.cg_iterations_count;
+      num_cg_iterations += trResults.cg_iterations_count;
 
       bool have_computed_Hvs = false;
+      bool have_computed_H_left_mosts = false;
+      std::vector<std::shared_ptr<mfem::Vector>> candidate_left_mosts;
 
       int lineSearchIter = 0;
       while (lineSearchIter <= nonlinear_options.max_line_search_iterations) {
+        auto line_search_start = Clock::now();
         ++lineSearchIter;
 
+        auto dogleg_start = Clock::now();
         doglegStep(trResults.cauchy_point, trResults.z, tr_size, trResults.d);
+        dogleg_seconds += secondsSince(dogleg_start);
 
+        const bool check_subspace_boundary = subspace_option >= 1;
+        const double d_norm =
+            check_subspace_boundary
+                ? std::sqrt(timedDot(trResults.d, trResults.d, num_line_search_dot_products, line_search_dot_seconds))
+                : 0.0;
         bool use_with_option1 =
             (subspace_option >= 1) && (trResults.interior_status == TrustRegionResults::Status::NonDescentDirection ||
                                        trResults.interior_status == TrustRegionResults::Status::NegativeCurvature ||
-                                       ((Norm(trResults.d) > (1.0 - 1.0e-6) * tr_size) && lineSearchIter > 1));
-        bool use_with_option2 = (subspace_option >= 2) && (Norm(trResults.d) > (1.0 - 1.0e-6) * tr_size);
+                                       ((d_norm > (1.0 - 1.0e-6) * tr_size) && lineSearchIter > 1));
+        bool use_with_option2 = (subspace_option >= 2) && (d_norm > (1.0 - 1.0e-6) * tr_size);
         bool use_with_option3 = (subspace_option >= 3);
+        const bool allow_cubic_subspace =
+            trResults.interior_status == TrustRegionResults::Status::NegativeCurvature || use_with_option2;
 
         if (use_with_option1 || use_with_option2 || use_with_option3) {
           if (!have_computed_Hvs) {
             have_computed_Hvs = true;
-            hess_vec_func(trResults.z, trResults.H_z);
-            hess_vec_func(trResults.d_old, trResults.H_d_old);
-            hess_vec_func(trResults.cauchy_point, trResults.H_cauchy_point);
+
+            std::vector<const mfem::Vector*> subspace_hess_inputs{&trResults.z, &trResults.cauchy_point};
+            std::vector<mfem::Vector*> subspace_hess_outputs{&trResults.H_z, &trResults.H_cauchy_point};
+            if (trResults.has_d_old) {
+              subspace_hess_inputs.push_back(&trResults.d_old);
+              subspace_hess_outputs.push_back(&trResults.H_d_old);
+            }
+
+            batchedSubspaceHessVec(hess_vec_func, subspace_hess_inputs, subspace_hess_outputs);
           }
 
-          H_left_mosts.clear();
-          for (auto& left : left_mosts) {
-            H_left_mosts.emplace_back(std::make_shared<mfem::Vector>(*left));
-            hess_vec_func(*left, *H_left_mosts.back());
+          if (!have_computed_H_left_mosts) {
+            have_computed_H_left_mosts = true;
+            auto leftmost_start = Clock::now();
+            previous_H_left_mosts = H_left_mosts;
+            H_left_mosts.clear();
+            std::vector<const mfem::Vector*> leftmost_inputs;
+            std::vector<mfem::Vector*> leftmost_outputs;
+            for (auto& left : left_mosts) {
+              H_left_mosts.emplace_back(std::make_shared<mfem::Vector>(*left));
+              leftmost_inputs.push_back(left.get());
+              leftmost_outputs.push_back(H_left_mosts.back().get());
+              ++num_subspace_leftmost_hess_vecs;
+            }
+            subspace_leftmost_seconds += secondsSince(leftmost_start);
+            batchedSubspaceHessVec(hess_vec_func, leftmost_inputs, leftmost_outputs);
           }
 
-          std::vector<const mfem::Vector*> ds{&trResults.z, &trResults.d_old, &trResults.cauchy_point};
-          std::vector<const mfem::Vector*> H_ds{&trResults.H_z, &trResults.H_d_old, &trResults.H_cauchy_point};
-          solveTheSubspaceProblem(trResults.d, hess_vec_func, ds, H_ds, r, tr_size, num_leftmost);
+          std::vector<const mfem::Vector*> ds{&trResults.z, &trResults.cauchy_point};
+          std::vector<const mfem::Vector*> H_ds{&trResults.H_z, &trResults.H_cauchy_point};
+          if (trResults.has_d_old) {
+            ds.push_back(&trResults.d_old);
+            H_ds.push_back(&trResults.H_d_old);
+          }
+
+          std::vector<mfem::Vector> H_past_steps;
+          std::vector<const mfem::Vector*> past_step_inputs;
+          std::vector<mfem::Vector*> past_step_outputs;
+          const size_t max_past_steps = static_cast<size_t>(std::max(nonlinear_options.trust_num_past_steps, 0));
+          const size_t num_past_steps =
+              accepted_step_history.size() > 1 ? std::min(max_past_steps, accepted_step_history.size() - 1) : 0;
+          H_past_steps.reserve(num_past_steps);
+          past_step_inputs.reserve(num_past_steps);
+          past_step_outputs.reserve(num_past_steps);
+          for (size_t i = 0; i < num_past_steps; ++i) {
+            const auto& past_step = accepted_step_history[i + 1];
+            H_past_steps.emplace_back(past_step->Size());
+            past_step_inputs.push_back(past_step.get());
+            past_step_outputs.push_back(&H_past_steps.back());
+          }
+          if (!past_step_inputs.empty()) {
+            num_subspace_past_step_vectors += past_step_inputs.size();
+            num_subspace_past_step_hess_vecs += past_step_inputs.size();
+            batchedSubspaceHessVec(hess_vec_func, past_step_inputs, past_step_outputs);
+            for (size_t i = 0; i < past_step_inputs.size(); ++i) {
+              ds.push_back(past_step_inputs[i]);
+              H_ds.push_back(past_step_outputs[i]);
+            }
+          }
+
+          mfem::Vector solve_start_direction;
+          mfem::Vector H_solve_start_direction;
+          if (nonlinear_options.trust_use_solve_start_direction && solve_start_x.Size() == X.Size()) {
+            solve_start_direction.SetSize(X.Size());
+            subtract(solve_start_x, X, solve_start_direction);
+            if (Norm(solve_start_direction) > 0.0) {  // solver norm, not rank-local Norml2: ranks must agree here
+              H_solve_start_direction.SetSize(X.Size());
+              std::vector<const mfem::Vector*> solve_start_inputs{&solve_start_direction};
+              std::vector<mfem::Vector*> solve_start_outputs{&H_solve_start_direction};
+              ++num_subspace_solve_start_vectors;
+              ++num_subspace_solve_start_hess_vecs;
+              batchedSubspaceHessVec(hess_vec_func, solve_start_inputs, solve_start_outputs);
+              ds.push_back(&solve_start_direction);
+              H_ds.push_back(&H_solve_start_direction);
+            }
+          }
+
+          mfem::Vector min_residual_direction;
+          mfem::Vector H_min_residual_direction;
+          if (nonlinear_options.trust_use_min_residual_direction && min_residual_x.Size() == X.Size()) {
+            min_residual_direction.SetSize(X.Size());
+            subtract(min_residual_x, X, min_residual_direction);
+            if (Norm(min_residual_direction) > 0.0) {  // solver norm, not rank-local Norml2: ranks must agree here
+              H_min_residual_direction.SetSize(X.Size());
+              std::vector<const mfem::Vector*> min_res_inputs{&min_residual_direction};
+              std::vector<mfem::Vector*> min_res_outputs{&H_min_residual_direction};
+              // Tallied under the solve_start counters for now (shared with the solve-start direction above).
+              ++num_subspace_solve_start_vectors;
+              ++num_subspace_solve_start_hess_vecs;
+              batchedSubspaceHessVec(hess_vec_func, min_res_inputs, min_res_outputs);
+              ds.push_back(&min_residual_direction);
+              H_ds.push_back(&H_min_residual_direction);
+            }
+          }
+          solveTheSubspaceProblem(trResults.d, hess_vec_func, ds, H_ds, r, tr_size, num_leftmost, candidate_left_mosts,
+                                  trResults.d_old,
+                                  trResults.has_d_old ? &trResults.H_d_old_at_accept : nullptr, allow_cubic_subspace);
         }
 
         static constexpr double roundOffTol = 0.0;  // 1e-14;
 
-        hess_vec_func(trResults.d, trResults.H_d);
-        double dHd = Dot(trResults.d, trResults.H_d);
-        double modelObjective = Dot(r, trResults.d) + 0.5 * dHd - roundOffTol;
+        timedLineSearchHessVec(hess_vec_func, trResults.d, trResults.H_d);
+        const auto [dHd, rd] = timedDot2(trResults.d, trResults.H_d, r, trResults.d, num_line_search_dot_products,
+                                         line_search_dot_seconds);
+        double modelObjective = rd + 0.5 * dHd - roundOffTol;
 
+        auto update_start = Clock::now();
         add(X, trResults.d, x_pred);
+        vector_update_seconds += secondsSince(update_start);
 
         double realObjective = std::numeric_limits<double>::max();
         double normPred = std::numeric_limits<double>::max();
         try {
           normPred = computeResidual(x_pred, r_pred);
-          double obj1 = 0.5 * (Dot(r, trResults.d) + Dot(r_pred, trResults.d)) - roundOffTol;
+          if (normPred < min_residual_norm) {
+            min_residual_norm = normPred;
+            min_residual_x = x_pred;
+          }
+          double obj1 =
+              0.5 * (rd + timedDot(r_pred, trResults.d, num_line_search_dot_products, line_search_dot_seconds)) -
+              roundOffTol;
           realObjective = obj1;
         } catch (const std::exception&) {
           realObjective = std::numeric_limits<double>::max();
           normPred = std::numeric_limits<double>::max();
         }
 
+        const double trial_work_objective = current_work_objective + realObjective;
+        last_nonmonotone_work_reference = nonmonotoneWorkReference(work_objective_history);
+
         if (normPred <= norm_goal) {
           trResults.d_old = trResults.d;
+          trResults.H_d_old_at_accept = trResults.H_d;
+          trResults.has_d_old = true;
+          pushAcceptedStepHistory(trResults.d);
+          if (!candidate_left_mosts.empty()) {
+            left_mosts = std::move(candidate_left_mosts);
+          }
+          copy_start = Clock::now();
           X = x_pred;
           r = r_pred;
+          vector_copy_scale_seconds += secondsSince(copy_start);
           norm = normPred;
+          current_work_objective = trial_work_objective;
+          pushWorkObjectiveHistory(work_objective_history, current_work_objective);
+          line_search_seconds += secondsSince(line_search_start);
           if (print_level >= 2) {
             printTrustRegionInfo(realObjective, modelObjective, trResults.cg_iterations_count, tr_size, true);
             trResults.cg_iterations_count =
@@ -853,7 +1671,11 @@ class TrustRegion : public mfem::NewtonSolver {
         // modelRes = g + Jd
         // modelResNorm = np.linalg.norm(modelRes)
         // realResNorm = np.linalg.norm(gy)
-        bool willAccept = rho >= settings.eta1 && rho <= settings.eta4;  // or (rho >= -0 and realResNorm <= gNorm)
+        const bool monotoneAccept = rho >= settings.eta1 && rho <= settings.eta4;
+        const bool nonmonotoneAccept =
+            nonlinear_options.trust_nonmonotone_window > 0 && modelObjective < 0.0 && rho <= settings.eta4 &&
+            trial_work_objective <= last_nonmonotone_work_reference + settings.eta1 * modelObjective;
+        bool willAccept = monotoneAccept || nonmonotoneAccept;  // or (rho >= -0 and realResNorm <= gNorm)
 
         if (print_level >= 2) {
           printTrustRegionInfo(realObjective, modelObjective, trResults.cg_iterations_count, tr_size, willAccept);
@@ -863,11 +1685,27 @@ class TrustRegion : public mfem::NewtonSolver {
 
         if (willAccept) {
           trResults.d_old = trResults.d;
+          trResults.H_d_old_at_accept = trResults.H_d;
+          trResults.has_d_old = true;
+          pushAcceptedStepHistory(trResults.d);
+          if (!candidate_left_mosts.empty()) {
+            left_mosts = std::move(candidate_left_mosts);
+          }
+          if (nonmonotoneAccept && !monotoneAccept) {
+            ++num_nonmonotone_work_accepts;
+            ++num_monotone_work_would_reject;
+          }
+          copy_start = Clock::now();
           X = x_pred;
           r = r_pred;
+          vector_copy_scale_seconds += secondsSince(copy_start);
           norm = normPred;
+          current_work_objective = trial_work_objective;
+          pushWorkObjectiveHistory(work_objective_history, current_work_objective);
+          line_search_seconds += secondsSince(line_search_start);
           break;
         }
+        line_search_seconds += secondsSince(line_search_start);
       }
     }
 
@@ -889,6 +1727,7 @@ class TrustRegion : public mfem::NewtonSolver {
       mfem::out << "num subspace solves = " << num_subspace_solves << "\n";
       mfem::out << "num jacobian_assembles = " << num_jacobian_assembles << "\n";
     }
+    total_seconds = secondsSince(total_start);
   }
 };
 
@@ -962,11 +1801,43 @@ class PcgBlockSolver : public mfem::NewtonSolver {
   mutable double final_h_scale = 1.0;
   /// Last accepted block trust ratio
   mutable double last_trust_ratio = 0.0;
+  /// Time spent evaluating residuals
+  mutable double residual_seconds = 0.0;
+  /// Time spent applying all Hessian-vector products
+  mutable double hess_vec_seconds = 0.0;
+  /// Time spent applying JacobianOperator Hessian-vector products
+  mutable double jacobian_operator_hess_vec_seconds = 0.0;
+  /// Time spent applying assembled Hessian-vector products
+  mutable double assembled_hess_vec_seconds = 0.0;
+  /// Time spent applying legacy matrix-free tangent products
+  mutable double matrix_free_hess_vec_seconds = 0.0;
+  /// Time spent applying preconditioners
+  mutable double preconditioner_seconds = 0.0;
+  /// Time spent evaluating JacobianOperator factories
+  mutable double jacobian_operator_eval_seconds = 0.0;
+  /// Time spent assembling sparse Jacobians
+  mutable double jacobian_assembly_seconds = 0.0;
+  /// Time spent directly assembling diagonals
+  mutable double diagonal_assembly_seconds = 0.0;
+  /// Time spent inverting the directly assembled diagonals
+  mutable double diagonal_invert_seconds = 0.0;
+  /// Time spent refreshing preconditioner data
+  mutable double preconditioner_update_seconds = 0.0;
+  /// Time spent in preconditioner SetOperator calls
+  mutable double preconditioner_setup_seconds = 0.0;
 
   /// Optional matrix-free tangent action, y = J(x) dx
   MatrixFreeTangentAction matrix_free_tangent_action;
   /// Optional JacobianOperator factory
   JacobianOperatorFactory jacobian_operator_factory;
+  /// Cached JacobianOperator for the current PCG block
+  mutable std::unique_ptr<JacobianOperator> current_jacobian_operator;
+  /// Owned sparse Jacobian assembled through the JacobianOperator fallback path
+  mutable std::unique_ptr<mfem::HypreParMatrix> assembled_jacobian_from_operator;
+  /// Inverted scalar diagonal preconditioner for the current PCG block
+  mutable mfem::Vector inverse_diagonal_preconditioner;
+  /// Whether the current PCG block should use the scalar diagonal preconditioner
+  mutable bool use_inverse_diagonal_preconditioner = false;
 
 #ifdef MFEM_USE_MPI
   /// Constructor
@@ -980,21 +1851,26 @@ class PcgBlockSolver : public mfem::NewtonSolver {
   void assembleJacobian(const mfem::Vector& x) const
   {
     SMITH_MARK_FUNCTION;
+    auto start = Clock::now();  // timed span opens before the counter bump and the GetGradient call
     ++num_jacobian_assembles;
     grad = &oper->GetGradient(x);
     if (nonlinear_options.force_monolithic) {
       auto* grad_blocked = dynamic_cast<mfem::BlockOperator*>(grad);
       if (grad_blocked) grad = buildMonolithicMatrix(*grad_blocked).release();
     }
+    jacobian_assembly_seconds += secondsSince(start);  // also covers the optional monolithic rebuild above
   }
 
   /// Evaluate the nonlinear residual.
   mfem::real_t computeResidual(const mfem::Vector& x, mfem::Vector& residual) const
   {
     SMITH_MARK_FUNCTION;
+    auto start = Clock::now();  // timed span opens before the counter bump and the operator evaluation
     ++num_residuals;
     oper->Mult(x, residual);
-    return Norm(residual);
+    const auto norm = Norm(residual);  // evaluated before the timer is read, so it counts toward residual_seconds
+    residual_seconds += secondsSince(start);
+    return norm;
   }
 
   /// Set an optional matrix-free tangent action.
@@ -1009,20 +1885,106 @@ class PcgBlockSolver : public mfem::NewtonSolver {
     jacobian_operator_factory = std::move(jacobian_operator);
   }
 
+  /// Invoke the registered factory at x and keep the resulting JacobianOperator as the cached tangent.
+  void updateJacobianOperator(const mfem::Vector& x) const
+  {
+    SMITH_MARK_FUNCTION;
+    SLIC_ERROR_ROOT_IF(!jacobian_operator_factory, "No JacobianOperator factory is registered.");
+    auto eval_begin = Clock::now();
+    num_jacobian_operator_evals += 1;  // counts factory invocations, including one that yields null
+    current_jacobian_operator = jacobian_operator_factory(x);
+    SLIC_ERROR_ROOT_IF(!current_jacobian_operator, "JacobianOperator factory returned a null operator.");
+    jacobian_operator_eval_seconds += secondsSince(eval_begin);  // the null check above is inside the timed span
+  }
+
+  /// Rebuild the inverse-diagonal preconditioner from the cached JacobianOperator as 1 / max(|d_i|, floor).
+  void updateDiagonalPreconditioner() const
+  {
+    SMITH_MARK_FUNCTION;
+    SLIC_ERROR_ROOT_IF(!current_jacobian_operator, "Cannot build diagonal preconditioner without a JacobianOperator.");
+
+    // Stage 1: the operator writes the raw diagonal into the buffer that is inverted in place below.
+    auto assembly_begin = Clock::now();
+    current_jacobian_operator->assembleDiagonal(inverse_diagonal_preconditioner);
+    diagonal_assembly_seconds += secondsSince(assembly_begin);
+    ++num_diagonal_assembles;
+
+    // Stage 2: clamp magnitudes from below by a floor relative to the largest entry, then invert.
+    // NOTE(review): max is over this vector's entries only -- confirm whether a cross-rank reduction is needed.
+    auto invert_begin = Clock::now();
+    mfem::Vector& diag = inverse_diagonal_preconditioner;
+    const int num_entries = diag.Size();
+    double largest_magnitude = 0.0;
+    for (int k = 0; k < num_entries; ++k) largest_magnitude = std::max(largest_magnitude, std::abs(diag[k]));
+    const double floor = nonlinear_options.pcg_diagonal_floor * largest_magnitude;
+    SLIC_ERROR_ROOT_IF(!(floor > 0.0), "Cannot invert a zero Jacobian diagonal for PCG-block preconditioning.");
+    for (int k = 0; k < num_entries; ++k) diag[k] = 1.0 / std::max(std::abs(diag[k]), floor);
+    diagonal_invert_seconds += secondsSince(invert_begin);
+
+    use_inverse_diagonal_preconditioner = true;
+  }
+
+  /// Rebuild the tangent and preconditioner data at x ahead of the next PCG block attempt.
+  void refreshBlockOperators(const mfem::Vector& x) const
+  {
+    auto refresh_begin = Clock::now();
+    if (!jacobian_operator_factory) {  // no factory: assemble via the nonlinear operator's gradient
+      SLIC_ERROR_ROOT_IF(nonlinear_options.pcg_use_jacobian_diagonal_preconditioner,
+                         "PCG-block diagonal preconditioning requires a registered JacobianOperator.");
+      current_jacobian_operator.reset();
+      use_inverse_diagonal_preconditioner = false;
+      assembleJacobian(x);
+      ++num_preconditioner_updates;
+      auto setup_begin = Clock::now();
+      pcg_precond.SetOperator(*grad);
+      preconditioner_setup_seconds += secondsSince(setup_begin);
+    } else {
+      updateJacobianOperator(x);
+      ++num_preconditioner_updates;
+      if (!nonlinear_options.pcg_use_jacobian_diagonal_preconditioner) {  // sparse matrix from the cached operator
+        use_inverse_diagonal_preconditioner = false;
+        auto assembly_begin = Clock::now();
+        ++num_jacobian_assembles;
+        assembled_jacobian_from_operator = current_jacobian_operator->assemble();
+        jacobian_assembly_seconds += secondsSince(assembly_begin);
+        grad = assembled_jacobian_from_operator.get();  // non-owning view; the unique_ptr member keeps it alive
+        auto setup_begin = Clock::now();
+        pcg_precond.SetOperator(*grad);
+        preconditioner_setup_seconds += secondsSince(setup_begin);
+      } else {
+        updateDiagonalPreconditioner();
+      }
+    }
+    preconditioner_update_seconds += secondsSince(refresh_begin);  // whole refresh, nested timers included
+  }
+
   /// Apply the tangent at x to dx.
   void hessVec(const mfem::Vector& x, const mfem::Vector& dx, mfem::Vector& y) const
   {
     SMITH_MARK_FUNCTION;
+    auto start = Clock::now();  // one timer shared by all branches; each branch reads it after its product
     ++num_hess_vecs;
-    if (jacobian_operator_factory) {
-      ++num_jacobian_operator_evals;
-      std::unique_ptr<JacobianOperator> jacobian_operator = jacobian_operator_factory(x);
-      SLIC_ERROR_ROOT_IF(!jacobian_operator, "JacobianOperator factory returned a null operator.");
-      jacobian_operator->Mult(dx, y);
+    if (current_jacobian_operator) {  // cached operator is applied as-is; x is not consulted on this path
+      current_jacobian_operator->Mult(dx, y);
+      const double seconds = secondsSince(start);
+      hess_vec_seconds += seconds;
+      jacobian_operator_hess_vec_seconds += seconds;
+    } else if (jacobian_operator_factory) {  // nothing cached: evaluate the factory at x, then apply it
+      updateJacobianOperator(x);
+      current_jacobian_operator->Mult(dx, y);
+      const double seconds = secondsSince(start);  // includes the factory evaluation performed just above
+      hess_vec_seconds += seconds;
+      jacobian_operator_hess_vec_seconds += seconds;
     } else if (matrix_free_tangent_action) {
       matrix_free_tangent_action(x, dx, y);
+      const double seconds = secondsSince(start);
+      hess_vec_seconds += seconds;
+      matrix_free_hess_vec_seconds += seconds;
     } else {
       grad->Mult(dx, y);
+      const double seconds = secondsSince(start);
+      hess_vec_seconds += seconds;  // total across every branch
+      assembled_hess_vec_seconds += seconds;  // per-path share for the assembled-matrix product
     }
   }
 
@@ -1030,8 +1992,19 @@ class PcgBlockSolver : public mfem::NewtonSolver {
   void precond(const mfem::Vector& x, mfem::Vector& v) const
   {
     SMITH_MARK_FUNCTION;
+    auto start = Clock::now();  // both preconditioner paths below are charged to preconditioner_seconds
     ++num_preconds;
-    pcg_precond.Mult(x, v);
+    if (use_inverse_diagonal_preconditioner) {  // apply the stored inverse diagonal instead of pcg_precond
+      SLIC_ERROR_ROOT_IF(inverse_diagonal_preconditioner.Size() != x.Size(),
+                         "PCG-block diagonal preconditioner size does not match the residual vector.");
+      v.SetSize(x.Size());
+      for (int i = 0; i < x.Size(); ++i) {  // entrywise product: v[i] = inverse diagonal entry times x[i]
+        v[i] = inverse_diagonal_preconditioner[i] * x[i];
+      }
+    } else {
+      pcg_precond.Mult(x, v);
+    }
+    preconditioner_seconds += secondsSince(start);
   }
 
   /// Return solver diagnostic counters.
@@ -1057,6 +2030,18 @@ class PcgBlockSolver : public mfem::NewtonSolver {
             .num_trust_capped_steps = num_trust_capped_steps,
             .num_accepted_steps = num_accepted_steps,
             .num_trial_steps = num_trial_steps,
+            .residual_seconds = residual_seconds,
+            .hess_vec_seconds = hess_vec_seconds,
+            .jacobian_operator_hess_vec_seconds = jacobian_operator_hess_vec_seconds,
+            .assembled_hess_vec_seconds = assembled_hess_vec_seconds,
+            .matrix_free_hess_vec_seconds = matrix_free_hess_vec_seconds,
+            .preconditioner_seconds = preconditioner_seconds,
+            .jacobian_operator_eval_seconds = jacobian_operator_eval_seconds,
+            .jacobian_assembly_seconds = jacobian_assembly_seconds,
+            .diagonal_assembly_seconds = diagonal_assembly_seconds,
+            .diagonal_invert_seconds = diagonal_invert_seconds,
+            .preconditioner_update_seconds = preconditioner_update_seconds,
+            .preconditioner_setup_seconds = preconditioner_setup_seconds,
             .final_h_scale = final_h_scale,
             .last_trust_ratio = last_trust_ratio};
   }
@@ -1093,6 +2078,22 @@ class PcgBlockSolver : public mfem::NewtonSolver {
     num_trial_steps = 0;
     final_h_scale = nonlinear_options.pcg_h_scale_init;
     last_trust_ratio = 0.0;
+    residual_seconds = 0.0;
+    hess_vec_seconds = 0.0;
+    jacobian_operator_hess_vec_seconds = 0.0;
+    assembled_hess_vec_seconds = 0.0;
+    matrix_free_hess_vec_seconds = 0.0;
+    preconditioner_seconds = 0.0;
+    jacobian_operator_eval_seconds = 0.0;
+    jacobian_assembly_seconds = 0.0;
+    diagonal_assembly_seconds = 0.0;
+    diagonal_invert_seconds = 0.0;
+    preconditioner_update_seconds = 0.0;
+    preconditioner_setup_seconds = 0.0;
+    current_jacobian_operator.reset();
+    assembled_jacobian_from_operator.reset();
+    inverse_diagonal_preconditioner.SetSize(0);
+    use_inverse_diagonal_preconditioner = false;
 
     SLIC_ERROR_ROOT_IF(nonlinear_options.pcg_block_len <= 0, "PcgBlock requires pcg_block_len > 0");
     SLIC_ERROR_ROOT_IF(nonlinear_options.pcg_window <= 0, "PcgBlock requires pcg_window > 0");
@@ -1210,9 +2211,7 @@ class PcgBlockSolver : public mfem::NewtonSolver {
         break;
       }
 
-      assembleJacobian(X);
-      ++num_preconditioner_updates;
-      pcg_precond.SetOperator(*grad);
+      refreshBlockOperators(X);
 
       r_block = r;
       const double norm_block = norm;
@@ -1460,6 +2459,8 @@ class PcgBlockSolver : public mfem::NewtonSolver {
 
           if (retries_remaining <= 0 || h_scale < nonlinear_options.pcg_min_h_scale) {
             block_finished = true;
+          } else {
+            refreshBlockOperators(X);
           }
         }
       }
@@ -1524,6 +2525,11 @@ void EquationSolver::setJacobianOperator(JacobianOperatorFactory jacobian_operat
   auto* pcg_block = dynamic_cast<PcgBlockSolver*>(nonlin_solver_.get());
   if (pcg_block) {
     pcg_block->setJacobianOperator(std::move(jacobian_operator));
+    return;
+  }
+  auto* trust_region = dynamic_cast<TrustRegion*>(nonlin_solver_.get());
+  if (trust_region) {
+    trust_region->setJacobianOperator(std::move(jacobian_operator));
   }
 }
 
@@ -1545,6 +2551,15 @@ std::optional<PcgBlockDiagnostics> EquationSolver::pcgBlockDiagnostics() const
   return pcg_block->diagnostics();
 }
 
+std::optional<TrustRegionDiagnostics> EquationSolver::trustRegionDiagnostics() const
+{
+  // Only a TrustRegion solver exposes these counters; any other solver type yields an empty optional.
+  if (const auto* solver = dynamic_cast<const TrustRegion*>(nonlin_solver_.get())) {
+    return solver->diagnostics();
+  }
+  return std::nullopt;
+}
+
 void SuperLUSolver::Mult(const mfem::Vector& input, mfem::Vector& output) const
 {
   SLIC_ERROR_ROOT_IF(!superlu_mat_, "Operator must be set prior to solving with SuperLU");
diff --git a/src/smith/numerics/equation_solver.hpp b/src/smith/numerics/equation_solver.hpp
index 8d67cc64a5..6100fad73f 100644
--- a/src/smith/numerics/equation_solver.hpp
+++ b/src/smith/numerics/equation_solver.hpp
@@ -146,12 +146,192 @@ struct PcgBlockDiagnostics {
   size_t num_accepted_steps = 0;
   /// Number of trial inner PCG steps
   size_t num_trial_steps = 0;
+  /// Time spent evaluating nonlinear residuals
+  double residual_seconds = 0.0;
+  /// Time spent applying Jacobian-vector products
+  double hess_vec_seconds = 0.0;
+  /// Time spent applying JacobianOperator products
+  double jacobian_operator_hess_vec_seconds = 0.0;
+  /// Time spent applying assembled Jacobian products
+  double assembled_hess_vec_seconds = 0.0;
+  /// Time spent applying legacy matrix-free tangent products
+  double matrix_free_hess_vec_seconds = 0.0;
+  /// Time spent applying preconditioners
+  double preconditioner_seconds = 0.0;
+  /// Time spent evaluating JacobianOperator factories
+  double jacobian_operator_eval_seconds = 0.0;
+  /// Time spent assembling sparse Jacobians
+  double jacobian_assembly_seconds = 0.0;
+  /// Time spent directly assembling diagonals
+  double diagonal_assembly_seconds = 0.0;
+  /// Time spent inverting direct diagonals
+  double diagonal_invert_seconds = 0.0;
+  /// Time spent refreshing preconditioner data
+  double preconditioner_update_seconds = 0.0;
+  /// Time spent in preconditioner SetOperator calls
+  double preconditioner_setup_seconds = 0.0;
   /// Last trust scale used by the solver
   double final_h_scale = 1.0;
   /// Last accepted block trust ratio
   double last_trust_ratio = 0.0;
 };
 
+/// Diagnostic counters for the TrustRegion nonlinear solver
+struct TrustRegionDiagnostics {
+  /// Number of nonlinear residual evaluations
+  size_t num_residuals = 0;
+  /// Number of Jacobian-vector products
+  size_t num_hess_vecs = 0;
+  /// Number of Hessian-vector products in model CG solves
+  size_t num_model_hess_vecs = 0;
+  /// Number of Hessian-vector products in Cauchy-point construction
+  size_t num_cauchy_hess_vecs = 0;
+  /// Number of Hessian-vector products in line-search model checks
+  size_t num_line_search_hess_vecs = 0;
+  /// Number of preconditioner applications
+  size_t num_preconds = 0;
+  /// Number of assembled Jacobians
+  size_t num_jacobian_assembles = 0;
+  /// Number of solver-facing JacobianOperator evaluations
+  size_t num_jacobian_operator_evals = 0;
+  /// Number of direct diagonal assemblies
+  size_t num_diagonal_assembles = 0;
+  /// Number of trust-region model CG iterations
+  size_t num_cg_iterations = 0;
+  /// Number of subspace solves
+  size_t num_subspace_solves = 0;
+  /// Number of retained-leftmost Hessian-vector products for subspace solves
+  size_t num_subspace_leftmost_hess_vecs = 0;
+  /// Number of batched Hessian-vector groups used for subspace solves
+  size_t num_subspace_hess_vec_batches = 0;
+  /// Number of Hessian-vector products inside subspace batches
+  size_t num_subspace_batched_hess_vecs = 0;
+  /// Number of accepted-step history vectors added to subspace solves
+  size_t num_subspace_past_step_vectors = 0;
+  /// Number of Hessian-vector products for accepted-step history vectors
+  size_t num_subspace_past_step_hess_vecs = 0;
+  /// Number of nonlinear-solve-start directions added to subspace solves
+  size_t num_subspace_solve_start_vectors = 0;
+  /// Number of Hessian-vector products for nonlinear-solve-start directions
+  size_t num_subspace_solve_start_hess_vecs = 0;
+  /// Number of quadratic subspace backend solves
+  size_t num_quadratic_subspace_solves = 0;
+  /// Number of cubic subspace backend attempts
+  size_t num_cubic_subspace_attempts = 0;
+  /// Number of cubic subspace attempts that used the cubic candidate
+  size_t num_cubic_subspace_uses = 0;
+  /// Number of cubic subspace attempts that fell back to the quadratic candidate
+  size_t num_cubic_subspace_quadratic_fallbacks = 0;
+  /// Number of preconditioner operator updates
+  size_t num_preconditioner_updates = 0;
+  /// Number of nonmonotone accepted TrustRegion steps based on work surrogate
+  size_t num_nonmonotone_work_accepts = 0;
+  /// Number of accepted TrustRegion work-surrogate steps that monotone acceptance would have rejected
+  size_t num_monotone_work_would_reject = 0;
+  /// Time spent evaluating nonlinear residuals
+  double residual_seconds = 0.0;
+  /// Time spent applying Jacobian-vector products
+  double hess_vec_seconds = 0.0;
+  /// Time spent applying Hessian-vector products in model CG solves
+  double model_hess_vec_seconds = 0.0;
+  /// Time spent applying Hessian-vector products in Cauchy-point construction
+  double cauchy_hess_vec_seconds = 0.0;
+  /// Time spent applying Hessian-vector products in line-search model checks
+  double line_search_hess_vec_seconds = 0.0;
+  /// Time spent applying JacobianOperator products
+  double jacobian_operator_hess_vec_seconds = 0.0;
+  /// Time spent evaluating JacobianOperator factories
+  double jacobian_operator_eval_seconds = 0.0;
+  /// Time spent directly assembling diagonals
+  double diagonal_assembly_seconds = 0.0;
+  /// Time spent inverting direct diagonals
+  double diagonal_invert_seconds = 0.0;
+  /// Time spent applying preconditioners
+  double preconditioner_seconds = 0.0;
+  /// Total time spent in the nonlinear solve
+  double total_seconds = 0.0;
+  /// Time spent solving trust-region model problems
+  double model_solve_seconds = 0.0;
+  /// Total time spent in trust-region subspace solves
+  double subspace_seconds = 0.0;
+  /// Time spent building/applying retained leftmost directions for subspace solves
+  double subspace_leftmost_seconds = 0.0;
+  /// Time spent in subspace Hessian-vector batches
+  double subspace_hess_vec_batch_seconds = 0.0;
+  /// Time spent removing dependent directions before subspace solves
+  double subspace_filter_seconds = 0.0;
+  /// Time spent in dense subspace backend assembly/solve work
+  double subspace_backend_seconds = 0.0;
+  /// Time spent projecting dense subspace Hessian
+  double subspace_project_A_seconds = 0.0;
+  /// Time spent projecting dense subspace Gram matrix
+  double subspace_project_gram_seconds = 0.0;
+  /// Time spent projecting dense subspace gradient
+  double subspace_project_b_seconds = 0.0;
+  /// Time spent building dense subspace orthonormal basis
+  double subspace_basis_seconds = 0.0;
+  /// Time spent forming reduced dense Hessian
+  double subspace_reduced_A_seconds = 0.0;
+  /// Time spent in dense subspace eigensystems
+  double subspace_dense_eigensystem_seconds = 0.0;
+  /// Time spent in dense trust-region solve outside eigensystems
+  double subspace_dense_trust_solve_seconds = 0.0;
+  /// Time spent reconstructing full-space subspace solution
+  double subspace_reconstruct_solution_seconds = 0.0;
+  /// Time spent reconstructing retained leftmost vectors
+  double subspace_reconstruct_leftmost_seconds = 0.0;
+  /// Time spent in subspace postprocessing and model-energy comparison
+  double subspace_finalize_seconds = 0.0;
+  /// Time spent building the Cauchy point
+  double cauchy_point_seconds = 0.0;
+  /// Time spent in dogleg step construction
+  double dogleg_seconds = 0.0;
+  /// Time spent in line-search and trust-radius acceptance logic
+  double line_search_seconds = 0.0;
+  /// Time spent in TrustRegion dot products
+  double dot_seconds = 0.0;
+  /// Number of TrustRegion dot products
+  size_t num_dot_products = 0;
+  /// Number of TrustRegion dot batches/reductions
+  size_t num_dot_reductions = 0;
+  /// Number of dot products in trust-region model solves
+  size_t num_model_dot_products = 0;
+  /// Number of dot products in Cauchy-point construction
+  size_t num_cauchy_dot_products = 0;
+  /// Number of dot products in dogleg construction
+  size_t num_dogleg_dot_products = 0;
+  /// Number of dot products in line-search and acceptance logic
+  size_t num_line_search_dot_products = 0;
+  /// Number of setup dot products outside the main per-step kernels
+  size_t num_setup_dot_products = 0;
+  /// Time spent in trust-region model-solve dot products
+  double model_dot_seconds = 0.0;
+  /// Time spent in Cauchy-point dot products
+  double cauchy_dot_seconds = 0.0;
+  /// Time spent in dogleg dot products
+  double dogleg_dot_seconds = 0.0;
+  /// Time spent in line-search dot products
+  double line_search_dot_seconds = 0.0;
+  /// Time spent in setup dot products
+  double setup_dot_seconds = 0.0;
+  /// Time spent in TrustRegion vector add/update operations
+  double vector_update_seconds = 0.0;
+  /// Time spent in TrustRegion vector copies and scaling operations
+  double vector_copy_scale_seconds = 0.0;
+  /// Time spent in TrustRegion boundary projection operations
+  double projection_seconds = 0.0;
+  /// Time spent assembling sparse Jacobians
+  double jacobian_assembly_seconds = 0.0;
+  /// Time spent refreshing preconditioner data
+  double preconditioner_update_seconds = 0.0;
+  /// Time spent in preconditioner SetOperator calls
+  double preconditioner_setup_seconds = 0.0;
+  /// Last TrustRegion accumulated work-surrogate level used by nonmonotone acceptance
+  double last_work_objective = 0.0;
+  /// Last nonmonotone reference work-surrogate level
+  double last_nonmonotone_work_reference = 0.0;
+};
+
 /**
  * @brief This class manages the objects typically required to solve a nonlinear set of equations arising from
  * discretization of a PDE of the form F(x) = 0. Specifically, it has
@@ -247,6 +427,12 @@ class EquationSolver {
    */
   std::optional<PcgBlockDiagnostics> pcgBlockDiagnostics() const;
 
+  /**
+   * Returns diagnostic counters when the nonlinear solver is TrustRegion.
+   * @return Optional TrustRegion diagnostics; empty for other nonlinear solvers
+   */
+  std::optional<TrustRegionDiagnostics> trustRegionDiagnostics() const;
+
   /**
    * Returns the underlying linear solver object
    * @return A non-owning reference to the underlying linear solver
diff --git a/src/smith/numerics/mfem_trust_region_subspace.cpp b/src/smith/numerics/mfem_trust_region_subspace.cpp
new file mode 100644
index 0000000000..454cb81d2d
--- /dev/null
+++ b/src/smith/numerics/mfem_trust_region_subspace.cpp
@@ -0,0 +1,589 @@
+// Copyright (c) Lawrence Livermore National Security, LLC and
+// other Smith Project Developers. See the top-level LICENSE file for
+// details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+
+#include "smith/numerics/trust_region_solver.hpp"
+
+#include <algorithm>
+#include <chrono>
+#include <cmath>
+#include <vector>
+
+#include "smith/infrastructure/profiling.hpp"
+
+namespace smith {
+
+namespace {
+
+using Clock = std::chrono::steady_clock;
+
+double secondsSince(Clock::time_point start)
+{
+  return std::chrono::duration_cast<std::chrono::duration<double>>(Clock::now() - start).count();
+}
+
+TrustRegionSubspaceTimings& mutableTrustRegionSubspaceTimings()
+{
+  static TrustRegionSubspaceTimings timings;
+  return timings;
+}
+
+}  // namespace
+
+void resetTrustRegionSubspaceTimings()
+{
+  mutableTrustRegionSubspaceTimings() = TrustRegionSubspaceTimings {};
+}
+
+TrustRegionSubspaceTimings trustRegionSubspaceTimings()
+{
+  return mutableTrustRegionSubspaceTimings();
+}
+
+int globalSize(const mfem::Vector& parallel_v, const MPI_Comm& comm)
+{
+  // Start from this rank's entry count and sum it in place over every rank of comm.
+  int total_size = parallel_v.Size();
+  MPI_Allreduce(MPI_IN_PLACE, &total_size, 1, MPI_INT, MPI_SUM, comm);
+  return total_size;
+}
+
+double innerProduct(const mfem::Vector& a, const mfem::Vector& b, const MPI_Comm& comm)
+{
+  return mfem::InnerProduct(comm, a, b);
+}
+
+// Filters out zero-norm directions and directions whose cosine with an already-kept direction exceeds 0.999,
+// preserving input order, and returns the kept directions together with the same-index A_directions entries.
+std::pair<std::vector<const mfem::Vector*>, std::vector<const mfem::Vector*>> removeDependentDirections(
+    std::vector<const mfem::Vector*> directions, std::vector<const mfem::Vector*> A_directions)
+{
+  SMITH_MARK_FUNCTION;
+  // Same guard as removeDependentDirectionTriples: A_directions is indexed with direction indices below.
+  MFEM_VERIFY(directions.size() == A_directions.size(), "Direction pair lists must have matching sizes.");
+
+  const size_t num_dirs = directions.size();
+  std::vector<double> norms;
+  norms.reserve(num_dirs);
+  for (const mfem::Vector* dir : directions) {
+    norms.push_back(std::sqrt(mfem::InnerProduct(MPI_COMM_WORLD, *dir, *dir)));
+  }
+
+  // Indices of the retained directions, in input order.
+  std::vector<size_t> kept;
+  for (size_t i = 0; i < num_dirs; ++i) {
+    bool keepi = norms[i] != 0.0;
+    for (const size_t j : kept) {
+      // The comparison is signed, so an anti-parallel direction is not rejected by this test.
+      const double dot_ij = mfem::InnerProduct(MPI_COMM_WORLD, *directions[i], *directions[j]);
+      if (dot_ij > 0.999 * norms[i] * norms[j]) keepi = false;
+    }
+    if (keepi) kept.push_back(i);
+  }
+
+  std::vector<const mfem::Vector*> directions_new;
+  std::vector<const mfem::Vector*> A_directions_new;
+  for (const size_t j : kept) {
+    directions_new.push_back(directions[j]);
+    A_directions_new.push_back(A_directions[j]);
+  }
+
+  return std::make_pair(directions_new, A_directions_new);
+}
+
+// Three-list variant of the dependent-direction filter: drops zero-norm directions and those whose cosine with
+// an already-kept direction exceeds 0.999, then thins both companion lists with the same kept indices.
+std::tuple<std::vector<const mfem::Vector*>, std::vector<const mfem::Vector*>, std::vector<const mfem::Vector*>>
+removeDependentDirectionTriples(std::vector<const mfem::Vector*> directions,
+                                std::vector<const mfem::Vector*> A_directions,
+                                std::vector<const mfem::Vector*> previous_A_directions)
+{
+  SMITH_MARK_FUNCTION;
+  MFEM_VERIFY(directions.size() == A_directions.size() && directions.size() == previous_A_directions.size(),
+              "Direction triple lists must have matching sizes.");
+
+  // Norm of every candidate, computed once up front.
+  const size_t direction_count = directions.size();
+  std::vector<double> lengths;
+  for (const mfem::Vector* candidate : directions) {
+    lengths.push_back(std::sqrt(mfem::InnerProduct(MPI_COMM_WORLD, *candidate, *candidate)));
+  }
+
+  // Indices of the directions retained so far, in input order.
+  std::vector<size_t> retained;
+  for (size_t i = 0; i < direction_count; ++i) {
+    bool is_independent = lengths[i] != 0.0;
+    for (const size_t j : retained) {
+      // No early exit: every retained direction is compared even after a rejection.
+      const double overlap = mfem::InnerProduct(MPI_COMM_WORLD, *directions[i], *directions[j]);
+      is_independent = is_independent && !(overlap > 0.999 * lengths[i] * lengths[j]);
+    }
+    if (is_independent) {
+      retained.push_back(i);
+    }
+  }
+
+  std::vector<const mfem::Vector*> directions_new;
+  std::vector<const mfem::Vector*> A_directions_new;
+  std::vector<const mfem::Vector*> previous_A_directions_new;
+  for (const size_t j : retained) {
+    directions_new.push_back(directions[j]);
+    A_directions_new.push_back(A_directions[j]);
+    previous_A_directions_new.push_back(previous_A_directions[j]);
+  }
+
+  return std::make_tuple(directions_new, A_directions_new, previous_A_directions_new);
+}
+
+#ifdef MFEM_USE_LAPACK
+
+TrustRegionSubspaceResult solveSubspaceProblem(const std::vector<const mfem::Vector*>& directions,
+                                               const std::vector<const mfem::Vector*>& A_directions,
+                                               const mfem::Vector& b, double delta, int num_leftmost)
+{
+  return solveSubspaceProblemMfem(directions, A_directions, b, delta, num_leftmost);
+}
+
+namespace {
+
+double dot(const mfem::Vector& a, const mfem::Vector& b)
+{
+  return a * b;
+}
+
+double norm(const mfem::Vector& x)
+{
+  return x.Norml2();
+}
+
+mfem::Vector operator+(const mfem::Vector& x, double value)
+{
+  mfem::Vector out(x);
+  for (int i = 0; i < out.Size(); ++i) {
+    out[i] += value;
+  }
+  return out;
+}
+
+mfem::Vector pointwiseMultiply(const mfem::Vector& a, const mfem::Vector& b)
+{
+  mfem::Vector out(a.Size());
+  for (int i = 0; i < a.Size(); ++i) {
+    out[i] = a[i] * b[i];
+  }
+  return out;
+}
+
+mfem::Vector pointwiseDivide(const mfem::Vector& a, const mfem::Vector& b)
+{
+  mfem::Vector out(a.Size());
+  for (int i = 0; i < a.Size(); ++i) {
+    out[i] = a[i] / b[i];
+  }
+  return out;
+}
+
+double sumAbs(const mfem::Vector& x)
+{
+  double total = 0.0;
+  for (int i = 0; i < x.Size(); ++i) {
+    total += std::abs(x[i]);
+  }
+  return total;
+}
+
+double sum(const mfem::Vector& x)
+{
+  double total = 0.0;
+  for (int i = 0; i < x.Size(); ++i) {
+    total += x[i];
+  }
+  return total;
+}
+
+// Replaces each off-diagonal pair of A by its average, in place; diagonal entries are not touched.
+void symmetrize(mfem::DenseMatrix& A)
+{
+  MFEM_VERIFY(A.Height() == A.Width(), "symmetrize requires square matrix");
+  const int size = A.Height();
+  for (int col = 0; col + 1 < size; ++col) {
+    for (int row = col + 1; row < size; ++row) {
+      A(row, col) = A(col, row) = 0.5 * (A(row, col) + A(col, row));
+    }
+  }
+}
+
+struct SubspaceProjections {
+  mfem::DenseMatrix sAs;
+  mfem::DenseMatrix ss;
+  mfem::Vector sb;
+};
+
+// Forms the dense projections s_i . As_j, s_i . s_j and s_i . b of the given directions with one MPI_Allreduce.
+// Only pairs with j >= i are accumulated; the lower triangles are mirrored copies of the upper ones.
+SubspaceProjections denseSubspaceProjections(const std::vector<const mfem::Vector*>& states,
+                                             const std::vector<const mfem::Vector*>& Astates, const mfem::Vector& b)
+{
+  MFEM_VERIFY(states.size() == Astates.size(),
+              "Search directions and their linear operator result must have same number of columns");
+  MFEM_VERIFY(!states.empty(), "Subspace projections require at least one direction.");
+
+  const int n = static_cast<int>(states.size());
+  const int vector_size = states[0]->Size();
+  for (size_t col = 0; col < states.size(); ++col) {
+    MFEM_VERIFY(states[col]->Size() == vector_size, "Subspace direction sizes differ.");
+    MFEM_VERIFY(Astates[col]->Size() == vector_size, "Subspace Hessian-vector sizes differ.");
+  }
+  MFEM_VERIFY(b.Size() == vector_size, "Subspace right-hand-side size differs.");
+
+  // Packed buffer layout: [upper triangle of sAs | upper triangle of ss | sb], triangles stored row by row.
+  const int num_upper = n * (n + 1) / 2;
+  const auto packedIndex = [n](int row, int col) { return row * n - (row * (row - 1)) / 2 + (col - row); };
+  const size_t gram_begin = size_t(num_upper);
+  const size_t rhs_begin = size_t(2 * num_upper);
+  const int num_entries = 2 * num_upper + n;
+  std::vector<mfem::real_t> rank_sums(size_t(num_entries), 0.0);
+  std::vector<mfem::real_t> all_sums(size_t(num_entries), 0.0);
+
+  // Entry-major sweep: each local entry k contributes to every packed product.
+  for (int k = 0; k < vector_size; ++k) {
+    const double rhs_k = b[k];
+    for (int i = 0; i < n; ++i) {
+      const double s_ik = (*states[size_t(i)])[k];
+      rank_sums[rhs_begin + size_t(i)] += s_ik * rhs_k;
+      for (int j = i; j < n; ++j) {
+        const size_t slot = size_t(packedIndex(i, j));
+        rank_sums[slot] += s_ik * (*Astates[size_t(j)])[k];
+        rank_sums[gram_begin + slot] += s_ik * (*states[size_t(j)])[k];
+      }
+    }
+  }
+
+  MPI_Allreduce(rank_sums.data(), all_sums.data(), num_entries, MFEM_MPI_REAL_T, MPI_SUM, MPI_COMM_WORLD);
+
+  // Unpack, mirroring each upper-triangle value into the lower triangle.
+  SubspaceProjections projections{mfem::DenseMatrix(n), mfem::DenseMatrix(n), mfem::Vector(n)};
+  for (int i = 0; i < n; ++i) {
+    projections.sb[i] = all_sums[rhs_begin + size_t(i)];
+    for (int j = i; j < n; ++j) {
+      const size_t slot = size_t(packedIndex(i, j));
+      projections.sAs(i, j) = projections.sAs(j, i) = all_sums[slot];
+      projections.ss(i, j) = projections.ss(j, i) = all_sums[gram_begin + slot];
+    }
+  }
+
+  return projections;
+}
+
+mfem::Vector solveDense(const mfem::DenseMatrix& A, const mfem::Vector& b)
+{
+  mfem::DenseMatrix A_copy(A);
+  mfem::DenseMatrixInverse inv(A_copy);
+  mfem::Vector x(b.Size());
+  inv.Mult(b, x);
+  return x;
+}
+
+double quadraticEnergy(const mfem::DenseMatrix& A, const mfem::Vector& b, const mfem::Vector& x)
+{
+  mfem::Vector Ax(x.Size());
+  A.Mult(x, Ax);
+  return 0.5 * dot(x, Ax) - dot(x, b);
+}
+
+double pnormSquared(const mfem::Vector& bvv, const mfem::Vector& sig)
+{
+  return sum(pointwiseDivide(bvv, pointwiseMultiply(sig, sig)));
+}
+
+double qnormSquared(const mfem::Vector& bvv, const mfem::Vector& sig)
+{
+  mfem::Vector sig_sq = pointwiseMultiply(sig, sig);
+  mfem::Vector sig_cu = pointwiseMultiply(sig_sq, sig);
+  return sum(pointwiseDivide(bvv, sig_cu));
+}
+
+mfem::Vector matrixColumn(const mfem::DenseMatrix& A, int j)
+{
+  mfem::Vector col(A.Height());
+  for (int i = 0; i < A.Height(); ++i) {
+    col[i] = A(i, j);
+  }
+  return col;
+}
+
+mfem::DenseMatrix columnsToMatrix(const std::vector<mfem::Vector>& cols)
+{
+  mfem::DenseMatrix A(cols.empty() ? 0 : cols[0].Size(), static_cast<int>(cols.size()));
+  for (int j = 0; j < A.Width(); ++j) {
+    for (int i = 0; i < A.Height(); ++i) {
+      A(i, j) = cols[size_t(j)][i];
+    }
+  }
+  return A;
+}
+
+// Solves the dense trust-region problem min 0.5 x^T A x - b^T x subject to |x| <= delta from the eigensystem of A.
+// Returns the minimizer, up to num_leftmost leading eigenpairs (first columns/values reported by Eigensystem), and
+// a flag that is false only when the boundary-multiplier iteration stops without meeting its 1e-9 tolerance.
+std::tuple<mfem::Vector, std::vector<mfem::Vector>, std::vector<double>, bool> exactTrustRegionSolve(
+    mfem::DenseMatrix A, const mfem::Vector& b, double delta, int num_leftmost)
+{
+  auto dense_solve_start = Clock::now();
+  if (A.Height() != A.Width()) {
+    throw PetscException("Exact trust region solver requires square matrices");
+  }
+  if (A.Height() != b.Size()) {
+    throw PetscException("The right hand size for exact trust region solve must be consistent with the input matrix size");
+  }
+
+  mfem::Vector sigs;
+  mfem::DenseMatrix V;
+  auto eig_start = Clock::now();
+  A.Eigensystem(sigs, V);
+  mutableTrustRegionSubspaceTimings().dense_eigensystem_seconds += secondsSince(eig_start);
+
+  std::vector<mfem::Vector> leftmosts;
+  std::vector<double> minsigs;
+  const int num_leftmost_possible = std::min(num_leftmost, sigs.Size());
+  for (int i = 0; i < num_leftmost_possible; ++i) {
+    leftmosts.emplace_back(matrixColumn(V, i));
+    minsigs.emplace_back(sigs[i]);
+  }
+
+  const mfem::Vector leftMost = matrixColumn(V, 0);
+  const double minSig = sigs[0];
+
+  // Components of b along each eigenvector.
+  mfem::Vector bv(sigs.Size());
+  for (int i = 0; i < sigs.Size(); ++i) {
+    bv[i] = dot(matrixColumn(V, i), b);
+  }
+
+  mfem::Vector bvOverSigs = pointwiseDivide(bv, sigs);
+  const double sigScale = sumAbs(sigs) / sigs.Size();
+  const double eps = 1e-12 * sigScale;
+  // Interior case: smallest eigenvalue is at least eps and the unshifted step bv / sigs already fits within delta.
+  if ((minSig >= eps) && (norm(bvOverSigs) <= delta)) {
+    mutableTrustRegionSubspaceTimings().dense_trust_solve_seconds += secondsSince(dense_solve_start);
+    return std::make_tuple(solveDense(A, b), leftmosts, minsigs, true);
+  }
+  // Otherwise shift so that the smallest shifted eigenvalue equals eps (no shift when minSig >= eps).
+  double lam = minSig < eps ? -minSig + eps : 0.0;
+  mfem::Vector sigsPlusLam = sigs + lam;
+  bvOverSigs = pointwiseDivide(bv, sigsPlusLam);
+
+  if ((minSig < eps) && (norm(bvOverSigs) < delta)) {
+    // Hard case: the shifted solve p = sum_i bv_i / (sig_i + lam) v_i is shorter than delta, so it is extended
+    // along the leftmost eigenvector to the boundary. p must use the shifted coefficients that were just
+    // norm-tested; accumulating the raw bv_i would rebuild b itself and could leave the trust region.
+    mfem::Vector p(b.Size());
+    p = 0.0;
+    for (int i = 0; i < b.Size(); ++i) {
+      p.Add(bvOverSigs[i], matrixColumn(V, i));
+    }
+
+    const double pz = dot(p, leftMost);
+    const double pp = dot(p, p);
+    const double ddmpp = std::max(delta * delta - pp, 0.0);
+    // Roots of |p + tau * leftMost|^2 = delta^2.
+    const double tau1 = -pz + std::sqrt(pz * pz + ddmpp);
+    const double tau2 = -pz - std::sqrt(pz * pz + ddmpp);
+
+    mfem::Vector x1(p);
+    mfem::Vector x2(p);
+    x1.Add(tau1, leftMost);
+    x2.Add(tau2, leftMost);
+
+    const double e1 = quadraticEnergy(A, b, x1);
+    const double e2 = quadraticEnergy(A, b, x2);
+
+    mutableTrustRegionSubspaceTimings().dense_trust_solve_seconds += secondsSince(dense_solve_start);
+    return std::make_tuple(e1 < e2 ? x1 : x2, leftmosts, minsigs, true);
+  }
+  // Boundary case: update lam until the step length matches delta; sigsPlusLam still holds sigs + lam from above.
+  const mfem::Vector bvbv = pointwiseMultiply(bv, bv);
+  double pNormSq = pnormSquared(bvbv, sigsPlusLam);
+  double bError = (std::sqrt(pNormSq) - delta) / delta;
+
+  size_t iters = 0;
+  constexpr size_t maxIters = 30;
+  while ((std::abs(bError) > 1e-9) && (iters++ < maxIters)) {
+    const double qNormSq = qnormSquared(bvbv, sigsPlusLam);
+    lam += (pNormSq / qNormSq) * bError;
+    sigsPlusLam = sigs + lam;
+    pNormSq = pnormSquared(bvbv, sigsPlusLam);
+    bError = (std::sqrt(pNormSq) - delta) / delta;
+  }
+  // Judge success by the residual itself: comparing the counter against maxIters misreports a solve that
+  // converges on the final permitted update (iters == maxIters) as a failure.
+  const bool success = std::abs(bError) <= 1e-9;
+
+  bvOverSigs = pointwiseDivide(bv, sigsPlusLam);
+
+  mfem::Vector x(b.Size());
+  x = 0.0;
+  for (int i = 0; i < b.Size(); ++i) {
+    x.Add(bvOverSigs[i], matrixColumn(V, i));
+  }
+  // Rescale to length delta exactly, choosing the sign with the lower model energy.
+  const double e1 = quadraticEnergy(A, b, x);
+  mfem::Vector neg_x(x);
+  neg_x *= -1.0;
+  const double e2 = quadraticEnergy(A, b, neg_x);
+  x *= (e2 < e1 ? -delta : delta) / norm(x);
+
+  mutableTrustRegionSubspaceTimings().dense_trust_solve_seconds += secondsSince(dense_solve_start);
+  return std::make_tuple(x, leftmosts, minsigs, success);
+}
+
+// Builds the matrix whose columns are eigenvectors of gram scaled by 1/sqrt(eigenvalue), keeping only eigenvalues
+// above 1e-9 times the sum of absolute eigenvalues (written to trace_mag); the result may have zero columns.
+mfem::DenseMatrix orthonormalBasisTransform(const mfem::DenseMatrix& gram, double& trace_mag)
+{
+  mfem::DenseMatrix work(gram);
+  mfem::Vector eigenvalues;
+  mfem::DenseMatrix eigenvectors;
+  work.Eigensystem(eigenvalues, eigenvectors);
+
+  trace_mag = 0.0;
+  for (int k = 0; k < eigenvalues.Size(); ++k) {
+    trace_mag += std::abs(eigenvalues[k]);
+  }
+
+  std::vector<mfem::Vector> scaled_columns;
+  for (int k = 0; k < eigenvalues.Size(); ++k) {
+    if (!(eigenvalues[k] > 1e-9 * trace_mag)) continue;
+    scaled_columns.emplace_back(matrixColumn(eigenvectors, k));
+    scaled_columns.back() /= std::sqrt(eigenvalues[k]);
+  }
+
+  return columnsToMatrix(scaled_columns);
+}
+
+mfem::DenseMatrix tripleProduct(const mfem::DenseMatrix& L, const mfem::DenseMatrix& A, const mfem::DenseMatrix& R)
+{
+  mfem::DenseMatrix tmp(A.Height(), R.Width());
+  mfem::Mult(A, R, tmp);
+  mfem::DenseMatrix out(L.Width(), R.Width());
+  mfem::MultAtB(L, tmp, out);
+  return out;
+}
+
+mfem::Vector projectWithTranspose(const mfem::DenseMatrix& A, const mfem::Vector& x)
+{
+  mfem::Vector out(A.Width());
+  A.MultTranspose(x, out);
+  return out;
+}
+
+mfem::Vector combineDirections(const std::vector<const mfem::Vector*>& states, const mfem::Vector& coeffs)
+{
+  mfem::Vector out(*states[0]);
+  out = 0.0;
+  for (int i = 0; i < coeffs.Size(); ++i) {
+    out.Add(coeffs[i], *states[size_t(i)]);
+  }
+  return out;
+}
+
+}  // namespace
+
+TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector<const mfem::Vector*>& states,
+                                                   const std::vector<const mfem::Vector*>& Astates,
+                                                   const mfem::Vector& b, double delta, int num_leftmost)
+{
+  SMITH_MARK_FUNCTION;
+  auto& timings = mutableTrustRegionSubspaceTimings();
+  ++timings.num_solves;
+  timings.total_input_dim += states.size();
+  timings.max_input_dim = std::max(timings.max_input_dim, states.size());
+  // Trust-region solve restricted to span(states): project, build a reduced basis, solve densely, expand back.
+  auto project_A_start = Clock::now();
+  SubspaceProjections projections = denseSubspaceProjections(states, Astates, b);
+  mfem::DenseMatrix& sAs = projections.sAs;
+  timings.project_A_seconds += secondsSince(project_A_start);
+  symmetrize(sAs);
+  // Only sAs is scanned for NaNs; ss and sb are not checked here.
+  for (int i = 0; i < sAs.Height(); ++i) {
+    for (int j = 0; j < sAs.Width(); ++j) {
+      if (std::isnan(sAs(i, j))) {
+        throw PetscException("States in subspace solve contain NaNs.");
+      }
+    }
+  }
+  // ss came out of the denseSubspaceProjections call above, so this timer brackets only a reference binding.
+  auto project_gram_start = Clock::now();
+  mfem::DenseMatrix& ss = projections.ss;
+  timings.project_gram_seconds += secondsSince(project_gram_start);
+  symmetrize(ss);
+  // T maps reduced coordinates to coefficients of the input directions; its width is the retained dimension.
+  double trace_mag = 0.0;
+  auto basis_start = Clock::now();
+  mfem::DenseMatrix T = orthonormalBasisTransform(ss, trace_mag);
+  timings.basis_seconds += secondsSince(basis_start);
+  if (T.Width() == 0) {
+    throw PetscException("No independent directions in MFEM subspace solve.");
+  }
+  timings.total_reduced_dim += static_cast<size_t>(T.Width());
+  timings.max_reduced_dim = std::max(timings.max_reduced_dim, static_cast<size_t>(T.Width()));
+  // Reduced Hessian pAp = T^T sAs T.
+  auto reduced_A_start = Clock::now();
+  mfem::DenseMatrix pAp = tripleProduct(T, sAs, T);
+  timings.reduced_A_seconds += secondsSince(reduced_A_start);
+  symmetrize(pAp);
+  // As with ss, sb is already available, so this timer also brackets only a reference binding.
+  auto project_b_start = Clock::now();
+  const mfem::Vector& sb = projections.sb;
+  timings.project_b_seconds += secondsSince(project_b_start);
+  const mfem::Vector pb = projectWithTranspose(T, sb);
+  // The dense solver's success flag is discarded; its step is used whether or not the flag is set.
+  auto [reduced_x, leftvecs, leftvals, success] = exactTrustRegionSolve(pAp, pb, delta, num_leftmost);
+  (void)success;
+  const double energy = quadraticEnergy(pAp, pb, reduced_x);
+  // Expand the reduced solution: coefficients T * reduced_x, then the matching combination of states.
+  auto reconstruct_solution_start = Clock::now();
+  mfem::Vector coeffs(T.Height());
+  T.Mult(reduced_x, coeffs);
+  mfem::Vector sol = combineDirections(states, coeffs);
+  timings.reconstruct_solution_seconds += secondsSince(reconstruct_solution_start);
+  // Expand each eigenvector returned by the dense solve in the same way.
+  auto reconstruct_leftmost_start = Clock::now();
+  std::vector<std::shared_ptr<mfem::Vector>> leftmosts;
+  for (const auto& leftvec : leftvecs) {
+    mfem::Vector left_coeffs(T.Height());
+    T.Mult(leftvec, left_coeffs);
+    leftmosts.emplace_back(std::make_shared<mfem::Vector>(combineDirections(states, left_coeffs)));
+  }
+  timings.reconstruct_leftmost_seconds += secondsSince(reconstruct_leftmost_start);
+
+  return std::make_tuple(sol, leftmosts, leftvals, energy);
+}
+
+#else
+
+TrustRegionSubspaceResult solveSubspaceProblem(const std::vector<const mfem::Vector*>& directions,
+                                               const std::vector<const mfem::Vector*>& A_directions,
+                                               const mfem::Vector& b, double delta, int num_leftmost)
+{
+#ifdef SMITH_USE_SLEPC
+  return solveSubspaceProblemPetsc(directions, A_directions, b, delta, num_leftmost);
+#else
+  throw PetscException("MFEM trust-region subspace solve requires MFEM LAPACK support.");
+  return std::make_tuple(b, std::vector<std::shared_ptr<mfem::Vector>> {}, std::vector<double> {}, 0.0);
+#endif
+}
+
+TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector<const mfem::Vector*>&,
+                                                   const std::vector<const mfem::Vector*>&, const mfem::Vector& b,
+                                                   double, int)
+{
+  throw PetscException("MFEM trust-region subspace solve requires MFEM LAPACK support.");
+  return std::make_tuple(b, std::vector<std::shared_ptr<mfem::Vector>> {}, std::vector<double> {}, 0.0);
+}
+
+#endif  // MFEM_USE_LAPACK
+
+}  // namespace smith
diff --git a/src/smith/numerics/trust_region_solver.cpp b/src/smith/numerics/petsc_trust_region_subspace.cpp
similarity index 64%
rename from src/smith/numerics/trust_region_solver.cpp
rename to src/smith/numerics/petsc_trust_region_subspace.cpp
index 8d8d04a9cc..aac63c7cd1 100644
--- a/src/smith/numerics/trust_region_solver.cpp
+++ b/src/smith/numerics/petsc_trust_region_subspace.cpp
@@ -14,26 +14,10 @@
 #include "smith/numerics/dense_petsc.hpp"
 
 namespace smith {
-
-/**
- * @brief Get the global size of a mfem vector
- * @param parallel_v Vector to check global size
- * @param comm Parallel communicator
- */
-int globalSize(const mfem::Vector& parallel_v, const MPI_Comm& comm)
-{
-  int local_size = parallel_v.Size();
-  int global_size;
-  MPI_Allreduce(&local_size, &global_size, 1, MPI_INT, MPI_SUM, comm);
-  return global_size;
-}
+namespace {
 
 /// @brief struct which aids in moving between mfem::Vector and petsc BV
 struct BasisVectors {
-  /**
-   * @brief Construct with a representative state to set sizes
-   * @param state The state which is used to set sizes for basis vectors
-   */
   BasisVectors(const mfem::Vector& state) : local_rows(state.Size()), global_rows(globalSize(state, PETSC_COMM_WORLD))
   {
     VecCreateMPI(PETSC_COMM_WORLD, local_rows, global_rows, &v);
@@ -47,15 +31,8 @@ struct BasisVectors {
     }
   }
 
-  /**
-   * @brief Destructor
-   */
   ~BasisVectors() { VecDestroy(&v); }
 
-  /**
-   * @brief Construct petsc BV from vector of mfem::Vector
-   * @param states The states used to construct basis vectors
-   */
   BV constructBases(const std::vector<const mfem::Vector*>& states) const
   {
     size_t num_cols = states.size();
@@ -81,10 +58,6 @@ struct BasisVectors {
   Vec v;
 };
 
-/**
- * @brief Create a petsc vector from a mfem::Vector
- * @param state The state used to create an mfem::Vector
- */
 Vec petscVec(const mfem::Vector& state)
 {
   const int local_rows = state.Size();
@@ -110,11 +83,6 @@ Vec petscVec(const mfem::Vector& state)
   return v;
 }
 
-/**
- * @brief Copy a petsc vector to an mfem::Vector
- * @param v The petsc vector
- * @param s The mfem vector
- */
 void copy(const Vec& v, mfem::Vector& s)
 {
   const int local_rows = s.Size();
@@ -133,11 +101,6 @@ void copy(const Vec& v, mfem::Vector& s)
   VecGetValues(v, local_rows, &col_indices[0], &s[0]);
 }
 
-/**
- * @brief The reduced matrix in the space of {s}
- * @param s The vector of mfem::Vector of directions
- * @param As The vector of mfem::Vector of a global matrix A operated on directions
- */
 Mat dot(const std::vector<const mfem::Vector*>& s, const std::vector<const mfem::Vector*>& As)
 {
   SLIC_ERROR_IF(s.size() != As.size(),
@@ -157,11 +120,6 @@ Mat dot(const std::vector<const mfem::Vector*>& s, const std::vector<const mfem:
   return sAs;
 }
 
-/**
- * @brief The reduced vector s.T*b
- * @param s The vector of mfem::vector of directions
- * @param b The right hand size vector to be reduced
- */
 Vec dot(const std::vector<const mfem::Vector*>& s, const mfem::Vector& b)
 {
   size_t num_cols = s.size();
@@ -173,11 +131,6 @@ Vec dot(const std::vector<const mfem::Vector*>& s, const mfem::Vector& b)
   return sb;
 }
 
-/**
- * @brief The qr decomposition of the state vectors
- * @param states The vector of mfem::vectors of directions
- * @return Pair of BV Q and DenseMat R
- */
 auto qr(const std::vector<const mfem::Vector*>& states)
 {
   BasisVectors bvs(*states[0]);
@@ -193,13 +146,6 @@ auto qr(const std::vector<const mfem::Vector*>& states)
   return std::make_pair(Q, DenseMat(R));
 }
 
-/**
- * @brief compute the quadratic energy from small dense matrices and vectors
- * @param A The stiffness matrix
- * @param b The rhs vector
- * @param x The current solution vector
- * @return The quadratic, linearized energy approximation
- */
 double quadraticEnergy(const DenseMat& A, const DenseVec& b, const DenseVec& x)
 {
   DenseVec Ax = A * x;
@@ -208,47 +154,20 @@ double quadraticEnergy(const DenseMat& A, const DenseVec& b, const DenseVec& x)
   return 0.5 * xAx - xb;
 }
 
-/**
- * @brief compute the pnorm_squared
- * @param bvv input vector
- * @param sig eigenvectors
- */
 double pnorm_squared(const DenseVec& bvv, const DenseVec& sig)
 {
   auto bvv_div_sig_squared = bvv / (sig * sig);
   return sum(bvv_div_sig_squared);
 }
 
-/**
- * @brief compute the qnorm_squared
- * @param bvv input vector
- * @param sig eigenvectors
- */
 double qnorm_squared(const DenseVec& bvv, const DenseVec& sig)
 {
   auto bvv_div_sig_cubed = bvv / (sig * sig * sig);
   return sum(bvv_div_sig_cubed);
-  // return bvv.dot((1.0 / (sig * sig * sig)).matrix());
 }
 
-//  returns:
-//    minimum energy solution within delta
-//    N leftmost eigenvectors
-//    N smallest eigenvalue
-//    success status
-
-/**
- * @brief solve the trust region problem exactly using a variant of the Moore Sorensen algorithm
- * @param A matrix
- * @param b rhs
- * @param delta trust region radius
- * @param num_leftmost the number of leftmost eigenvector/values to output
- * returns the solution vector, a std::vector of leftmost vectors
- * a std::vector of leftmost eigenvalues and the energy change (relative to x=0)
- */
 auto exactTrustRegionSolve(DenseMat A, const DenseVec& b, double delta, int num_leftmost)
 {
-  // minimize 1/2 x^T A x - b^T x, s.t. norm(x) <= delta
   auto [isize, jsize] = A.size();
   auto isize2 = b.size();
   SLIC_ERROR_IF(isize != jsize, "Exact trust region solver requires square matrices");
@@ -267,7 +186,6 @@ auto exactTrustRegionSolve(DenseMat A, const DenseVec& b, double delta, int num_
   const auto& leftMost = V[0];
   double minSig = sigs[0];
 
-  // bv = V.T b, V has columns which are eigenvectors
   DenseVec bv(isize);
   for (size_t i = 0; i < size_t(isize); ++i) {
     bv.setValue(i, dot(V[i], b));
@@ -277,22 +195,16 @@ auto exactTrustRegionSolve(DenseMat A, const DenseVec& b, double delta, int num_
   double sigScale = sum(abs(sigs)) / isize;
   double eps = 1e-12 * sigScale;
 
-  // Check if solution is inside the trust region
   if ((minSig >= eps) && (norm(bvOverSigs) <= delta)) {
     return std::make_tuple(A.solve(b), leftmosts, minsigs, true);
   }
 
-  // if we get here, the solution must be on the tr boundary
-  // consider bounding the initial guess, see More' Sorenson paper
   double lam = minSig < eps ? -minSig + eps : 0.0;
 
-  // try to solve this for lam:
-  // (A + lam I)p = b, such that norm(p) = Delta
   DenseVec sigsPlusLam = sigs + lam;
 
   bvOverSigs = bv / sigsPlusLam;
 
-  // Check for the hard case
   if ((minSig < eps) && (norm(bvOverSigs) < delta)) {
     DenseVec p(isize);
     p = 0.0;
@@ -327,7 +239,6 @@ auto exactTrustRegionSolve(DenseMat A, const DenseVec& b, double delta, int num_
   double pNorm = std::sqrt(pNormSq);
   double bError = (pNorm - delta) / delta;
 
-  // consider an out if it doesn't converge, or use a better initial guess, or bound the lam from below and above.
   size_t iters = 0;
   size_t maxIters = 30;
   while ((std::abs(bError) > 1e-9) && (iters++ < maxIters)) {
@@ -364,7 +275,6 @@ auto exactTrustRegionSolve(DenseMat A, const DenseVec& b, double delta, int num_
   return std::make_tuple(x, leftmosts, minsigs, success);
 }
 
-/// @brief remove the vector at location j and return what is left
 std::vector<const mfem::Vector*> remove_at(const std::vector<const mfem::Vector*>& a, size_t j)
 {
   std::vector<const mfem::Vector*> b;
@@ -376,11 +286,11 @@ std::vector<const mfem::Vector*> remove_at(const std::vector<const mfem::Vector*
   return b;
 }
 
-/// @brief returns the solution, as well as a list of the N leftmost eigenvectors
-/// and their eigenvalues, and the predicted model energy change
-std::tuple<mfem::Vector, std::vector<std::shared_ptr<mfem::Vector>>, std::vector<double>, double> solveSubspaceProblem(
-    const std::vector<const mfem::Vector*>& states, const std::vector<const mfem::Vector*>& Astates,
-    const mfem::Vector& b, double delta, int num_leftmost)
+}  // namespace
+
+TrustRegionSubspaceResult solveSubspaceProblemPetsc(const std::vector<const mfem::Vector*>& states,
+                                                    const std::vector<const mfem::Vector*>& Astates,
+                                                    const mfem::Vector& b, double delta, int num_leftmost)
 {
   SMITH_MARK_FUNCTION;
   DenseMat sAs1 = dot(states, Astates);
@@ -388,14 +298,12 @@ std::tuple<mfem::Vector, std::vector<std::shared_ptr<mfem::Vector>>, std::vector
 
   if (sAs.hasNan()) {
     throw PetscException("States in subspace solve contain NaNs.");
-    return std::make_tuple(b, std::vector<std::shared_ptr<mfem::Vector>>{}, std::vector<double>{}, 0);
   }
 
   auto [Q_parallel, R] = qr(states);
 
   if (R.hasNan()) {
     throw PetscException("R from qr returning with a NaN.");
-    return std::make_tuple(b, std::vector<std::shared_ptr<mfem::Vector>>{}, std::vector<double>{}, 0);
   }
 
   auto [rows, cols] = R.size();
@@ -406,13 +314,11 @@ std::tuple<mfem::Vector, std::vector<std::shared_ptr<mfem::Vector>>, std::vector
     trace_mag += std::abs(R(i, i));
   }
 
-  // remove any nearly colinear state
   for (int i = 0; i < rows; ++i) {
     if (R(i, i) < 1e-9 * trace_mag) {
-      // printf("removing after QR state number %d\n", i);
       auto statesNew = remove_at(states, size_t(i));
       auto AstatesNew = remove_at(Astates, size_t(i));
-      return solveSubspaceProblem(statesNew, AstatesNew, b, delta, num_leftmost);
+      return solveSubspaceProblemPetsc(statesNew, AstatesNew, b, delta, num_leftmost);
     }
   }
 
@@ -425,6 +331,7 @@ std::tuple<mfem::Vector, std::vector<std::shared_ptr<mfem::Vector>>, std::vector
   DenseVec pb(pb_vec);
 
   auto [reduced_x, leftvecs, leftvals, success] = exactTrustRegionSolve(pAp, pb, delta, num_leftmost);
+  (void)success;
 
   double energy = quadraticEnergy(pAp, pb, reduced_x);
 
@@ -450,47 +357,6 @@ std::tuple<mfem::Vector, std::vector<std::shared_ptr<mfem::Vector>>, std::vector
   return std::make_tuple(sol, leftmosts, leftvals, energy);
 }
 
-/// @brief Remove any obvious dependent directions, namely ones which are scaled version of previous directions
-/// The case where they are linear combinations of previous direction will be handled in the QR solver
-std::pair<std::vector<const mfem::Vector*>, std::vector<const mfem::Vector*>> removeDependentDirections(
-    std::vector<const mfem::Vector*> directions, std::vector<const mfem::Vector*> A_directions)
-{
-  SMITH_MARK_FUNCTION;
-  std::vector<double> norms;
-  size_t num_dirs = directions.size();
-
-  for (size_t i = 0; i < num_dirs; ++i) {
-    norms.push_back(std::sqrt(mfem::InnerProduct(PETSC_COMM_WORLD, *directions[i], *directions[i])));
-  }
-
-  std::vector<std::pair<const mfem::Vector*, size_t>> kepts;
-  for (size_t i = 0; i < num_dirs; ++i) {
-    bool keepi = true;
-    if (norms[i] == 0) keepi = false;
-    for (auto&& kept_and_j : kepts) {
-      size_t j = kept_and_j.second;
-      double dot_ij = mfem::InnerProduct(PETSC_COMM_WORLD, *directions[i], *kept_and_j.first);
-      if (dot_ij > 0.999 * norms[i] * norms[j]) {
-        keepi = false;
-      }
-    }
-    // if (!keepi) printf("not keeping %zu\n",i);
-    if (keepi) {
-      kepts.emplace_back(std::make_pair(directions[i], i));
-    }
-  }
-
-  std::vector<const mfem::Vector*> directions_new;
-  std::vector<const mfem::Vector*> A_directions_new;
-
-  for (auto kept_and_j : kepts) {
-    directions_new.push_back(directions[kept_and_j.second]);
-    A_directions_new.push_back(A_directions[kept_and_j.second]);
-  }
-
-  return std::make_pair(directions_new, A_directions_new);
-}
-
 }  // namespace smith
 
 #endif  // SMITH_USE_SLEPC
diff --git a/src/smith/numerics/solver_config.hpp b/src/smith/numerics/solver_config.hpp
index ecbfde4cd9..27635aeda3 100644
--- a/src/smith/numerics/solver_config.hpp
+++ b/src/smith/numerics/solver_config.hpp
@@ -465,12 +465,30 @@ struct NonlinearSolverOptions {
   /// Scaling for the initial trust region size
   double trust_region_scaling = 0.1;
 
+  /// Nonmonotone TrustRegion acceptance window. Zero preserves monotone acceptance.
+  int trust_nonmonotone_window = 0;
+
+  /// Use JacobianOperator products and diagonal preconditioning in TrustRegion instead of assembled sparse products.
+  bool trust_use_jacobian_operator = false;
+
+  /// Use a dense cubic subspace model built from retained Hessian-vector changes.
+  bool trust_use_cubic_subspace = false;
+
   /// Option for how when the subspace solver should be utilized within trust-region solver
   SubSpaceOptions subspace_option = SubSpaceOptions::NEVER;
 
   /// Number of extra leftmost eigenvector to be stored between solves
   int num_leftmost = 1;
 
+  /// Number of additional older accepted TrustRegion steps to include in subspace solves.
+  int trust_num_past_steps = 0;
+
+  /// Include the displacement from current nonlinear-solve state back to the nonlinear-solve initial state.
+  bool trust_use_solve_start_direction = false;
+
+  /// Include the displacement from current nonlinear-solve state to the state with the minimum residual seen so far in this nonlinear solve.
+  bool trust_use_min_residual_direction = false;
+
   /// Should the gradient be converted to a monolithic matrix
   bool force_monolithic = false;
 
@@ -518,6 +536,12 @@ struct NonlinearSolverOptions {
 
   /// Running-mean window for successful PCG-block trust-radius reference steps
   int pcg_delta_avg_window = 5;
+
+  /// Use a direct scalar diagonal extracted from the JacobianOperator as the PCG-block preconditioner
+  bool pcg_use_jacobian_diagonal_preconditioner = false;
+
+  /// Relative floor used when inverting the absolute Jacobian diagonal for PCG-block diagonal preconditioning
+  double pcg_diagonal_floor = 1e-14;
 };
 // _nonlinear_options_end
 
diff --git a/src/smith/numerics/tests/CMakeLists.txt b/src/smith/numerics/tests/CMakeLists.txt
index 10e693b21a..a2577051e2 100644
--- a/src/smith/numerics/tests/CMakeLists.txt
+++ b/src/smith/numerics/tests/CMakeLists.txt
@@ -13,6 +13,7 @@ set(numerics_serial_test_sources
     test_block_preconditioner.cpp
     test_block_preconditioner_backend.cpp
     test_block_preconditioner_custom_operators.cpp
+    test_trust_region_solver_mfem.cpp
     )
 
 smith_add_tests( SOURCES       ${numerics_serial_test_sources}
@@ -30,7 +31,7 @@ if(PETSC_FOUND)
     if(SLEPC_FOUND)
         set(slepc_solver_tests
             test_eigensolver.cpp
-            test_trust_region_solver.cpp
+            test_trust_region_solver_petsc.cpp
             )
         smith_add_tests(SOURCES       ${slepc_solver_tests}
                         DEPENDS_ON    ${numerics_test_dependencies}
diff --git a/src/smith/numerics/tests/test_equationsolver.cpp b/src/smith/numerics/tests/test_equationsolver.cpp
index f0b73bf53e..a534acd8f1 100644
--- a/src/smith/numerics/tests/test_equationsolver.cpp
+++ b/src/smith/numerics/tests/test_equationsolver.cpp
@@ -217,7 +217,10 @@ TEST(EquationSolver, PcgBlockUsesJacobianOperator)
 
     void Mult(const mfem::Vector& dx, mfem::Vector& y) const override { matrix_->Mult(dx, y); }
 
-    std::unique_ptr<mfem::HypreParMatrix> assemble() override { return std::move(matrix_); }
+    std::unique_ptr<mfem::HypreParMatrix> assemble() override
+    {
+      return std::make_unique<mfem::HypreParMatrix>(*matrix_);  // hand out a copy; matrix_ is still used by Mult()
+    }
 
     void assembleDiagonal(mfem::Vector& diag) const override { matrix_->GetDiag(diag); }
 
@@ -314,8 +317,8 @@ TEST(EquationSolver, PcgBlockUsesJacobianOperator)
   const auto diagnostics = eq_solver.pcgBlockDiagnostics();
   ASSERT_TRUE(diagnostics.has_value());
   EXPECT_GT(num_operator_evals, 0);
-  EXPECT_EQ(diagnostics->num_hess_vecs, static_cast<size_t>(num_operator_evals));
   EXPECT_EQ(diagnostics->num_jacobian_operator_evals, static_cast<size_t>(num_operator_evals));
+  EXPECT_GE(diagnostics->num_hess_vecs, diagnostics->num_jacobian_operator_evals);
   EXPECT_EQ(diagnostics->num_diagonal_assembles, 0u);
   EXPECT_TRUE(eq_solver.nonlinearSolver().GetConverged());
   EXPECT_LT(x_computed.DistanceTo(x_exact.GetData()), 1.0e-10);
diff --git a/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp b/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp
new file mode 100644
index 0000000000..6e52393681
--- /dev/null
+++ b/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp
@@ -0,0 +1,500 @@
+// Copyright (c) Lawrence Livermore National Security, LLC and
+// other Smith Project Developers. See the top-level LICENSE file for
+// details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+
+#include <cmath>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "mfem.hpp"
+
+#include "smith/infrastructure/application_manager.hpp"
+#include "smith/numerics/trust_region_solver.hpp"
+
+namespace {
+
+constexpr int test_size = 5;
+constexpr double test_delta = 1.0e-3;
+
+std::vector<mfem::Vector> applyDiagonalOperator(const mfem::Vector& diag, const std::vector<const mfem::Vector*>& states)
+{
+  std::vector<mfem::Vector> products;  // products[k][row] = diag[row] * (*states[k])[row]
+  products.reserve(states.size());
+  for (const mfem::Vector* input : states) {
+    mfem::Vector& product = products.emplace_back(input->Size());
+    for (int row = 0; row < input->Size(); ++row) {
+      product[row] = diag[row] * (*input)[row];
+    }
+  }
+  return products;
+}
+
+void expectNearVector(const mfem::Vector& a, const mfem::Vector& b, double tol)
+{
+  ASSERT_EQ(a.Size(), b.Size());  // fatal failure returns from this helper only, not from the calling TEST
+  for (int i = 0; i < a.Size(); ++i) {
+    EXPECT_NEAR(a[i], b[i], tol);  // entrywise check; every mismatching entry is reported
+  }
+}
+
+std::vector<const mfem::Vector*> toPointers(const std::vector<mfem::Vector>& vectors)
+{
+  // Non-owning views: the returned pointers are only valid while `vectors` is alive and unmodified.
+  std::vector<const mfem::Vector*> views(vectors.size(), nullptr);
+  for (size_t idx = 0; idx < vectors.size(); ++idx) {
+    views[idx] = &vectors[idx];
+  }
+  return views;
+}
+
+struct DiagonalSubspaceFixture {
+  explicit DiagonalSubspaceFixture(int size)  // explicit: an int must not silently convert to a fixture
+      : u1(size),
+        u2(size),
+        u3(size),
+        diag(size),
+        b(size)
+  {
+    u1 = 1.0;
+    for (int i = 0; i < size; ++i) {
+      u2[i] = i + 2.0;
+      u3[i] = i * i - 15.0;
+      diag[i] = 2.0 * i + 0.01 * i * i + 1.25;  // strictly positive for every i >= 0
+      b[i] = -i + 0.02 * i * i + 0.1;
+    }
+  }
+
+  mfem::Vector u1;    // constant direction (all ones)
+  mfem::Vector u2;    // linear-in-index direction
+  mfem::Vector u3;    // quadratic-in-index direction
+  mfem::Vector diag;  // entries of the diagonal operator the tests apply to the directions
+  mfem::Vector b;     // right-hand side the tests pass to the subspace solves
+};
+
+}  // namespace
+
+TEST(TrustRegionSubspaceMfem, RemoveDependentDirectionsDropsDuplicatesAndZero)
+{
+  mfem::Vector d1(4);
+  mfem::Vector d2(4);
+  mfem::Vector d3(4);
+  mfem::Vector hd1(4);
+  mfem::Vector hd2(4);
+  mfem::Vector hd3(4);
+
+  d1 = 0.0;
+  d2 = 0.0;
+  d3 = 0.0;  // d3 stays identically zero
+  hd1 = 0.0;
+  hd2 = 0.0;
+  hd3 = 0.0;
+
+  d1[0] = 1.0;
+  d1[1] = 2.0;
+  d2 = d1;
+  d2 *= 3.0;  // d2 is a positive multiple of d1
+
+  hd1[0] = 2.0;
+  hd1[1] = 5.0;
+  hd2 = hd1;
+  hd2 *= 3.0;
+
+  std::vector<const mfem::Vector*> dirs = {&d1, &d2, &d3};
+  std::vector<const mfem::Vector*> hdirs = {&hd1, &hd2, &hd3};
+
+  auto [dirs_new, hdirs_new] = smith::removeDependentDirections(dirs, hdirs);
+
+  ASSERT_EQ(dirs_new.size(), 1u);  // unsigned literal: size() returns size_t
+  ASSERT_EQ(hdirs_new.size(), 1u);
+  expectNearVector(*dirs_new[0], d1, 0.0);  // zero tolerance: the survivor must equal d1 exactly
+  expectNearVector(*hdirs_new[0], hd1, 0.0);
+}
+
+TEST(TrustRegionSubspaceMfem, RemoveDependentDirectionTriplesKeepsHistoryAligned)
+{
+  mfem::Vector d1(3);
+  mfem::Vector d2(3);
+  mfem::Vector d3(3);
+  mfem::Vector hd1(3);
+  mfem::Vector hd2(3);
+  mfem::Vector hd3(3);
+  mfem::Vector old_hd1(3);
+  mfem::Vector old_hd2(3);
+  mfem::Vector old_hd3(3);
+
+  d1 = 0.0;
+  d2 = 0.0;
+  d3 = 0.0;
+  hd1 = 0.0;
+  hd2 = 0.0;
+  hd3 = 0.0;
+  old_hd1 = 0.0;
+  old_hd2 = 0.0;
+  old_hd3 = 0.0;
+
+  d1[0] = 1.0;
+  d2 = d1;
+  d2 *= 2.0;  // d2 is a positive multiple of d1
+  d3[2] = 1.0;  // d3 has no overlap with d1
+  hd1[0] = 3.0;
+  hd2[0] = 6.0;
+  hd3[2] = 4.0;
+  old_hd1[0] = 2.0;
+  old_hd2[0] = 4.0;
+  old_hd3[2] = 5.0;
+
+  std::vector<const mfem::Vector*> dirs = {&d1, &d2, &d3};
+  std::vector<const mfem::Vector*> hdirs = {&hd1, &hd2, &hd3};
+  std::vector<const mfem::Vector*> old_hdirs = {&old_hd1, &old_hd2, &old_hd3};
+
+  auto [dirs_new, hdirs_new, old_hdirs_new] = smith::removeDependentDirectionTriples(dirs, hdirs, old_hdirs);
+
+  ASSERT_TRUE(dirs_new.size() == 2u && hdirs_new.size() == 2u && old_hdirs_new.size() == 2u);  // guards [0]/[1] below
+  expectNearVector(*dirs_new[0], d1, 0.0);
+  expectNearVector(*hdirs_new[0], hd1, 0.0);
+  expectNearVector(*old_hdirs_new[0], old_hd1, 0.0);
+  expectNearVector(*dirs_new[1], d3, 0.0);
+  expectNearVector(*hdirs_new[1], hd3, 0.0);
+  expectNearVector(*old_hdirs_new[1], old_hd3, 0.0);
+}
+
+TEST(TrustRegionSubspaceMfem, SolveHitsTrustRegionBoundary)
+{
+  DiagonalSubspaceFixture fixture(test_size);
+
+  const std::vector<const mfem::Vector*> states = {&fixture.u1, &fixture.u2, &fixture.u3};
+  const auto astates = applyDiagonalOperator(fixture.diag, states);
+  const auto astate_ptrs = toPointers(astates);  // non-owning views into astates
+
+  auto [sol, leftvecs, leftvals, energy] =
+      smith::solveSubspaceProblemMfem(states, astate_ptrs, fixture.b, test_delta, 1);
+
+  EXPECT_NEAR(sol.Norml2(), test_delta, 1.0e-12);  // the step length must equal the trust-region radius
+  EXPECT_FALSE(leftvecs.empty());
+  EXPECT_EQ(leftvals.size(), 1u);  // unsigned literal: size() returns size_t
+  EXPECT_LT(energy, 0.0);
+}
+
+TEST(TrustRegionSubspaceMfem, GenericSolveUsesMfemBackend)
+{
+  DiagonalSubspaceFixture fixture(test_size);
+
+  const std::vector<const mfem::Vector*> states = {&fixture.u1, &fixture.u2, &fixture.u3, &fixture.u2};
+  const auto astates = applyDiagonalOperator(fixture.diag, states);
+  const auto astate_ptrs = toPointers(astates);
+
+  auto [generic_sol, generic_leftvecs, generic_leftvals, generic_energy] =
+      smith::solveSubspaceProblem(states, astate_ptrs, fixture.b, test_delta, 2);
+  auto [mfem_sol, mfem_leftvecs, mfem_leftvals, mfem_energy] =
+      smith::solveSubspaceProblemMfem(states, astate_ptrs, fixture.b, test_delta, 2);
+
+  expectNearVector(generic_sol, mfem_sol, 1.0e-12);
+  ASSERT_EQ(generic_leftvecs.size(), mfem_leftvecs.size());  // fatal: the loop below indexes both containers
+  ASSERT_EQ(generic_leftvals.size(), mfem_leftvals.size());
+  for (size_t i = 0; i < generic_leftvecs.size(); ++i) {
+    const double same = smith::innerProduct(*generic_leftvecs[i], *mfem_leftvecs[i], MPI_COMM_WORLD);
+    mfem::Vector neg(*mfem_leftvecs[i]);
+    neg *= -1.0;
+    const double flipped = smith::innerProduct(*generic_leftvecs[i], neg, MPI_COMM_WORLD);
+    if (flipped > same) {  // eigenvector sign is arbitrary; |flipped| == |same|, so compare signed values
+      expectNearVector(*generic_leftvecs[i], neg, 1.0e-10);
+    } else {
+      expectNearVector(*generic_leftvecs[i], *mfem_leftvecs[i], 1.0e-10);
+    }
+    EXPECT_NEAR(generic_leftvals[i], mfem_leftvals[i], 1.0e-12);
+  }
+  EXPECT_NEAR(generic_energy, mfem_energy, 1.0e-12);
+}
+
+TEST(TrustRegionSubspaceMfem, SolveHandlesZeroDirection)
+{
+  mfem::Vector u1(4);
+  mfem::Vector u2(4);
+  mfem::Vector zero(4);
+  mfem::Vector diag(4);
+  mfem::Vector b(4);
+
+  zero = 0.0;
+  for (int i = 0; i < 4; ++i) {
+    u1[i] = 1.0 + i;
+    u2[i] = 0.25 * i - 0.5;
+    diag[i] = 1.0 + i;
+    b[i] = 0.5 - 0.1 * i;
+  }
+
+  const std::vector<const mfem::Vector*> states = {&u1, &zero, &u2};  // middle direction is identically zero
+  const auto astates = applyDiagonalOperator(diag, states);
+  const auto astate_ptrs = toPointers(astates);
+
+  auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblemMfem(states, astate_ptrs, b, 0.25, 1);
+
+  EXPECT_LE(sol.Norml2(), 0.25 + 1.0e-12);  // step stays within the radius passed above (0.25)
+  EXPECT_FALSE(leftvecs.empty());
+  EXPECT_EQ(leftvals.size(), 1u);  // unsigned literal: size() returns size_t
+  EXPECT_LT(energy, 0.0);
+}
+
+TEST(TrustRegionCubicSubspaceMfem, ZeroCubicMatchesInteriorQuadraticSolve)
+{
+  mfem::DenseMatrix A(2);
+  A = 0.0;
+  A(0, 0) = 4.0;
+  A(1, 1) = 2.0;
+
+  mfem::Vector b(2);
+  b[0] = 2.0;
+  b[1] = -1.0;
+
+  std::vector<mfem::DenseMatrix> cubic(2, mfem::DenseMatrix(2));
+  for (auto& matrix : cubic) {
+    matrix = 0.0;  // every cubic slice is zero
+  }
+
+  auto [x, energy] = smith::solveDenseCubicTrustRegionProblemMfem(A, b, cubic, 10.0);  // radius 10 >> expected step
+
+  EXPECT_NEAR(x[0], 0.5, 1.0e-10);   // b[0] / A(0, 0)
+  EXPECT_NEAR(x[1], -0.5, 1.0e-10);  // b[1] / A(1, 1)
+  EXPECT_NEAR(energy, -0.75, 1.0e-10);  // equals 0.5 * x^T A x - b^T x at the x expected above
+}
+
+TEST(TrustRegionCubicSubspaceMfem, CubicTermChangesOneDimensionalMinimizer)
+{
+  mfem::DenseMatrix A(1);
+  A(0, 0) = 1.0;
+
+  mfem::Vector b(1);
+  b[0] = 1.0;
+
+  std::vector<mfem::DenseMatrix> cubic(1, mfem::DenseMatrix(1));
+  cubic[0](0, 0) = 6.0;
+
+  auto [x, energy] = smith::solveDenseCubicTrustRegionProblemMfem(A, b, cubic, 1.0);
+
+  const double expected = (-1.0 + std::sqrt(13.0)) / 6.0;  // positive root of 3 x^2 + x - 1 = 0
+  EXPECT_NEAR(x[0], expected, 2.0e-3);
+  EXPECT_NEAR(energy, 0.5 * expected * expected - expected + expected * expected * expected, 5.0e-6);
+}
+
+TEST(TrustRegionCubicSubspaceMfem, RespectsTrustRegionBoundary)
+{
+  mfem::DenseMatrix A(1);
+  A(0, 0) = 1.0;
+
+  mfem::Vector b(1);
+  b[0] = 10.0;  // stationary point of 0.5 * x^2 - 10 * x is x = 10, well outside the radius used below
+
+  std::vector<mfem::DenseMatrix> cubic(1, mfem::DenseMatrix(1));
+  cubic[0] = 0.0;
+
+  auto [x, energy] = smith::solveDenseCubicTrustRegionProblemMfem(A, b, cubic, 0.25);
+
+  EXPECT_NEAR(x.Norml2(), 0.25, 1.0e-12);
+  EXPECT_NEAR(x[0], 0.25, 1.0e-12);  // positive sign: the step points along +b
+  EXPECT_NEAR(energy, 0.5 * 0.25 * 0.25 - 10.0 * 0.25, 1.0e-12);
+}
+
+TEST(TrustRegionCubicSubspaceMfem, HistoryProjectedSubspaceSolveRuns)
+{
+  mfem::Vector e1(2);
+  mfem::Vector e2(2);
+  e1 = 0.0;
+  e2 = 0.0;
+  e1[0] = 1.0;
+  e2[1] = 1.0;
+
+  mfem::Vector h1(2);
+  mfem::Vector h2(2);
+  mfem::Vector old_h1(2);
+  mfem::Vector old_h2(2);
+  h1 = 0.0;
+  h2 = 0.0;
+  old_h1 = 0.0;
+  old_h2 = 0.0;
+  h1[0] = 2.0;
+  h2[1] = 3.0;
+  old_h1[0] = 1.0;  // differs from h1 only in entry 0 (2.0 vs 1.0)
+  old_h2[1] = 3.0;  // identical to h2
+
+  mfem::Vector previous_step(2);
+  previous_step = 0.0;
+  previous_step[0] = 1.0;
+
+  mfem::Vector b(2);
+  b[0] = 1.0;
+  b[1] = 0.25;
+
+  std::vector<const mfem::Vector*> directions = {&e1, &e2};
+  std::vector<const mfem::Vector*> h_directions = {&h1, &h2};
+  std::vector<const mfem::Vector*> old_h_directions = {&old_h1, &old_h2};
+
+  auto [x, leftvecs, leftvals, energy] =
+      smith::solveCubicSubspaceProblemMfem(directions, h_directions, old_h_directions, previous_step, b, 0.5, 1);
+
+  EXPECT_LE(x.Norml2(), 0.5 + 1.0e-12);  // step stays within the radius passed above (0.5)
+  EXPECT_FALSE(leftvecs.empty());
+  EXPECT_EQ(leftvals.size(), 1u);  // unsigned literal: size() returns size_t
+  EXPECT_LT(energy, 0.0);
+}
+
+TEST(TrustRegionCubicSubspaceMfem, FallsBackToQuadraticWhenCubicPredictionDoesNotImprove)
+{
+  mfem::Vector e1(1);
+  mfem::Vector h1(1);
+  mfem::Vector old_h1(1);
+  mfem::Vector previous_step(1);
+  mfem::Vector b(1);
+
+  e1[0] = 1.0;
+  h1[0] = 1.0;
+  old_h1[0] = 1.0;  // equal to h1, so the Hessian-vector product did not change along e1
+  previous_step[0] = 1.0;
+  b[0] = 1.0;
+
+  std::vector<const mfem::Vector*> directions = {&e1};
+  std::vector<const mfem::Vector*> h_directions = {&h1};
+  std::vector<const mfem::Vector*> old_h_directions = {&old_h1};
+
+  auto [cubic_x, cubic_leftvecs, cubic_leftvals, cubic_energy] =
+      smith::solveCubicSubspaceProblemMfem(directions, h_directions, old_h_directions, previous_step, b, 1.0, 1);
+  auto [quadratic_x, quadratic_leftvecs, quadratic_leftvals, quadratic_energy] =
+      smith::solveSubspaceProblemMfem(directions, h_directions, b, 1.0, 1);  // same directions, b, radius, count
+
+  expectNearVector(cubic_x, quadratic_x, 1.0e-12);  // cubic entry point must reproduce the quadratic result
+  EXPECT_EQ(cubic_leftvecs.size(), quadratic_leftvecs.size());
+  EXPECT_EQ(cubic_leftvals.size(), quadratic_leftvals.size());
+  EXPECT_NEAR(cubic_energy, quadratic_energy, 1.0e-12);
+}
+
+TEST(TrustRegionCubicSubspaceMfem, PreviousStepSecantIsExactForCompatibleCubic)
+{
+  mfem::Vector e1(2);
+  mfem::Vector e2(2);
+  e1 = 0.0;
+  e2 = 0.0;
+  e1[0] = 1.0;
+  e2[1] = 1.0;
+
+  mfem::Vector h1(2);
+  mfem::Vector h2(2);
+  mfem::Vector old_h1(2);
+  mfem::Vector old_h2(2);
+  h1 = 0.0;
+  h2 = 0.0;
+  old_h1 = 0.0;
+  old_h2 = 0.0;
+  h1[0] = 1.0;
+  h2[1] = 1.0;
+  old_h1[0] = 7.0;  // h1 - old_h1 = -6 * e1, the same -6 placed in cubic[0](0, 0) below
+  old_h2[1] = 1.0;  // identical to h2
+
+  mfem::Vector previous_step(2);
+  previous_step = 0.0;
+  previous_step[0] = 1.0;
+
+  mfem::Vector b(2);
+  b = 0.0;
+  b[0] = 0.1;
+
+  std::vector<const mfem::Vector*> directions = {&e1, &e2};
+  std::vector<const mfem::Vector*> h_directions = {&h1, &h2};
+  std::vector<const mfem::Vector*> old_h_directions = {&old_h1, &old_h2};
+
+  bool used_cubic = false;
+  auto [x, leftvecs, leftvals, energy] =
+      smith::solveCubicSubspaceProblemMfem(directions, h_directions, old_h_directions, previous_step, b, 1.0, 1,
+                                           &used_cubic);
+
+  mfem::DenseMatrix A(2);  // reference problem solved directly with the dense cubic solver
+  A = 0.0;
+  A(0, 0) = 1.0;
+  A(1, 1) = 1.0;
+  std::vector<mfem::DenseMatrix> cubic(2, mfem::DenseMatrix(2));
+  cubic[0] = 0.0;
+  cubic[1] = 0.0;
+  cubic[0](0, 0) = -6.0;
+  auto [expected_x, expected_energy] = smith::solveDenseCubicTrustRegionProblemMfem(A, b, cubic, 1.0);
+
+  EXPECT_TRUE(used_cubic);
+  expectNearVector(x, expected_x, 1.0e-12);
+  EXPECT_NEAR(energy, expected_energy, 1.0e-12);
+  EXPECT_FALSE(leftvecs.empty());
+  EXPECT_EQ(leftvals.size(), 1u);  // unsigned literal: size() returns size_t
+}
+
+TEST(TrustRegionCubicSubspaceMfem, PreviousStepSecantIsExactForRotatedCompatibleCubic)
+{
+  mfem::Vector e1(2);
+  mfem::Vector e2(2);
+  e1 = 0.0;
+  e2 = 0.0;
+  e1[0] = 1.0;
+  e2[1] = 1.0;
+
+  constexpr double lambda = -6.0;
+  mfem::Vector previous_step(2);
+  previous_step[0] = 1.0;
+  previous_step[1] = 1.0;
+  mfem::Vector u(previous_step);
+  u /= u.Norml2();  // unit vector along previous_step
+
+  mfem::DenseMatrix delta_h(2);
+  delta_h = 0.0;
+  for (int i = 0; i < 2; ++i) {
+    for (int j = 0; j < 2; ++j) {
+      delta_h(i, j) = lambda * previous_step.Norml2() * u[i] * u[j];  // rank-one change along u
+    }
+  }
+
+  mfem::Vector h1(e1);
+  mfem::Vector h2(e2);
+  mfem::Vector old_h1(e1);
+  mfem::Vector old_h2(e2);
+  for (int i = 0; i < 2; ++i) {
+    old_h1[i] -= delta_h(i, 0);  // old_h = h - delta_h, column by column
+    old_h2[i] -= delta_h(i, 1);
+  }
+
+  mfem::Vector b(2);
+  b[0] = 0.1 * u[0];
+  b[1] = 0.1 * u[1];
+
+  std::vector<const mfem::Vector*> directions = {&e1, &e2};
+  std::vector<const mfem::Vector*> h_directions = {&h1, &h2};
+  std::vector<const mfem::Vector*> old_h_directions = {&old_h1, &old_h2};
+
+  bool used_cubic = false;
+  auto [x, leftvecs, leftvals, energy] =
+      smith::solveCubicSubspaceProblemMfem(directions, h_directions, old_h_directions, previous_step, b, 1.0, 1,
+                                           &used_cubic);
+
+  mfem::DenseMatrix A(2);  // reference problem solved directly with the dense cubic solver
+  A = 0.0;
+  A(0, 0) = 1.0;
+  A(1, 1) = 1.0;
+  std::vector<mfem::DenseMatrix> cubic(2, mfem::DenseMatrix(2));
+  cubic[0] = 0.0;
+  cubic[1] = 0.0;
+  for (int k = 0; k < 2; ++k) {
+    for (int i = 0; i < 2; ++i) {
+      for (int j = 0; j < 2; ++j) {
+        cubic[size_t(k)](i, j) = lambda * u[k] * u[i] * u[j];
+      }
+    }
+  }
+  auto [expected_x, expected_energy] = smith::solveDenseCubicTrustRegionProblemMfem(A, b, cubic, 1.0);
+
+  EXPECT_TRUE(used_cubic);
+  expectNearVector(x, expected_x, 1.0e-12);
+  EXPECT_NEAR(energy, expected_energy, 1.0e-12);
+  EXPECT_FALSE(leftvecs.empty());
+  EXPECT_EQ(leftvals.size(), 1u);  // unsigned literal: size() returns size_t
+}
+
+int main(int argc, char* argv[])
+{
+  ::testing::InitGoogleTest(&argc, argv);  // parses and strips the gtest command-line flags
+  smith::ApplicationManager applicationManager(argc, argv);  // NOTE(review): presumably owns MPI init/finalize
+  return RUN_ALL_TESTS();
+}
diff --git a/src/smith/numerics/tests/test_trust_region_solver.cpp b/src/smith/numerics/tests/test_trust_region_solver_petsc.cpp
similarity index 62%
rename from src/smith/numerics/tests/test_trust_region_solver.cpp
rename to src/smith/numerics/tests/test_trust_region_solver_petsc.cpp
index af030fc4c3..1e3eae5433 100644
--- a/src/smith/numerics/tests/test_trust_region_solver.cpp
+++ b/src/smith/numerics/tests/test_trust_region_solver_petsc.cpp
@@ -5,6 +5,7 @@
 // SPDX-License-Identifier: (BSD-3-Clause)
 
 #include <cmath>
+#include <stdexcept>
 #include <string>
 #include <utility>
 #include <vector>
@@ -84,12 +85,9 @@ std::vector<mfem::Vector> applyLinearOperator(const Mat& A, const std::vector<co
   return Astates;
 }
 
-// auto createDiagonalTestMatrix(smith::FiniteElementState& x)
 auto createDiagonalTestMatrix(mfem::Vector& x)
 {
   const int local_rows = x.Size();
-  mfem::Vector one = x;
-  one = 1.0;
   const int global_rows = smith::globalSize(x, PETSC_COMM_WORLD);
 
   Vec b;
@@ -117,14 +115,21 @@ auto createDiagonalTestMatrix(mfem::Vector& x)
   return A;
 }
 
-TEST_F(MeshFixture, QR)
+void expectNearVector(const mfem::Vector& a, const mfem::Vector& b, double tol)
+{
+  ASSERT_EQ(a.Size(), b.Size());  // fatal failure returns from this helper only, not from the calling test
+  for (int i = 0; i < a.Size(); ++i) {
+    EXPECT_NEAR(a[i], b[i], tol);  // entrywise check; every mismatching entry is reported
+  }
+}
+
+TEST_F(MeshFixture, PetscSubspaceSolveHitsTrustRegionBoundary)
 {
   SMITH_MARK_FUNCTION;
 
   auto u1 = smith::StateManager::newState(smith::H1<scalar_field_order, 1>{}, "u1", MESHTAG);
   auto u2 = smith::StateManager::newState(smith::H1<scalar_field_order, 1>{}, "u2", MESHTAG);
   auto u3 = smith::StateManager::newState(smith::H1<scalar_field_order, 1>{}, "u3", MESHTAG);
-  auto u4 = smith::StateManager::newState(smith::H1<scalar_field_order, 1>{}, "u4", MESHTAG);
   auto a = smith::StateManager::newState(smith::H1<scalar_field_order, 1>{}, "a", MESHTAG);
   auto b = smith::StateManager::newState(smith::H1<scalar_field_order, 1>{}, "b", MESHTAG);
 
@@ -132,11 +137,10 @@ TEST_F(MeshFixture, QR)
   for (int i = 0; i < u2.Size(); ++i) {
     u2[i] = i + 2;
     u3[i] = i * i - 15.0;
-    u4[i] = -i + 0.1 * i * i * i - 1.0;
     a[i] = 2 * i + 0.01 * i * i + 1.25;
     b[i] = -i + 0.02 * i * i + 0.1;
   }
-  std::vector<const mfem::Vector*> states = {&u1, &u2, &u3};  //,u4};
+  std::vector<const mfem::Vector*> states = {&u1, &u2, &u3};
 
   auto A_parallel = createDiagonalTestMatrix(a);
   std::vector<mfem::Vector> Astates = applyLinearOperator(A_parallel, states);
@@ -147,12 +151,64 @@ TEST_F(MeshFixture, QR)
   }
 
   double delta = 0.001;
-  auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblem(states, AstatePtrs, b, delta, 1);
+  auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblemPetsc(states, AstatePtrs, b, delta, 1);
 
-  smith::FiniteElementState smith_sol(b);
-  smith_sol = sol;
+  EXPECT_NEAR(sol.Norml2(), delta, 1e-12);
+  EXPECT_FALSE(leftvecs.empty());
+  EXPECT_EQ(leftvals.size(), 1);
+  EXPECT_LT(energy, 0.0);
 
-  EXPECT_NEAR(std::sqrt(smith::innerProduct(smith_sol, smith_sol)), delta, 1e-12);
+  MatDestroy(&A_parallel);
+}
+
+TEST_F(MeshFixture, MfemSubspaceSolveMatchesPetsc)
+{  // The MFEM backend must reproduce the PETSc backend; u2 is deliberately listed twice among the directions.
+  SMITH_MARK_FUNCTION;
+
+  auto u1 = smith::StateManager::newState(smith::H1<scalar_field_order, 1>{}, "u1", MESHTAG);
+  auto u2 = smith::StateManager::newState(smith::H1<scalar_field_order, 1>{}, "u2", MESHTAG);
+  auto u3 = smith::StateManager::newState(smith::H1<scalar_field_order, 1>{}, "u3", MESHTAG);
+  auto a = smith::StateManager::newState(smith::H1<scalar_field_order, 1>{}, "a", MESHTAG);
+  auto b = smith::StateManager::newState(smith::H1<scalar_field_order, 1>{}, "b", MESHTAG);
+
+  u1 = 1.0;
+  for (int i = 0; i < u2.Size(); ++i) {
+    u2[i] = i + 2;
+    u3[i] = i * i - 15.0;
+    a[i] = 2 * i + 0.01 * i * i + 1.25;
+    b[i] = -i + 0.02 * i * i + 0.1;
+  }
+
+  std::vector<const mfem::Vector*> states = {&u1, &u2, &u3, &u2};
+  auto A_parallel = createDiagonalTestMatrix(a);
+  std::vector<mfem::Vector> Astates = applyLinearOperator(A_parallel, states);
+
+  std::vector<const mfem::Vector*> AstatePtrs;
+  for (size_t i = 0; i < Astates.size(); ++i) {
+    AstatePtrs.push_back(&Astates[i]);
+  }
+
+  auto [petsc_sol, petsc_leftvecs, petsc_leftvals, petsc_energy] =
+      smith::solveSubspaceProblemPetsc(states, AstatePtrs, b, 0.001, 2);
+  auto [mfem_sol, mfem_leftvecs, mfem_leftvals, mfem_energy] =
+      smith::solveSubspaceProblemMfem(states, AstatePtrs, b, 0.001, 2);
+
+  expectNearVector(mfem_sol, petsc_sol, 1e-10);
+  ASSERT_EQ(mfem_leftvecs.size(), petsc_leftvecs.size());
+  ASSERT_EQ(mfem_leftvals.size(), petsc_leftvals.size());
+  for (size_t i = 0; i < mfem_leftvecs.size(); ++i) {
+    // Eigenvectors are only defined up to sign, so compare against the PETSc orientation that matches MFEM's.
+    const double same = smith::innerProduct(*mfem_leftvecs[i], *petsc_leftvecs[i], MPI_COMM_WORLD);
+    mfem::Vector neg(*petsc_leftvecs[i]);
+    neg *= -1.0;
+    if (same < 0.0) {  // <mfem, -petsc> equals -same, so comparing magnitudes can never detect a flip
+      expectNearVector(*mfem_leftvecs[i], neg, 1e-9);
+    } else {
+      expectNearVector(*mfem_leftvecs[i], *petsc_leftvecs[i], 1e-9);
+    }
+    EXPECT_NEAR(mfem_leftvals[i], petsc_leftvals[i], 1e-10);
+  }
+  EXPECT_NEAR(mfem_energy, petsc_energy, 1e-12);
 
   MatDestroy(&A_parallel);
 }
diff --git a/src/smith/numerics/trust_region_cubic_subspace.cpp b/src/smith/numerics/trust_region_cubic_subspace.cpp
new file mode 100644
index 0000000000..2bbc86b16c
--- /dev/null
+++ b/src/smith/numerics/trust_region_cubic_subspace.cpp
@@ -0,0 +1,461 @@
+// Copyright (c) Lawrence Livermore National Security, LLC and
+// other Smith Project Developers. See the top-level LICENSE file for
+// details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+
+#include "smith/numerics/trust_region_solver.hpp"
+
+#include <cmath>
+
+#include "smith/infrastructure/profiling.hpp"
+
+namespace smith {
+
+#ifdef MFEM_USE_LAPACK
+
+namespace {
+
+double dot(const mfem::Vector& a, const mfem::Vector& b)
+{  // Thin wrapper over mfem::Vector's operator*; unlike innerProduct() no MPI communicator is passed here.
+  return a * b;
+}
+
+void symmetrize(mfem::DenseMatrix& A)
+{  // Replaces the square matrix A in place by (A + A^T) / 2.
+  MFEM_VERIFY(A.Height() == A.Width(), "symmetrize requires square matrix.");
+  for (int i = 0; i < A.Height(); ++i) {
+    for (int j = 0; j < i; ++j) {  // strictly lower triangle only; the diagonal needs no change
+      const double value = 0.5 * (A(i, j) + A(j, i));
+      A(i, j) = value;
+      A(j, i) = value;
+    }
+  }
+}
+
+mfem::Vector matrixColumn(const mfem::DenseMatrix& A, int j)
+{  // Returns a copy of column j of A as a vector of length A.Height().
+  mfem::Vector col(A.Height());
+  for (int i = 0; i < A.Height(); ++i) {
+    col[i] = A(i, j);
+  }
+  return col;
+}
+
+mfem::DenseMatrix columnsToMatrix(const std::vector<mfem::Vector>& cols)
+{  // Packs cols as the columns of a dense matrix; the row count is taken from cols[0] (0 x 0 for an empty list).
+  mfem::DenseMatrix A(cols.empty() ? 0 : cols[0].Size(), static_cast<int>(cols.size()));
+  for (int j = 0; j < A.Width(); ++j) {
+    for (int i = 0; i < A.Height(); ++i) {
+      A(i, j) = cols[size_t(j)][i];
+    }
+  }
+  return A;
+}
+
+mfem::DenseMatrix denseDot(const std::vector<const mfem::Vector*>& s, const std::vector<const mfem::Vector*>& As)
+{  // result(i, j) = innerProduct(s[i], As[j]) over MPI_COMM_WORLD; s and As must have the same length.
+  MFEM_VERIFY(s.size() == As.size(), "Dense dot requires matching direction counts.");
+  mfem::DenseMatrix result(static_cast<int>(s.size()));
+  for (int i = 0; i < result.Height(); ++i) {
+    for (int j = 0; j < result.Width(); ++j) {
+      result(i, j) = innerProduct(*s[size_t(i)], *As[size_t(j)], MPI_COMM_WORLD);
+    }
+  }
+  return result;
+}
+
+mfem::Vector denseDot(const std::vector<const mfem::Vector*>& s, const mfem::Vector& b)
+{  // result[i] = innerProduct(s[i], b) over MPI_COMM_WORLD.
+  mfem::Vector result(static_cast<int>(s.size()));
+  for (int i = 0; i < result.Size(); ++i) {
+    result[i] = innerProduct(*s[size_t(i)], b, MPI_COMM_WORLD);
+  }
+  return result;
+}
+
+mfem::DenseMatrix orthonormalBasisTransform(const mfem::DenseMatrix& gram)
+{  // Returns a matrix whose columns are the retained eigenvectors of gram, each scaled by 1 / sqrt(eigenvalue).
+  mfem::DenseMatrix gram_copy(gram);  // the eigen-decomposition runs on a copy, leaving gram untouched
+  mfem::Vector evals;
+  mfem::DenseMatrix evecs;
+  gram_copy.Eigensystem(evals, evecs);
+
+  double trace_mag = 0.0;  // sum of |eigenvalue|, used as the scale for the drop tolerance below
+  for (int i = 0; i < evals.Size(); ++i) {
+    trace_mag += std::abs(evals[i]);
+  }
+
+  std::vector<mfem::Vector> kept_columns;
+  for (int i = 0; i < evals.Size(); ++i) {
+    if (evals[i] > 1e-9 * trace_mag) {  // skip non-positive and relatively negligible eigenvalues
+      mfem::Vector col = matrixColumn(evecs, i);
+      col /= std::sqrt(evals[i]);
+      kept_columns.emplace_back(std::move(col));
+    }
+  }
+
+  return columnsToMatrix(kept_columns);  // may have fewer columns than gram has rows
+}
+
+mfem::DenseMatrix tripleProduct(const mfem::DenseMatrix& L, const mfem::DenseMatrix& A, const mfem::DenseMatrix& R)
+{  // Returns L^T * A * R.
+  mfem::DenseMatrix tmp(A.Height(), R.Width());
+  mfem::Mult(A, R, tmp);  // tmp = A * R
+  mfem::DenseMatrix out(L.Width(), R.Width());
+  mfem::MultAtB(L, tmp, out);  // out = L^T * tmp
+  return out;
+}
+
+mfem::Vector projectWithTranspose(const mfem::DenseMatrix& A, const mfem::Vector& x)
+{  // Returns A^T * x, a vector of length A.Width().
+  mfem::Vector out(A.Width());
+  A.MultTranspose(x, out);
+  return out;
+}
+
+mfem::DenseMatrix orthonormalBasisWithFirstVector(const mfem::Vector& first)
+{  // Returns an n x n matrix: column 0 is first / |first|, the rest come from Gram-Schmidt on the unit axes.
+  const int n = first.Size();
+  mfem::DenseMatrix Q(n);
+  Q = 0.0;
+
+  mfem::Vector q0(first);
+  q0 /= q0.Norml2();  // no zero check here; the visible caller verifies |first| > 0 beforehand
+  for (int i = 0; i < n; ++i) {
+    Q(i, 0) = q0[i];
+  }
+
+  int col = 1;  // next column of Q to fill
+  for (int seed = 0; seed < n && col < n; ++seed) {
+    mfem::Vector candidate(n);
+    candidate = 0.0;
+    candidate[seed] = 1.0;  // start from the unit axis e_seed
+    for (int j = 0; j < col; ++j) {  // remove the components along the columns accepted so far
+      const mfem::Vector qj = matrixColumn(Q, j);
+      candidate.Add(-dot(candidate, qj), qj);
+    }
+    const double norm = candidate.Norml2();
+    if (norm > 1.0e-12) {  // an axis (numerically) inside the current span is skipped
+      candidate /= norm;
+      for (int i = 0; i < n; ++i) {
+        Q(i, col) = candidate[i];
+      }
+      ++col;
+    }
+  }
+
+  MFEM_VERIFY(col == n, "Failed to build orthonormal basis for cubic tensor completion.");
+  return Q;
+}
+
+std::vector<mfem::DenseMatrix> completeSymmetricCubicTensor(const mfem::DenseMatrix& deltaA,
+                                                            const mfem::Vector& previous_step)
+{  // Builds n matrices tensor[a](b, c) from deltaA, working in a basis whose first axis is previous_step.
+  const int n = previous_step.Size();
+  const double step_norm = previous_step.Norml2();
+  MFEM_VERIFY(step_norm > 0.0, "Cannot complete cubic tensor with zero previous step.");
+
+  const mfem::DenseMatrix Q = orthonormalBasisWithFirstVector(previous_step);  // column 0 is the unit step
+  mfem::DenseMatrix delta_hat = tripleProduct(Q, deltaA, Q);  // Q^T * deltaA * Q
+  symmetrize(delta_hat);
+
+  std::vector<mfem::DenseMatrix> tensor_hat(static_cast<size_t>(n), mfem::DenseMatrix(n));
+  for (auto& matrix : tensor_hat) {
+    matrix = 0.0;
+  }
+
+  for (int i = 0; i < n; ++i) {  // only entries with at least one index equal to 0 are set; the rest stay zero
+    for (int j = 0; j < n; ++j) {
+      const double value = delta_hat(i, j) / step_norm;
+      tensor_hat[0](i, j) = value;
+      tensor_hat[size_t(i)](0, j) = value;
+      tensor_hat[size_t(i)](j, 0) = value;
+    }
+  }
+
+  std::vector<mfem::DenseMatrix> tensor(static_cast<size_t>(n), mfem::DenseMatrix(n));
+  for (auto& matrix : tensor) {
+    matrix = 0.0;
+  }
+
+  for (int a = 0; a < n; ++a) {  // rotate back: tensor[a](b, c) = sum Q(a,al) Q(b,be) Q(c,ga) tensor_hat[al](be, ga)
+    for (int b = 0; b < n; ++b) {
+      for (int c = 0; c < n; ++c) {
+        double value = 0.0;
+        for (int alpha = 0; alpha < n; ++alpha) {
+          for (int beta = 0; beta < n; ++beta) {
+            for (int gamma = 0; gamma < n; ++gamma) {
+              value += Q(a, alpha) * Q(b, beta) * Q(c, gamma) * tensor_hat[size_t(alpha)](beta, gamma);
+            }
+          }
+        }
+        tensor[size_t(a)](b, c) = value;
+      }
+    }
+  }
+
+  return tensor;
+}
+
+mfem::Vector combineDirections(const std::vector<const mfem::Vector*>& states, const mfem::Vector& coeffs)
+{  // Returns sum_i coeffs[i] * states[i]; states must be non-empty because states[0] is dereferenced.
+  mfem::Vector out(*states[0]);  // copied only to obtain a vector of matching size; zeroed on the next line
+  out = 0.0;
+  for (int i = 0; i < coeffs.Size(); ++i) {
+    out.Add(coeffs[i], *states[size_t(i)]);
+  }
+  return out;
+}
+
+void verifyCubicInputs(const mfem::DenseMatrix& A, const mfem::Vector& b, const std::vector<mfem::DenseMatrix>& cubic,
+                       double delta)
+{  // MFEM_VERIFY checks: A is n x n, b has length n, cubic holds n matrices of size n x n, and delta >= 0.
+  MFEM_VERIFY(A.Height() == A.Width(), "Dense cubic trust-region matrix must be square.");
+  MFEM_VERIFY(A.Height() == b.Size(), "Dense cubic trust-region linear term has incompatible size.");
+  MFEM_VERIFY(delta >= 0.0, "Dense cubic trust-region radius must be nonnegative.");
+  MFEM_VERIFY(static_cast<int>(cubic.size()) == b.Size(), "Dense cubic tensor must have one matrix per dimension.");
+  for (const auto& matrix : cubic) {
+    MFEM_VERIFY(matrix.Height() == b.Size() && matrix.Width() == b.Size(),
+                "Dense cubic tensor matrix has incompatible size.");
+  }
+}
+
+double cubicEnergy(const mfem::DenseMatrix& A, const mfem::Vector& b, const std::vector<mfem::DenseMatrix>& cubic,
+                   const mfem::Vector& x)
+{  // Evaluates 1/2 x^T A x - b^T x + 1/6 sum_k x_k x^T cubic[k] x.
+  mfem::Vector Ax(x.Size());
+  A.Mult(x, Ax);
+  double energy = 0.5 * dot(x, Ax) - dot(x, b);
+  for (int k = 0; k < x.Size(); ++k) {
+    cubic[size_t(k)].Mult(x, Ax);  // Ax is reused as scratch and now holds cubic[k] * x
+    energy += (x[k] * dot(x, Ax)) / 6.0;
+  }
+  return energy;
+}
+
+mfem::Vector cubicGradient(const mfem::DenseMatrix& A, const mfem::Vector& b,
+                           const std::vector<mfem::DenseMatrix>& cubic, const mfem::Vector& x)
+{  // Returns A x - b plus 1/6 of the derivative of sum_k x_k x^T cubic[k] x; cubic[k] need not be symmetric.
+  mfem::Vector grad(x.Size());
+  A.Mult(x, grad);
+  grad -= b;
+
+  mfem::Vector tmp(x.Size());
+  for (int i = 0; i < x.Size(); ++i) {
+    double correction = 0.0;
+    cubic[size_t(i)].Mult(x, tmp);
+    correction += dot(x, tmp);  // from differentiating the leading x_i factor: x^T cubic[i] x
+    for (int k = 0; k < x.Size(); ++k) {  // from differentiating each quadratic form x^T cubic[k] x
+      for (int j = 0; j < x.Size(); ++j) {
+        correction += x[k] * (cubic[size_t(k)](i, j) + cubic[size_t(k)](j, i)) * x[j];
+      }
+    }
+    grad[i] += correction / 6.0;
+  }
+
+  return grad;
+}
+
+void projectToBall(mfem::Vector& x, double delta)
+{  // Rescales x in place to length delta when |x| > delta; x is left unchanged otherwise.
+  const double norm = x.Norml2();
+  if (norm > delta && norm > 0.0) {  // norm > 0 keeps the division below well defined
+    x *= delta / norm;
+  }
+}
+
+mfem::Vector solveQuadraticCandidate(const mfem::DenseMatrix& A, const mfem::Vector& b, double delta)
+{  // Returns x = (A + eps * I)^{-1} b, rescaled to length delta if it falls outside the ball |x| <= delta.
+  const int n = b.Size();
+  mfem::DenseMatrix shifted(A);  // the only copy made: A is taken by reference and left untouched
+  double trace = 0.0;
+  for (int i = 0; i < n; ++i) {
+    trace += std::abs(A(i, i));
+  }
+  const double regularization = std::max(1.0e-14, 1.0e-12 * trace / std::max(n, 1));  // eps: tiny diagonal shift
+  for (int i = 0; i < n; ++i) {
+    shifted(i, i) += regularization;
+  }
+
+  mfem::DenseMatrixInverse inv(shifted);
+  mfem::Vector x(n);
+  inv.Mult(b, x);
+  projectToBall(x, delta);
+  return x;
+}
+
+mfem::Vector projectedGradientSolve(const mfem::DenseMatrix& A, const mfem::Vector& b,
+                                    const std::vector<mfem::DenseMatrix>& cubic, mfem::Vector x, double delta)
+{  // Gradient descent on cubicEnergy from x with step halving; every trial point is projected onto |x| <= delta.
+  double energy = cubicEnergy(A, b, cubic, x);
+  constexpr int max_iters = 200;
+  const double grad_tol = 1.0e-11 * std::max(1.0, b.Norml2());  // loop-invariant, so evaluated once
+
+  for (int iter = 0; iter < max_iters; ++iter) {
+    mfem::Vector grad = cubicGradient(A, b, cubic, x);
+    if (grad.Norml2() <= grad_tol) {
+      break;
+    }
+
+    double step = 0.25;
+    bool accepted = false;
+    for (int ls = 0; ls < 30; ++ls) {  // backtracking: halve the step until the energy strictly decreases
+      mfem::Vector trial(x);
+      trial.Add(-step, grad);
+      projectToBall(trial, delta);
+      const double trial_energy = cubicEnergy(A, b, cubic, trial);
+      if (trial_energy < energy - 1.0e-14) {
+        x = trial;
+        energy = trial_energy;
+        accepted = true;
+        break;
+      }
+      step *= 0.5;
+    }
+    if (!accepted) {  // no step length gave a decrease: return the current iterate
+      break;
+    }
+  }
+
+  return x;
+}
+
+}  // namespace
+
+DenseCubicTrustRegionResult solveDenseCubicTrustRegionProblemMfem(const mfem::DenseMatrix& A, const mfem::Vector& b,
+                                                                  const std::vector<mfem::DenseMatrix>& cubic,
+                                                                  double delta)
+{  // Multi-start projected-gradient search; returns the lowest-energy point found together with its energy.
+  SMITH_MARK_FUNCTION;
+  verifyCubicInputs(A, b, cubic, delta);
+
+  mfem::Vector best(b.Size());
+  best = 0.0;
+  double best_energy = cubicEnergy(A, b, cubic, best);
+  if (delta == 0.0 || b.Size() == 0) {  // nothing to search: return x = 0
+    return std::make_tuple(best, best_energy);
+  }
+
+  std::vector<mfem::Vector> starts;
+  starts.emplace_back(best);  // start 1: the origin
+  starts.emplace_back(solveQuadraticCandidate(A, b, delta));  // start 2: regularized solve of A x = b
+
+  mfem::Vector direction(b);
+  if (direction.Norml2() > 0.0) {  // starts 3 and 4: +b and -b scaled to length delta
+    direction *= delta / direction.Norml2();
+    starts.emplace_back(direction);
+    direction *= -1.0;
+    starts.emplace_back(direction);
+  }
+
+  for (int i = 0; i < b.Size(); ++i) {  // remaining starts: +delta and -delta along each coordinate axis
+    mfem::Vector axis(b.Size());
+    axis = 0.0;
+    axis[i] = delta;
+    starts.emplace_back(axis);
+    axis[i] = -delta;
+    starts.emplace_back(axis);
+  }
+
+  for (const auto& start : starts) {
+    mfem::Vector candidate = projectedGradientSolve(A, b, cubic, start, delta);
+    const double energy = cubicEnergy(A, b, cubic, candidate);
+    if (energy < best_energy) {  // strict comparison: ties keep the earlier candidate
+      best = candidate;
+      best_energy = energy;
+    }
+  }
+
+  return std::make_tuple(best, best_energy);
+}
+
+TrustRegionSubspaceResult solveCubicSubspaceProblemMfem(
+    const std::vector<const mfem::Vector*>& directions, const std::vector<const mfem::Vector*>& A_directions,
+    const std::vector<const mfem::Vector*>& previous_A_directions, const mfem::Vector& previous_step,
+    const mfem::Vector& b, double delta, int num_leftmost, bool* used_cubic)
+{  // Cubic-model subspace solve; the quadratic-model step is returned instead when it scores no worse.
+  SMITH_MARK_FUNCTION;
+  MFEM_VERIFY(directions.size() == A_directions.size(), "Cubic subspace directions and A_directions differ.");
+  MFEM_VERIFY(directions.size() == previous_A_directions.size(),
+              "Cubic subspace directions and previous_A_directions differ.");
+  MFEM_VERIFY(!directions.empty(), "Cubic subspace solve requires at least one direction.");
+
+  mfem::DenseMatrix ss = denseDot(directions, directions);  // Gram matrix of the input directions
+  symmetrize(ss);
+  mfem::DenseMatrix T = orthonormalBasisTransform(ss);  // maps reduced coefficients to direction coefficients
+  MFEM_VERIFY(T.Width() > 0, "No independent directions in cubic MFEM subspace solve.");
+
+  mfem::DenseMatrix sAs = denseDot(directions, A_directions);
+  symmetrize(sAs);
+  mfem::DenseMatrix pAp = tripleProduct(T, sAs, T);  // T^T * sAs * T
+  symmetrize(pAp);
+
+  mfem::DenseMatrix sDeltaA = denseDot(directions, previous_A_directions);
+  sDeltaA *= -1.0;
+  sDeltaA += sAs;  // sDeltaA = sAs - <directions, previous_A_directions>
+  symmetrize(sDeltaA);
+  mfem::DenseMatrix pDeltaAp = tripleProduct(T, sDeltaA, T);
+  symmetrize(pDeltaAp);
+
+  mfem::Vector previous_coeffs = denseDot(directions, previous_step);
+  previous_coeffs = projectWithTranspose(T, previous_coeffs);  // T^T * <directions, previous_step>
+  const double previous_norm_squared = dot(previous_coeffs, previous_coeffs);
+
+  std::vector<mfem::DenseMatrix> cubic(size_t(T.Width()), mfem::DenseMatrix(T.Width()));
+  for (auto& matrix : cubic) {
+    matrix = 0.0;
+  }
+  if (previous_norm_squared > 0.0) {  // otherwise the cubic term stays identically zero
+    cubic = completeSymmetricCubicTensor(pDeltaAp, previous_coeffs);
+  }
+
+  const mfem::Vector sb = denseDot(directions, b);
+  const mfem::Vector pb = projectWithTranspose(T, sb);
+  auto [reduced_x, energy] = solveDenseCubicTrustRegionProblemMfem(pAp, pb, cubic, delta);
+
+  mfem::Vector coeffs(T.Height());
+  T.Mult(reduced_x, coeffs);  // back to coefficients of the input directions
+  mfem::Vector sol = combineDirections(directions, coeffs);
+
+  auto [quadratic_sol, leftmosts, leftvals, quadratic_energy] =
+      solveSubspaceProblemMfem(directions, A_directions, b, delta, num_leftmost);
+  (void)quadratic_energy;  // unused: the quadratic step is re-scored with the cubic model below
+
+  const mfem::Vector quadratic_s_coeffs = denseDot(directions, quadratic_sol);
+  const mfem::Vector quadratic_reduced_x = projectWithTranspose(T, quadratic_s_coeffs);
+  const double quadratic_cubic_energy = cubicEnergy(pAp, pb, cubic, quadratic_reduced_x);
+  if (quadratic_cubic_energy <= energy) {  // the quadratic step is at least as good under the cubic model
+    if (used_cubic != nullptr) {
+      *used_cubic = false;
+    }
+    return std::make_tuple(quadratic_sol, leftmosts, leftvals, quadratic_cubic_energy);
+  }
+
+  if (used_cubic != nullptr) {
+    *used_cubic = true;
+  }
+  return std::make_tuple(sol, leftmosts, leftvals, energy);  // leftmost eigen-data comes from the quadratic solve
+}
+
+#else
+
+DenseCubicTrustRegionResult solveDenseCubicTrustRegionProblemMfem(const mfem::DenseMatrix&, const mfem::Vector& b,
+                                                                  const std::vector<mfem::DenseMatrix>&, double)
+{  // Variant compiled when MFEM_USE_LAPACK is not defined: always throws.
+  throw PetscException("MFEM dense cubic trust-region solve requires MFEM LAPACK support.");
+  return std::make_tuple(b, 0.0);  // never reached; it follows the unconditional throw
+}
+
+TrustRegionSubspaceResult solveCubicSubspaceProblemMfem(const std::vector<const mfem::Vector*>&,
+                                                        const std::vector<const mfem::Vector*>&,
+                                                        const std::vector<const mfem::Vector*>&,
+                                                        const mfem::Vector&, const mfem::Vector& b, double, int, bool*)
+{  // Variant compiled when MFEM_USE_LAPACK is not defined: always throws.
+  throw PetscException("MFEM dense cubic trust-region solve requires MFEM LAPACK support.");
+  return std::make_tuple(b, std::vector<std::shared_ptr<mfem::Vector>> {}, std::vector<double> {}, 0.0);  // never reached
+}
+
+#endif  // MFEM_USE_LAPACK
+
+}  // namespace smith
diff --git a/src/smith/numerics/trust_region_solver.hpp b/src/smith/numerics/trust_region_solver.hpp
index ad4b390f18..f076520f0e 100644
--- a/src/smith/numerics/trust_region_solver.hpp
+++ b/src/smith/numerics/trust_region_solver.hpp
@@ -14,14 +14,13 @@
 
 #include "smith/smith_config.hpp"
 
-#ifdef SMITH_USE_SLEPC
-
 #include <memory>
-#include <optional>
-#include <variant>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
 
-#include "smith/physics/state/finite_element_state.hpp"
-#include "smith/physics/state/finite_element_dual.hpp"
+#include "mfem.hpp"
 
 namespace smith {
 
@@ -38,6 +37,37 @@ class PetscException : public std::exception {
   std::string msg;
 };
 
+enum class TrustRegionSubspaceBackend {
+  Petsc,
+  Mfem
+};
+
+using TrustRegionSubspaceResult =
+    std::tuple<mfem::Vector, std::vector<std::shared_ptr<mfem::Vector>>, std::vector<double>, double>;
+
+struct TrustRegionSubspaceTimings {
+  size_t num_solves = 0;
+  size_t total_input_dim = 0;
+  size_t total_reduced_dim = 0;
+  size_t max_input_dim = 0;
+  size_t max_reduced_dim = 0;
+  double project_A_seconds = 0.0;
+  double project_gram_seconds = 0.0;
+  double project_b_seconds = 0.0;
+  double basis_seconds = 0.0;
+  double reduced_A_seconds = 0.0;
+  double dense_eigensystem_seconds = 0.0;
+  double dense_trust_solve_seconds = 0.0;
+  double reconstruct_solution_seconds = 0.0;
+  double reconstruct_leftmost_seconds = 0.0;
+};
+
+void resetTrustRegionSubspaceTimings();
+
+TrustRegionSubspaceTimings trustRegionSubspaceTimings();
+
+using DenseCubicTrustRegionResult = std::tuple<mfem::Vector, double>;
+
 /// @brief computes the global size of mfem::Vector
 int globalSize(const mfem::Vector& parallel_v, const MPI_Comm& comm);
 
@@ -46,13 +76,36 @@ double innerProduct(const mfem::Vector& a, const mfem::Vector& b, const MPI_Comm
 
 /// @brief returns the solution, as well as a list of the N leftmost eigenvectors
 /// and their eigenvalues, and the predicted model energy change
-std::tuple<mfem::Vector, std::vector<std::shared_ptr<mfem::Vector>>, std::vector<double>, double> solveSubspaceProblem(
+TrustRegionSubspaceResult solveSubspaceProblem(
     const std::vector<const mfem::Vector*>& directions, const std::vector<const mfem::Vector*>& A_directions,
     const mfem::Vector& b, double delta, int num_leftmost);
 
+#ifdef SMITH_USE_SLEPC
+TrustRegionSubspaceResult solveSubspaceProblemPetsc(
+    const std::vector<const mfem::Vector*>& directions, const std::vector<const mfem::Vector*>& A_directions,
+    const mfem::Vector& b, double delta, int num_leftmost);
+#endif
+
+TrustRegionSubspaceResult solveSubspaceProblemMfem(
+    const std::vector<const mfem::Vector*>& directions, const std::vector<const mfem::Vector*>& A_directions,
+    const mfem::Vector& b, double delta, int num_leftmost);
+
+/// @brief solves a small dense cubic trust-region model
+///   1/2 x^T A x - b^T x + 1/6 sum_k x_k x^T cubic[k] x, ||x|| <= delta.
+DenseCubicTrustRegionResult solveDenseCubicTrustRegionProblemMfem(
+    const mfem::DenseMatrix& A, const mfem::Vector& b, const std::vector<mfem::DenseMatrix>& cubic, double delta);
+
+TrustRegionSubspaceResult solveCubicSubspaceProblemMfem(
+    const std::vector<const mfem::Vector*>& directions, const std::vector<const mfem::Vector*>& A_directions,
+    const std::vector<const mfem::Vector*>& previous_A_directions, const mfem::Vector& previous_step,
+    const mfem::Vector& b, double delta, int num_leftmost, bool* used_cubic = nullptr);
+
 std::pair<std::vector<const mfem::Vector*>, std::vector<const mfem::Vector*>> removeDependentDirections(
     std::vector<const mfem::Vector*> directions, std::vector<const mfem::Vector*> A_directions);
 
-}  // namespace smith
+std::tuple<std::vector<const mfem::Vector*>, std::vector<const mfem::Vector*>, std::vector<const mfem::Vector*>>
+removeDependentDirectionTriples(std::vector<const mfem::Vector*> directions,
+                                std::vector<const mfem::Vector*> A_directions,
+                                std::vector<const mfem::Vector*> previous_A_directions);
 
-#endif  // SMITH_USE_SLEPC
+}  // namespace smith
diff --git a/src/smith/physics/solid_mechanics.hpp b/src/smith/physics/solid_mechanics.hpp
index c635f67f89..504538d4e6 100644
--- a/src/smith/physics/solid_mechanics.hpp
+++ b/src/smith/physics/solid_mechanics.hpp
@@ -12,12 +12,14 @@
 
 #pragma once
 
+#include <chrono>
 #include <cstddef>
 #include <array>
 #include <functional>
 #include <memory>
 #include <optional>
 #include <string>
+#include <type_traits>
 #include <unordered_map>
 #include <utility>
 #include <vector>
@@ -53,6 +55,18 @@
 #include "smith/physics/state/finite_element_vector.hpp"
 
 namespace smith {
+
+struct SolidMechanicsJacobianTimings {
+  size_t legacy_jacobian_evals = 0;
+  size_t jacobian_operator_evals = 0;
+  size_t jacobian_operator_assemblies = 0;
+  double legacy_derivative_seconds = 0.0;
+  double legacy_sparse_assembly_seconds = 0.0;
+  double legacy_essential_elimination_seconds = 0.0;
+  double jacobian_operator_derivative_seconds = 0.0;
+  double jacobian_operator_sparse_assembly_seconds = 0.0;
+  double jacobian_operator_essential_elimination_seconds = 0.0;
+};
 namespace solid_mechanics {
 
 namespace detail {
@@ -1051,12 +1065,23 @@ class SolidMechanics<order, dim, Parameters<parameter_space...>, std::integer_se
         // gradient of residual function
         [this](const mfem::Vector& u) -> mfem::Operator& {
           SMITH_MARK_FUNCTION;
+          using Clock = std::chrono::steady_clock;
+          auto seconds_since = [](Clock::time_point start) {
+            return std::chrono::duration_cast<std::chrono::duration<double>>(Clock::now() - start).count();
+          };
+          auto derivative_start = Clock::now();
           auto [r, drdu] = (*residual_)(time_, shapeDisplacement(), differentiate_wrt(u), acceleration_,
                                         *parameters_[parameter_indices].state...);
+          jacobian_timings_.legacy_derivative_seconds += seconds_since(derivative_start);
+          ++jacobian_timings_.legacy_jacobian_evals;
           J_.reset();
+          auto assembly_start = Clock::now();
           J_ = assemble(drdu);
+          jacobian_timings_.legacy_sparse_assembly_seconds += seconds_since(assembly_start);
           J_e_.reset();
+          auto elimination_start = Clock::now();
           J_e_ = bcs_.eliminateAllEssentialDofsFromMatrix(*J_);
+          jacobian_timings_.legacy_essential_elimination_seconds += seconds_since(elimination_start);
           return *J_;
         });
   }
@@ -1080,6 +1105,86 @@ class SolidMechanics<order, dim, Parameters<parameter_space...>, std::integer_se
     }
   }
 
+  /// @brief Build a quasistatic JacobianOperator with essential boundary conditions applied.
+  std::unique_ptr<JacobianOperator> quasistaticJacobianOperator(const mfem::Vector& u) const
+  {
+    SMITH_MARK_FUNCTION;
+
+    using Clock = std::chrono::steady_clock;
+    auto seconds_since = [](Clock::time_point start) {
+      return std::chrono::duration_cast<std::chrono::duration<double>>(Clock::now() - start).count();
+    };
+    auto derivative_start = Clock::now();
+    auto [r, drdu] = (*residual_)(time_, shapeDisplacement(), differentiate_wrt(u), acceleration_,
+                                  *parameters_[parameter_indices].state...);
+    jacobian_timings_.jacobian_operator_derivative_seconds += seconds_since(derivative_start);
+    ++jacobian_timings_.jacobian_operator_evals;
+
+    using GradientT = std::remove_reference_t<decltype(drdu)>;
+
+    class QuasistaticJacobianOperator : public JacobianOperator {
+     public:
+      QuasistaticJacobianOperator(
+          const GradientT& gradient, const mfem::Array<int>& constrained_dofs,
+          std::function<std::unique_ptr<mfem::HypreParMatrix>(mfem::HypreParMatrix&)> eliminate_essential_dofs,
+          SolidMechanicsJacobianTimings& timings)
+          : JacobianOperator(gradient.Height(), gradient.Width()),
+            gradient_(gradient),
+            constrained_dofs_(constrained_dofs),
+            eliminate_essential_dofs_(std::move(eliminate_essential_dofs)),
+            timings_(timings)
+      {
+      }
+
+      void Mult(const mfem::Vector& du, mfem::Vector& dr) const override
+      {
+        mfem::Vector du_interior(du);
+        du_interior.SetSubVector(constrained_dofs_, 0.0);
+
+        gradient_.Mult(du_interior, dr);
+        for (int i = 0; i < constrained_dofs_.Size(); ++i) {
+          const int dof = constrained_dofs_[i];
+          dr[dof] = du[dof];
+        }
+      }
+
+      std::unique_ptr<mfem::HypreParMatrix> assemble() override
+      {
+        using AssemblyClock = std::chrono::steady_clock;
+        auto seconds_since = [](AssemblyClock::time_point start) {
+          return std::chrono::duration_cast<std::chrono::duration<double>>(AssemblyClock::now() - start).count();
+        };
+        auto assembly_start = AssemblyClock::now();
+        std::unique_ptr<mfem::HypreParMatrix> matrix = gradient_.assemble();
+        timings_.jacobian_operator_sparse_assembly_seconds += seconds_since(assembly_start);
+        auto elimination_start = AssemblyClock::now();
+        eliminate_essential_dofs_(*matrix);
+        timings_.jacobian_operator_essential_elimination_seconds += seconds_since(elimination_start);
+        ++timings_.jacobian_operator_assemblies;
+        return matrix;
+      }
+
+      void assembleDiagonal(mfem::Vector& diag) const override
+      {
+        gradient_.assembleDiagonal(diag);
+        for (int i = 0; i < constrained_dofs_.Size(); ++i) {
+          diag[constrained_dofs_[i]] = 1.0;
+        }
+      }
+
+     private:
+      GradientT gradient_;
+      mfem::Array<int> constrained_dofs_;
+      std::function<std::unique_ptr<mfem::HypreParMatrix>(mfem::HypreParMatrix&)> eliminate_essential_dofs_;
+      SolidMechanicsJacobianTimings& timings_;
+    };
+
+    return std::make_unique<QuasistaticJacobianOperator>(
+        drdu, bcs_.allEssentialTrueDofs(),
+        [this](mfem::HypreParMatrix& matrix) { return bcs_.eliminateAllEssentialDofsFromMatrix(matrix); },
+        jacobian_timings_);
+  }
+
   /**
    * @brief Return the assembled stiffness matrix
    *
@@ -1161,6 +1266,7 @@ class SolidMechanics<order, dim, Parameters<parameter_space...>, std::integer_se
     if (is_quasistatic_) {
       nonlin_solver_->setMatrixFreeTangentAction([this](const mfem::Vector& u, const mfem::Vector& du,
                                                         mfem::Vector& dr) { quasistaticTangentAction(u, du, dr); });
+      nonlin_solver_->setJacobianOperator([this](const mfem::Vector& u) { return quasistaticJacobianOperator(u); });
     }
 
     if (checkpoint_to_disk_) {
@@ -1412,6 +1518,12 @@ class SolidMechanics<order, dim, Parameters<parameter_space...>, std::integer_se
   /// @overload
   const smith::EquationSolver& equationSolver() const { return *nonlin_solver_; }
 
+  /// @brief Return accumulated Jacobian construction timings for this physics object.
+  const SolidMechanicsJacobianTimings& jacobianTimings() const { return jacobian_timings_; }
+
+  /// @brief Reset accumulated Jacobian construction timings for this physics object.
+  void resetJacobianTimings() const { jacobian_timings_ = {}; }
+
  protected:
   /// The compile-time finite element trial space for displacement and velocity (H1 of order p)
   using trial = H1<order, dim>;
@@ -1480,6 +1592,9 @@ class SolidMechanics<order, dim, Parameters<parameter_space...>, std::integer_se
   /// because are associated with essential boundary conditions
   std::unique_ptr<mfem::HypreParMatrix> J_e_;
 
+  /// Accumulated timing diagnostics for quasistatic Jacobian construction paths.
+  mutable SolidMechanicsJacobianTimings jacobian_timings_;
+
   /// an intermediate variable used to store the predicted end-step displacement
   mfem::Vector predicted_displacement_;
 
diff --git a/src/smith/physics/tests/shallow_arch_buckling.cpp b/src/smith/physics/tests/shallow_arch_buckling.cpp
index b9514b74ed..a94a61bb63 100644
--- a/src/smith/physics/tests/shallow_arch_buckling.cpp
+++ b/src/smith/physics/tests/shallow_arch_buckling.cpp
@@ -26,14 +26,23 @@ namespace smith {
 namespace {
 
 constexpr double length = 10.0;
-constexpr double thickness = 0.25;
+constexpr double thickness = 0.025;
 constexpr double end_tol = 1.0e-8;
 constexpr double top_tol = 1.0e-8;
 std::string solver_name = "TrustRegion";
 int print_level = 2;
 int pcg_block_len = 10;
 double pcg_powell_eta = 0.005;
-int nonlinear_max_iterations = 30000;
+int nonlinear_max_iterations = 300000;
+bool pcg_diagonal_preconditioner = false;
+int trust_subspace_option = static_cast<int>(SubSpaceOptions::NEVER);
+int trust_num_leftmost = 1;
+int trust_num_past_steps = 0;
+int trust_nonmonotone_window = 0;
+bool trust_use_jacobian_operator = false;
+bool trust_use_cubic_subspace = false;
+bool trust_use_solve_start_direction = false;
+bool trust_use_min_residual_direction = false;
 
 NonlinearSolver selectedNonlinearSolver()
 {
@@ -66,6 +75,29 @@ void parseCommandLine(int& argc, char** argv)
       pcg_powell_eta = std::stod(arg.substr(std::string("--pcg-powell-eta=").size()));
     } else if (arg.rfind("--nonlinear-max-iterations=", 0) == 0) {
       nonlinear_max_iterations = std::stoi(arg.substr(std::string("--nonlinear-max-iterations=").size()));
+    } else if (arg.rfind("--pcg-diagonal-preconditioner=", 0) == 0) {
+      const std::string value = arg.substr(std::string("--pcg-diagonal-preconditioner=").size());
+      pcg_diagonal_preconditioner = (value == "1" || value == "true" || value == "on");
+    } else if (arg.rfind("--trust-subspace-option=", 0) == 0) {
+      trust_subspace_option = std::stoi(arg.substr(std::string("--trust-subspace-option=").size()));
+    } else if (arg.rfind("--trust-num-leftmost=", 0) == 0) {
+      trust_num_leftmost = std::stoi(arg.substr(std::string("--trust-num-leftmost=").size()));
+    } else if (arg.rfind("--trust-num-past-steps=", 0) == 0) {
+      trust_num_past_steps = std::stoi(arg.substr(std::string("--trust-num-past-steps=").size()));
+    } else if (arg.rfind("--trust-nonmonotone-window=", 0) == 0) {
+      trust_nonmonotone_window = std::stoi(arg.substr(std::string("--trust-nonmonotone-window=").size()));
+    } else if (arg.rfind("--trust-use-jacobian-operator=", 0) == 0) {
+      const std::string value = arg.substr(std::string("--trust-use-jacobian-operator=").size());
+      trust_use_jacobian_operator = (value == "1" || value == "true" || value == "on");
+    } else if (arg.rfind("--trust-use-cubic-subspace=", 0) == 0) {
+      const std::string value = arg.substr(std::string("--trust-use-cubic-subspace=").size());
+      trust_use_cubic_subspace = (value == "1" || value == "true" || value == "on");
+    } else if (arg.rfind("--trust-use-solve-start-direction=", 0) == 0) {
+      const std::string value = arg.substr(std::string("--trust-use-solve-start-direction=").size());
+      trust_use_solve_start_direction = (value == "1" || value == "true" || value == "on");
+    } else if (arg.rfind("--trust-use-min-residual-direction=", 0) == 0) {
+      const std::string value = arg.substr(std::string("--trust-use-min-residual-direction=").size());
+      trust_use_min_residual_direction = (value == "1" || value == "true" || value == "on");
     } else {
       argv[write_arg] = argv[read_arg];
       ++write_arg;
@@ -79,11 +111,13 @@ void parseCommandLine(int& argc, char** argv)
 TEST(ShallowArchBuckling, CompressedThinBeamSnapThrough)
 {
   MPI_Barrier(MPI_COMM_WORLD);
+  int rank = 0;
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 
   constexpr int p = 1;
   constexpr int dim = 2;
-  constexpr int nx = 96;
-  constexpr int ny = 4;
+  constexpr int nx = 150;
+  constexpr int ny = 6;
 
   axom::sidre::DataStore datastore;
   smith::StateManager::initialize(datastore, "shallow_arch_buckling");
@@ -98,9 +132,14 @@ TEST(ShallowArchBuckling, CompressedThinBeamSnapThrough)
       "right_end", [](std::vector<vec2> vertices, int) { return average(vertices)[0] > length - end_tol; });
   mesh->addDomainOfBoundaryElements(
       "top_face", [](std::vector<vec2> vertices, int) { return average(vertices)[1] > thickness - top_tol; });
-  EXPECT_GT(mesh->domain("left_end").total_elements(), 0);
-  EXPECT_GT(mesh->domain("right_end").total_elements(), 0);
-  EXPECT_GT(mesh->domain("top_face").total_elements(), 0);
+  auto globalElementCount = [](int local_count) {
+    int global_count = 0;
+    MPI_Allreduce(&local_count, &global_count, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+    return global_count;
+  };
+  EXPECT_GT(globalElementCount(mesh->domain("left_end").total_elements()), 0);
+  EXPECT_GT(globalElementCount(mesh->domain("right_end").total_elements()), 0);
+  EXPECT_GT(globalElementCount(mesh->domain("top_face").total_elements()), 0);
 
   smith::LinearSolverOptions linear_options{.linear_solver = LinearSolver::CG,
                                             .preconditioner = Preconditioner::HypreJacobi,
@@ -109,14 +148,24 @@ TEST(ShallowArchBuckling, CompressedThinBeamSnapThrough)
                                             .max_iterations = 10000,
                                             .print_level = 0};
 
-  smith::NonlinearSolverOptions nonlinear_options{.nonlin_solver = selectedNonlinearSolver(),
-                                                  .relative_tol = 1.0e-8,
-                                                  .absolute_tol = 1.0e-10,
-                                                  .max_iterations = nonlinear_max_iterations,
-                                                  .print_level = print_level,
-                                                  .pcg_block_len = pcg_block_len,
-                                                  .pcg_powell_eta = pcg_powell_eta,
-                                                  .pcg_max_block_retries = 40};
+  smith::NonlinearSolverOptions nonlinear_options{
+      .nonlin_solver = selectedNonlinearSolver(),
+      .relative_tol = 1.0e-8,
+      .absolute_tol = 1.0e-10,
+      .max_iterations = nonlinear_max_iterations,
+      .print_level = print_level,
+      .trust_nonmonotone_window = trust_nonmonotone_window,
+      .trust_use_jacobian_operator = trust_use_jacobian_operator,
+      .trust_use_cubic_subspace = trust_use_cubic_subspace,
+      .subspace_option = static_cast<SubSpaceOptions>(trust_subspace_option),
+      .num_leftmost = trust_num_leftmost,
+      .trust_num_past_steps = trust_num_past_steps,
+      .trust_use_solve_start_direction = trust_use_solve_start_direction,
+      .trust_use_min_residual_direction = trust_use_min_residual_direction,
+      .pcg_block_len = pcg_block_len,
+      .pcg_powell_eta = pcg_powell_eta,
+      .pcg_max_block_retries = 40,
+      .pcg_use_jacobian_diagonal_preconditioner = pcg_diagonal_preconditioner};
 
   SolidMechanics<p, dim> solid(nonlinear_options, linear_options, solid_mechanics::default_quasistatic_options,
                                "compressed_beam", mesh);
@@ -144,47 +193,169 @@ TEST(ShallowArchBuckling, CompressedThinBeamSnapThrough)
   solid.completeSetup();
   solid.outputStateToDisk("shallow_arch_buckling");
 
-  mfem::out << "Compressed thin beam snap-through run: solver = " << solver_name << '\n';
+  if (rank == 0) {
+    mfem::out << "Compressed thin beam snap-through run: solver = " << solver_name
+              << ", trust_subspace_option = " << trust_subspace_option
+              << ", trust_num_leftmost = " << trust_num_leftmost
+              << ", trust_num_past_steps = " << trust_num_past_steps
+              << ", trust_nonmonotone_window = " << trust_nonmonotone_window
+              << ", trust_use_jacobian_operator = " << trust_use_jacobian_operator
+              << ", trust_use_cubic_subspace = " << trust_use_cubic_subspace
+              << ", pcg_diagonal_preconditioner = " << pcg_diagonal_preconditioner << '\n';
+  }
 
-  constexpr int num_steps = 20;
+  constexpr int num_steps = 5;
   int num_converged_steps = 0;
   for (int step = 0; step < num_steps; ++step) {
+    solid.resetJacobianTimings();
     solid.advanceTimestep(1.0 / num_steps);
     const auto& nonlinear_solver = solid.equationSolver().nonlinearSolver();
     if (nonlinear_solver.GetConverged()) {
       ++num_converged_steps;
     }
-    mfem::out << "Load step " << step + 1 << "/" << num_steps << ": converged = " << nonlinear_solver.GetConverged()
-              << ", nonlinear iterations = " << nonlinear_solver.GetNumIterations()
-              << ", final relative residual = " << nonlinear_solver.GetFinalRelNorm() << '\n';
+    if (rank == 0) {
+      mfem::out << "Load step " << step + 1 << "/" << num_steps
+                << ": converged = " << nonlinear_solver.GetConverged()
+                << ", nonlinear iterations = " << nonlinear_solver.GetNumIterations()
+                << ", final relative residual = " << nonlinear_solver.GetFinalRelNorm() << '\n';
+    }
     solid.outputStateToDisk("shallow_arch_buckling");
-    if (const auto diagnostics = solid.equationSolver().pcgBlockDiagnostics()) {
-      mfem::out << "  PCG diagnostics: residuals = " << diagnostics->num_residuals
-                << ", hess-vecs = " << diagnostics->num_hess_vecs
-                << ", preconditioner applications = " << diagnostics->num_preconds
-                << ", Jacobian assemblies = " << diagnostics->num_jacobian_assembles
-                << ", preconditioner updates = " << diagnostics->num_preconditioner_updates
-                << ", accepted blocks = " << diagnostics->num_blocks
-                << ", accepted steps = " << diagnostics->num_accepted_steps
-                << ", block rejects = " << diagnostics->num_block_rejects
-                << ", prefix accepts = " << diagnostics->num_prefix_accepts
-                << ", momentum resets = " << diagnostics->num_momentum_resets
-                << ", nonzero beta = " << diagnostics->num_nonzero_beta
-                << ", zero beta = " << diagnostics->num_zero_beta
-                << ", Powell restarts = " << diagnostics->num_powell_restarts
-                << ", descent restarts = " << diagnostics->num_descent_restarts
-                << ", negative curvature = " << diagnostics->num_negative_curvature
-                << ", trust capped steps = " << diagnostics->num_trust_capped_steps
-                << ", line-search backtracks = " << diagnostics->num_line_search_backtracks
-                << ", final h_scale = " << diagnostics->final_h_scale
-                << ", last trust ratio = " << diagnostics->last_trust_ratio << '\n';
+    if (rank == 0 && print_level >= 1) {
+      if (const auto diagnostics = solid.equationSolver().pcgBlockDiagnostics()) {
+        mfem::out << "  PCG diagnostics: residuals = " << diagnostics->num_residuals
+                  << ", hess-vecs = " << diagnostics->num_hess_vecs
+                  << ", preconditioner applications = " << diagnostics->num_preconds
+                  << ", Jacobian assemblies = " << diagnostics->num_jacobian_assembles
+                  << ", Jacobian operator evals = " << diagnostics->num_jacobian_operator_evals
+                  << ", diagonal assemblies = " << diagnostics->num_diagonal_assembles
+                  << ", preconditioner updates = " << diagnostics->num_preconditioner_updates
+                  << ", accepted blocks = " << diagnostics->num_blocks
+                  << ", accepted steps = " << diagnostics->num_accepted_steps
+                  << ", block rejects = " << diagnostics->num_block_rejects
+                  << ", prefix accepts = " << diagnostics->num_prefix_accepts
+                  << ", momentum resets = " << diagnostics->num_momentum_resets
+                  << ", nonzero beta = " << diagnostics->num_nonzero_beta
+                  << ", zero beta = " << diagnostics->num_zero_beta
+                  << ", Powell restarts = " << diagnostics->num_powell_restarts
+                  << ", descent restarts = " << diagnostics->num_descent_restarts
+                  << ", negative curvature = " << diagnostics->num_negative_curvature
+                  << ", trust capped steps = " << diagnostics->num_trust_capped_steps
+                  << ", line-search backtracks = " << diagnostics->num_line_search_backtracks
+                  << ", final h_scale = " << diagnostics->final_h_scale
+                  << ", last trust ratio = " << diagnostics->last_trust_ratio << '\n';
+        mfem::out << "  PCG timings: residual = " << diagnostics->residual_seconds
+                  << ", hess-vec = " << diagnostics->hess_vec_seconds
+                  << ", operator hess-vec = " << diagnostics->jacobian_operator_hess_vec_seconds
+                  << ", assembled hess-vec = " << diagnostics->assembled_hess_vec_seconds
+                  << ", matrix-free hess-vec = " << diagnostics->matrix_free_hess_vec_seconds
+                  << ", preconditioner = " << diagnostics->preconditioner_seconds
+                  << ", Jacobian operator eval = " << diagnostics->jacobian_operator_eval_seconds
+                  << ", Jacobian assembly = " << diagnostics->jacobian_assembly_seconds
+                  << ", diagonal assembly = " << diagnostics->diagonal_assembly_seconds
+                  << ", diagonal invert = " << diagnostics->diagonal_invert_seconds
+                  << ", preconditioner update = " << diagnostics->preconditioner_update_seconds
+                  << ", preconditioner setup = " << diagnostics->preconditioner_setup_seconds << '\n';
+      }
+      if (const auto diagnostics = solid.equationSolver().trustRegionDiagnostics()) {
+        const double operator_timed_seconds =
+            diagnostics->residual_seconds + diagnostics->hess_vec_seconds + diagnostics->preconditioner_seconds +
+            diagnostics->jacobian_operator_eval_seconds + diagnostics->diagonal_assembly_seconds +
+            diagnostics->diagonal_invert_seconds + diagnostics->jacobian_assembly_seconds +
+            diagnostics->preconditioner_update_seconds;
+        const double assembled_hess_vec_seconds =
+            diagnostics->hess_vec_seconds - diagnostics->jacobian_operator_hess_vec_seconds;
+        mfem::out << "  TrustRegion diagnostics: residuals = " << diagnostics->num_residuals
+                  << ", hess-vecs = " << diagnostics->num_hess_vecs
+                  << ", model hess-vecs = " << diagnostics->num_model_hess_vecs
+                  << ", cauchy hess-vecs = " << diagnostics->num_cauchy_hess_vecs
+                  << ", line-search hess-vecs = " << diagnostics->num_line_search_hess_vecs
+                  << ", preconditioner applications = " << diagnostics->num_preconds
+                  << ", Jacobian assemblies = " << diagnostics->num_jacobian_assembles
+                  << ", Jacobian operator evals = " << diagnostics->num_jacobian_operator_evals
+                  << ", diagonal assemblies = " << diagnostics->num_diagonal_assembles
+                  << ", CG iterations = " << diagnostics->num_cg_iterations
+                  << ", subspace solves = " << diagnostics->num_subspace_solves
+                  << ", subspace leftmost hess-vecs = " << diagnostics->num_subspace_leftmost_hess_vecs
+                  << ", subspace hess-vec batches = " << diagnostics->num_subspace_hess_vec_batches
+                  << ", subspace batched hess-vecs = " << diagnostics->num_subspace_batched_hess_vecs
+                  << ", subspace past-step vectors = " << diagnostics->num_subspace_past_step_vectors
+                  << ", subspace past-step hess-vecs = " << diagnostics->num_subspace_past_step_hess_vecs
+                  << ", quadratic subspace solves = " << diagnostics->num_quadratic_subspace_solves
+                  << ", cubic subspace attempts = " << diagnostics->num_cubic_subspace_attempts
+                  << ", cubic subspace uses = " << diagnostics->num_cubic_subspace_uses
+                  << ", cubic subspace quadratic fallbacks = " << diagnostics->num_cubic_subspace_quadratic_fallbacks
+                  << ", nonmonotone work accepts = " << diagnostics->num_nonmonotone_work_accepts
+                  << ", monotone work would reject = " << diagnostics->num_monotone_work_would_reject
+                  << ", preconditioner updates = " << diagnostics->num_preconditioner_updates << '\n';
+        mfem::out << "  TrustRegion timings: total = " << diagnostics->total_seconds
+                  << ", operator-timed = " << operator_timed_seconds << ", residual = " << diagnostics->residual_seconds
+                  << ", hess-vec = " << diagnostics->hess_vec_seconds
+                  << ", model hess-vec = " << diagnostics->model_hess_vec_seconds
+                  << ", cauchy hess-vec = " << diagnostics->cauchy_hess_vec_seconds
+                  << ", line-search hess-vec = " << diagnostics->line_search_hess_vec_seconds
+                  << ", operator hess-vec = " << diagnostics->jacobian_operator_hess_vec_seconds
+                  << ", assembled hess-vec = " << assembled_hess_vec_seconds
+                  << ", preconditioner = " << diagnostics->preconditioner_seconds
+                  << ", Jacobian operator eval = " << diagnostics->jacobian_operator_eval_seconds
+                  << ", diagonal assembly = " << diagnostics->diagonal_assembly_seconds
+                  << ", diagonal invert = " << diagnostics->diagonal_invert_seconds
+                  << ", model solve = " << diagnostics->model_solve_seconds
+                  << ", subspace = " << diagnostics->subspace_seconds
+                  << ", subspace leftmost = " << diagnostics->subspace_leftmost_seconds
+                  << ", subspace hess-vec batches = " << diagnostics->subspace_hess_vec_batch_seconds
+                  << ", subspace filter = " << diagnostics->subspace_filter_seconds
+                  << ", subspace backend = " << diagnostics->subspace_backend_seconds
+                  << ", subspace project A = " << diagnostics->subspace_project_A_seconds
+                  << ", subspace project gram = " << diagnostics->subspace_project_gram_seconds
+                  << ", subspace project b = " << diagnostics->subspace_project_b_seconds
+                  << ", subspace basis = " << diagnostics->subspace_basis_seconds
+                  << ", subspace reduced A = " << diagnostics->subspace_reduced_A_seconds
+                  << ", subspace dense eigensystem = " << diagnostics->subspace_dense_eigensystem_seconds
+                  << ", subspace dense trust solve = " << diagnostics->subspace_dense_trust_solve_seconds
+                  << ", subspace reconstruct solution = " << diagnostics->subspace_reconstruct_solution_seconds
+                  << ", subspace reconstruct leftmost = " << diagnostics->subspace_reconstruct_leftmost_seconds
+                  << ", subspace finalize = " << diagnostics->subspace_finalize_seconds
+                  << ", cauchy point = " << diagnostics->cauchy_point_seconds
+                  << ", dogleg = " << diagnostics->dogleg_seconds
+                  << ", line search = " << diagnostics->line_search_seconds << ", dot = " << diagnostics->dot_seconds
+                  << ", dot count = " << diagnostics->num_dot_products
+                  << ", dot reductions = " << diagnostics->num_dot_reductions
+                  << ", model dots = " << diagnostics->num_model_dot_products << " / " << diagnostics->model_dot_seconds
+                  << ", cauchy dots = " << diagnostics->num_cauchy_dot_products << " / "
+                  << diagnostics->cauchy_dot_seconds << ", dogleg dots = " << diagnostics->num_dogleg_dot_products
+                  << " / " << diagnostics->dogleg_dot_seconds
+                  << ", line-search dots = " << diagnostics->num_line_search_dot_products << " / "
+                  << diagnostics->line_search_dot_seconds << ", setup dots = " << diagnostics->num_setup_dot_products
+                  << " / " << diagnostics->setup_dot_seconds
+                  << ", vector update = " << diagnostics->vector_update_seconds
+                  << ", vector copy/scale = " << diagnostics->vector_copy_scale_seconds
+                  << ", projection = " << diagnostics->projection_seconds
+                  << ", Jacobian assembly = " << diagnostics->jacobian_assembly_seconds
+                  << ", preconditioner update = " << diagnostics->preconditioner_update_seconds
+                  << ", preconditioner setup = " << diagnostics->preconditioner_setup_seconds
+                  << ", work objective = " << diagnostics->last_work_objective
+                  << ", nonmonotone work reference = " << diagnostics->last_nonmonotone_work_reference << '\n';
+      }
+      const auto& jacobian_timings = solid.jacobianTimings();
+      mfem::out << "  Solid Jacobian timings: legacy evals = " << jacobian_timings.legacy_jacobian_evals
+                << ", legacy derivative = " << jacobian_timings.legacy_derivative_seconds
+                << ", legacy sparse assembly = " << jacobian_timings.legacy_sparse_assembly_seconds
+                << ", legacy EBC elimination = " << jacobian_timings.legacy_essential_elimination_seconds
+                << ", operator evals = " << jacobian_timings.jacobian_operator_evals
+                << ", operator assemblies = " << jacobian_timings.jacobian_operator_assemblies
+                << ", operator derivative = " << jacobian_timings.jacobian_operator_derivative_seconds
+                << ", operator sparse assembly = " << jacobian_timings.jacobian_operator_sparse_assembly_seconds
+                << ", operator EBC elimination = " << jacobian_timings.jacobian_operator_essential_elimination_seconds
+                << '\n';
     }
     if (!nonlinear_solver.GetConverged()) {
       throw std::runtime_error("Nonlinear solve failed to converge at load step " + std::to_string(step + 1));
     }
   }
 
-  mfem::out << "Converged load steps: " << num_converged_steps << "/" << num_steps << '\n';
+  if (rank == 0) {
+    mfem::out << "Converged load steps: " << num_converged_steps << "/" << num_steps << '\n';
+  }
 }
 
 }  // namespace smith

From f8ff2c0750418f6f596727f25799da2271b14775 Mon Sep 17 00:00:00 2001
From: Michael Tupek <mrtupek@gmail.com>
Date: Fri, 8 May 2026 09:17:05 -0600
Subject: [PATCH 09/27] Temporary CMake adjustment.

---
 cmake/thirdparty/FindMFEM.cmake | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/cmake/thirdparty/FindMFEM.cmake b/cmake/thirdparty/FindMFEM.cmake
index cd7d45c8af..bb7d37037d 100644
--- a/cmake/thirdparty/FindMFEM.cmake
+++ b/cmake/thirdparty/FindMFEM.cmake
@@ -107,6 +107,17 @@ else()
         # filter out items containing "Xlinker"
         set(_mfem_tpl_list ${mfem_tpl_lnk_flags})
         separate_arguments(_mfem_tpl_list)
+        foreach(_link_flag ${_mfem_tpl_list})
+            if(_link_flag MATCHES "^-L(.+)")
+                set(_link_dir "${CMAKE_MATCH_1}")
+                if(EXISTS "${_link_dir}/liblapack.dylib" OR EXISTS "${_link_dir}/libblas.dylib")
+                    list(APPEND _mfem_tpl_link_dirs "${_link_dir}")
+                endif()
+            endif()
+        endforeach()
+        if(_mfem_tpl_link_dirs)
+            list(REMOVE_DUPLICATES _mfem_tpl_link_dirs)
+        endif()
         list(FILTER _mfem_tpl_list EXCLUDE REGEX Xlinker)
         # On Apple, -Wl,-rpath,... entries duplicate CMake's own rpath management
         # (CMAKE_INSTALL_RPATH_USE_LINK_PATH) and cause ld "duplicate -rpath" warnings
@@ -138,6 +149,12 @@ else()
         TREAT_INCLUDES_AS_SYSTEM ON
         EXPORTABLE    ON)
 
+    if(APPLE AND _mfem_tpl_link_dirs)
+        foreach(_link_dir ${_mfem_tpl_link_dirs})
+            target_link_options(mfem INTERFACE "LINKER:-rpath,${_link_dir}")
+        endforeach()
+    endif()
+
     install(TARGETS          mfem
         EXPORT               smith-targets
         DESTINATION          lib

From 33dd85b7f79c53946e7db2fb9be1cfada627b24c Mon Sep 17 00:00:00 2001
From: Michael Tupek <mrtupek@gmail.com>
Date: Fri, 8 May 2026 17:06:15 -0600
Subject: [PATCH 10/27] Simplify the experimental attempts.

---
 src/smith/numerics/CMakeLists.txt             |    1 -
 src/smith/numerics/equation_solver.cpp        | 1033 +----------------
 src/smith/numerics/equation_solver.hpp        |  201 ----
 src/smith/numerics/functional/functional.hpp  |   65 --
 .../tests/functional_comparisons.cpp          |   81 --
 .../numerics/mfem_trust_region_subspace.cpp   |   46 +-
 .../numerics/petsc_trust_region_subspace.cpp  |    2 +-
 src/smith/numerics/solver_config.hpp          |   64 -
 .../numerics/tests/test_equationsolver.cpp    |  200 ----
 .../tests/test_trust_region_solver_mfem.cpp   |  301 -----
 .../tests/test_trust_region_solver_petsc.cpp  |    4 +
 .../numerics/trust_region_cubic_subspace.cpp  |  461 --------
 src/smith/numerics/trust_region_solver.hpp    |   19 +-
 src/smith/physics/dfem_weak_form.hpp          |   12 -
 src/smith/physics/functional_weak_form.hpp    |   20 -
 src/smith/physics/solid_mechanics.hpp         |  141 ---
 .../physics/tests/shallow_arch_buckling.cpp   |  187 +--
 src/smith/physics/tests/solid.cpp             |   60 -
 .../physics/tests/solid_statics_patch.cpp     |   79 --
 .../tests/test_functional_weak_form.cpp       |   32 -
 src/smith/physics/weak_form.hpp               |   17 -
 21 files changed, 31 insertions(+), 2995 deletions(-)
 delete mode 100644 src/smith/numerics/trust_region_cubic_subspace.cpp

diff --git a/src/smith/numerics/CMakeLists.txt b/src/smith/numerics/CMakeLists.txt
index e8c767394d..8bc793fedd 100644
--- a/src/smith/numerics/CMakeLists.txt
+++ b/src/smith/numerics/CMakeLists.txt
@@ -21,7 +21,6 @@ set(numerics_sources
     equation_solver.cpp
     petsc_trust_region_subspace.cpp
     mfem_trust_region_subspace.cpp
-    trust_region_cubic_subspace.cpp
     odes.cpp
     petsc_solvers.cpp
     block_preconditioner.cpp
diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp
index 29d9af1e3f..4db0fc096d 100644
--- a/src/smith/numerics/equation_solver.cpp
+++ b/src/smith/numerics/equation_solver.cpp
@@ -407,26 +407,12 @@ class TrustRegion : public mfem::NewtonSolver {
   mutable size_t num_subspace_solve_start_hess_vecs = 0;
   /// internal counter for quadratic subspace backend solves
   mutable size_t num_quadratic_subspace_solves = 0;
-  /// internal counter for cubic subspace backend attempts
-  mutable size_t num_cubic_subspace_attempts = 0;
-  /// internal counter for cubic subspace candidates used
-  mutable size_t num_cubic_subspace_uses = 0;
-  /// internal counter for cubic attempts that returned quadratic candidate
-  mutable size_t num_cubic_subspace_quadratic_fallbacks = 0;
   /// internal counter for matrix assembles
   mutable size_t num_jacobian_assembles = 0;
-  /// internal counter for JacobianOperator evaluations
-  mutable size_t num_jacobian_operator_evals = 0;
-  /// internal counter for direct diagonal assemblies
-  mutable size_t num_diagonal_assembles = 0;
   /// internal counter for model CG iterations
   mutable size_t num_cg_iterations = 0;
   /// internal counter for preconditioner operator updates
   mutable size_t num_preconditioner_updates = 0;
-  /// internal counter for nonmonotone accepted steps
-  mutable size_t num_nonmonotone_work_accepts = 0;
-  /// internal counter for accepted steps that monotone acceptance would reject
-  mutable size_t num_monotone_work_would_reject = 0;
   /// time spent evaluating residuals
   mutable double residual_seconds = 0.0;
   /// time spent applying Hessian-vector products
@@ -437,14 +423,6 @@ class TrustRegion : public mfem::NewtonSolver {
   mutable double cauchy_hess_vec_seconds = 0.0;
   /// time spent applying line-search Hessian-vector products
   mutable double line_search_hess_vec_seconds = 0.0;
-  /// time spent applying JacobianOperator Hessian-vector products
-  mutable double jacobian_operator_hess_vec_seconds = 0.0;
-  /// time spent evaluating JacobianOperator factories
-  mutable double jacobian_operator_eval_seconds = 0.0;
-  /// time spent directly assembling diagonals
-  mutable double diagonal_assembly_seconds = 0.0;
-  /// time spent inverting direct diagonals
-  mutable double diagonal_invert_seconds = 0.0;
   /// time spent applying preconditioners
   mutable double preconditioner_seconds = 0.0;
   /// total time spent in the nonlinear solve
@@ -507,21 +485,6 @@ class TrustRegion : public mfem::NewtonSolver {
   mutable double preconditioner_update_seconds = 0.0;
   /// time spent in preconditioner SetOperator calls
   mutable double preconditioner_setup_seconds = 0.0;
-  /// current accumulated actual work-surrogate level for nonmonotone acceptance
-  mutable double current_work_objective = 0.0;
-  /// last nonmonotone reference work surrogate
-  mutable double last_nonmonotone_work_reference = 0.0;
-  /// Optional JacobianOperator factory
-  JacobianOperatorFactory jacobian_operator_factory;
-  /// Cached JacobianOperator for current TrustRegion iteration
-  mutable std::unique_ptr<JacobianOperator> current_jacobian_operator;
-  /// Inverted scalar diagonal preconditioner for JacobianOperator mode
-  mutable mfem::Vector inverse_diagonal_preconditioner;
-  /// Current assembled Hessian clone used to preserve a valid previous Hessian
-  mutable std::unique_ptr<mfem::Operator> current_hessian;
-  /// Previous assembled Hessian used for cubic finite-difference subspace models
-  mutable std::unique_ptr<mfem::Operator> previous_hessian;
-
 #ifdef MFEM_USE_MPI
   /// constructor
   TrustRegion(MPI_Comm comm_, const NonlinearSolverOptions& nonlinear_opts, const LinearSolverOptions& linear_opts,
@@ -691,26 +654,6 @@ class TrustRegion : public mfem::NewtonSolver {
     ++num_line_search_hess_vecs;
   }
 
-  double nonmonotoneWorkReference(const std::vector<double>& work_objective_history) const
-  {
-    if (work_objective_history.empty()) {
-      return current_work_objective;
-    }
-    return *std::max_element(work_objective_history.begin(), work_objective_history.end());
-  }
-
-  void pushWorkObjectiveHistory(std::vector<double>& work_objective_history, double objective) const
-  {
-    const int window = nonlinear_options.trust_nonmonotone_window;
-    if (window <= 0) {
-      return;
-    }
-    work_objective_history.push_back(objective);
-    while (work_objective_history.size() > static_cast<size_t>(window)) {
-      work_objective_history.erase(work_objective_history.begin());
-    }
-  }
-
   void pushAcceptedStepHistory(const mfem::Vector& step) const
   {
     if (nonlinear_options.trust_num_past_steps <= 0) {
@@ -745,10 +688,7 @@ class TrustRegion : public mfem::NewtonSolver {
                                [[maybe_unused]] const std::vector<const mfem::Vector*> Hds,
                                [[maybe_unused]] const mfem::Vector& g, [[maybe_unused]] double delta,
                                [[maybe_unused]] int num_leftmost,
-                               [[maybe_unused]] std::vector<std::shared_ptr<mfem::Vector>>& candidate_left_mosts,
-                               [[maybe_unused]] const mfem::Vector& previous_step,
-                               [[maybe_unused]] const mfem::Vector* previous_H_previous_step,
-                               [[maybe_unused]] bool allow_cubic_subspace) const
+                               [[maybe_unused]] std::vector<std::shared_ptr<mfem::Vector>>& candidate_left_mosts) const
   {
     SMITH_MARK_FUNCTION;
     auto subspace_start = Clock::now();
@@ -780,31 +720,9 @@ class TrustRegion : public mfem::NewtonSolver {
 
     try {
       auto backend_start = Clock::now();
-      if (nonlinear_options.trust_use_cubic_subspace && allow_cubic_subspace && previous_hessian) {
-        std::vector<mfem::Vector> previous_H_vectors;
-        std::vector<const mfem::Vector*> previous_H_directions;
-        previous_H_vectors.reserve(directions.size());
-        previous_H_directions.reserve(directions.size());
-        for (const auto* direction : directions) {
-          previous_H_vectors.emplace_back(direction->Size());
-          previous_hessian->Mult(*direction, previous_H_vectors.back());
-          previous_H_directions.emplace_back(&previous_H_vectors.back());
-        }
-        ++num_cubic_subspace_attempts;
-        bool used_cubic = false;
-        std::tie(sol, leftvecs, leftvals, energy_change) = solveCubicSubspaceProblemMfem(
-            directions, H_directions, previous_H_directions, previous_step, b, delta, num_leftmost, &used_cubic);
-        if (used_cubic) {
-          ++num_cubic_subspace_uses;
-        } else {
-          ++num_cubic_subspace_quadratic_fallbacks;
-          ++num_quadratic_subspace_solves;
-        }
-      } else {
-        ++num_quadratic_subspace_solves;
-        std::tie(sol, leftvecs, leftvals, energy_change) =
-            solveSubspaceProblem(directions, H_directions, b, delta, num_leftmost);
-      }
+      ++num_quadratic_subspace_solves;
+      std::tie(sol, leftvecs, leftvals, energy_change) =
+          solveSubspaceProblem(directions, H_directions, b, delta, num_leftmost);
       subspace_backend_seconds += secondsSince(backend_start);
     } catch (const std::exception& e) {
       if (print_level >= 1) {
@@ -1031,63 +949,14 @@ class TrustRegion : public mfem::NewtonSolver {
     SMITH_MARK_FUNCTION;
     auto start = Clock::now();
     ++num_jacobian_assembles;
-    if (nonlinear_options.trust_use_cubic_subspace) {
-      previous_hessian = std::move(current_hessian);
-    }
     grad = &oper->GetGradient(x);
     if (nonlinear_options.force_monolithic) {
       auto* grad_blocked = dynamic_cast<mfem::BlockOperator*>(grad);
       if (grad_blocked) grad = buildMonolithicMatrix(*grad_blocked).release();
     }
-    if (nonlinear_options.trust_use_cubic_subspace) {
-      current_hessian = cloneAssembledOperator(*grad);
-    }
     jacobian_assembly_seconds += secondsSince(start);
   }
 
-  /// Set an optional JacobianOperator factory.
-  void setJacobianOperator(JacobianOperatorFactory jacobian_operator)
-  {
-    jacobian_operator_factory = std::move(jacobian_operator);
-  }
-
-  /// Evaluate and cache the JacobianOperator at x.
-  void updateJacobianOperator(const mfem::Vector& x) const
-  {
-    SMITH_MARK_FUNCTION;
-    SLIC_ERROR_ROOT_IF(!jacobian_operator_factory, "No JacobianOperator factory is registered.");
-    auto start = Clock::now();
-    ++num_jacobian_operator_evals;
-    current_jacobian_operator = jacobian_operator_factory(x);
-    SLIC_ERROR_ROOT_IF(!current_jacobian_operator, "JacobianOperator factory returned a null operator.");
-    jacobian_operator_eval_seconds += secondsSince(start);
-  }
-
-  /// Assemble and invert the scalar diagonal preconditioner from the current JacobianOperator.
-  void updateDiagonalPreconditioner() const
-  {
-    SMITH_MARK_FUNCTION;
-    SLIC_ERROR_ROOT_IF(!current_jacobian_operator, "Cannot build diagonal preconditioner without a JacobianOperator.");
-
-    auto diagonal_start = Clock::now();
-    current_jacobian_operator->assembleDiagonal(inverse_diagonal_preconditioner);
-    diagonal_assembly_seconds += secondsSince(diagonal_start);
-    ++num_diagonal_assembles;
-
-    auto invert_start = Clock::now();
-    double max_abs_diag = 0.0;
-    for (int i = 0; i < inverse_diagonal_preconditioner.Size(); ++i) {
-      max_abs_diag = std::max(max_abs_diag, std::abs(inverse_diagonal_preconditioner[i]));
-    }
-
-    const double floor = nonlinear_options.pcg_diagonal_floor * max_abs_diag;
-    SLIC_ERROR_ROOT_IF(!(floor > 0.0), "Cannot invert a zero Jacobian diagonal for TrustRegion preconditioning.");
-    for (int i = 0; i < inverse_diagonal_preconditioner.Size(); ++i) {
-      inverse_diagonal_preconditioner[i] = 1.0 / std::max(std::abs(inverse_diagonal_preconditioner[i]), floor);
-    }
-    diagonal_invert_seconds += secondsSince(invert_start);
-  }
-
   /// evaluate the nonlinear residual
   mfem::real_t computeResidual(const mfem::Vector& x_, mfem::Vector& r_) const
   {
@@ -1106,16 +975,8 @@ class TrustRegion : public mfem::NewtonSolver {
     SMITH_MARK_FUNCTION;
     auto start = Clock::now();
     ++num_hess_vecs;
-    if (nonlinear_options.trust_use_jacobian_operator) {
-      SLIC_ERROR_ROOT_IF(!current_jacobian_operator, "TrustRegion JacobianOperator mode has no current operator.");
-      current_jacobian_operator->Mult(x_, v_);
-      const double seconds = secondsSince(start);
-      hess_vec_seconds += seconds;
-      jacobian_operator_hess_vec_seconds += seconds;
-    } else {
-      grad->Mult(x_, v_);
-      hess_vec_seconds += secondsSince(start);
-    }
+    grad->Mult(x_, v_);
+    hess_vec_seconds += secondsSince(start);
   }
 
   /// apply trust region specific preconditioner
@@ -1124,16 +985,7 @@ class TrustRegion : public mfem::NewtonSolver {
     SMITH_MARK_FUNCTION;
     auto start = Clock::now();
     ++num_preconds;
-    if (nonlinear_options.trust_use_jacobian_operator) {
-      SLIC_ERROR_ROOT_IF(inverse_diagonal_preconditioner.Size() != x_.Size(),
-                         "TrustRegion JacobianOperator diagonal preconditioner is not initialized.");
-      v_.SetSize(x_.Size());
-      for (int i = 0; i < x_.Size(); ++i) {
-        v_[i] = inverse_diagonal_preconditioner[i] * x_[i];
-      }
-    } else {
-      tr_precond.Mult(x_, v_);
-    }
+    tr_precond.Mult(x_, v_);
     preconditioner_seconds += secondsSince(start);
   };
 
@@ -1147,8 +999,6 @@ class TrustRegion : public mfem::NewtonSolver {
             .num_line_search_hess_vecs = num_line_search_hess_vecs,
             .num_preconds = num_preconds,
             .num_jacobian_assembles = num_jacobian_assembles,
-            .num_jacobian_operator_evals = num_jacobian_operator_evals,
-            .num_diagonal_assembles = num_diagonal_assembles,
             .num_cg_iterations = num_cg_iterations,
             .num_subspace_solves = num_subspace_solves,
             .num_subspace_leftmost_hess_vecs = num_subspace_leftmost_hess_vecs,
@@ -1159,21 +1009,12 @@ class TrustRegion : public mfem::NewtonSolver {
             .num_subspace_solve_start_vectors = num_subspace_solve_start_vectors,
             .num_subspace_solve_start_hess_vecs = num_subspace_solve_start_hess_vecs,
             .num_quadratic_subspace_solves = num_quadratic_subspace_solves,
-            .num_cubic_subspace_attempts = num_cubic_subspace_attempts,
-            .num_cubic_subspace_uses = num_cubic_subspace_uses,
-            .num_cubic_subspace_quadratic_fallbacks = num_cubic_subspace_quadratic_fallbacks,
             .num_preconditioner_updates = num_preconditioner_updates,
-            .num_nonmonotone_work_accepts = num_nonmonotone_work_accepts,
-            .num_monotone_work_would_reject = num_monotone_work_would_reject,
             .residual_seconds = residual_seconds,
             .hess_vec_seconds = hess_vec_seconds,
             .model_hess_vec_seconds = model_hess_vec_seconds,
             .cauchy_hess_vec_seconds = cauchy_hess_vec_seconds,
             .line_search_hess_vec_seconds = line_search_hess_vec_seconds,
-            .jacobian_operator_hess_vec_seconds = jacobian_operator_hess_vec_seconds,
-            .jacobian_operator_eval_seconds = jacobian_operator_eval_seconds,
-            .diagonal_assembly_seconds = diagonal_assembly_seconds,
-            .diagonal_invert_seconds = diagonal_invert_seconds,
             .preconditioner_seconds = preconditioner_seconds,
             .total_seconds = total_seconds,
             .model_solve_seconds = model_solve_seconds,
@@ -1213,9 +1054,7 @@ class TrustRegion : public mfem::NewtonSolver {
             .projection_seconds = projection_seconds,
             .jacobian_assembly_seconds = jacobian_assembly_seconds,
             .preconditioner_update_seconds = preconditioner_update_seconds,
-            .preconditioner_setup_seconds = preconditioner_setup_seconds,
-            .last_work_objective = current_work_objective,
-            .last_nonmonotone_work_reference = last_nonmonotone_work_reference};
+            .preconditioner_setup_seconds = preconditioner_setup_seconds};
   }
 
   /// @overload
@@ -1246,25 +1085,14 @@ class TrustRegion : public mfem::NewtonSolver {
     num_subspace_solve_start_vectors = 0;
     num_subspace_solve_start_hess_vecs = 0;
     num_quadratic_subspace_solves = 0;
-    num_cubic_subspace_attempts = 0;
-    num_cubic_subspace_uses = 0;
-    num_cubic_subspace_quadratic_fallbacks = 0;
     num_jacobian_assembles = 0;
-    num_jacobian_operator_evals = 0;
-    num_diagonal_assembles = 0;
     num_cg_iterations = 0;
     num_preconditioner_updates = 0;
-    num_nonmonotone_work_accepts = 0;
-    num_monotone_work_would_reject = 0;
     residual_seconds = 0.0;
     hess_vec_seconds = 0.0;
     model_hess_vec_seconds = 0.0;
     cauchy_hess_vec_seconds = 0.0;
     line_search_hess_vec_seconds = 0.0;
-    jacobian_operator_hess_vec_seconds = 0.0;
-    jacobian_operator_eval_seconds = 0.0;
-    diagonal_assembly_seconds = 0.0;
-    diagonal_invert_seconds = 0.0;
     preconditioner_seconds = 0.0;
     total_seconds = 0.0;
     model_solve_seconds = 0.0;
@@ -1296,19 +1124,13 @@ class TrustRegion : public mfem::NewtonSolver {
     jacobian_assembly_seconds = 0.0;
     preconditioner_update_seconds = 0.0;
     preconditioner_setup_seconds = 0.0;
-    current_work_objective = 0.0;
-    last_nonmonotone_work_reference = 0.0;
     accepted_step_history.clear();
     resetTrustRegionSubspaceTimings();
     solve_start_x.SetSize(X.Size());
     solve_start_x = X;
     min_residual_x.SetSize(X.Size());
     min_residual_x = X;
-    current_jacobian_operator.reset();
-    inverse_diagonal_preconditioner.SetSize(0);
     previous_H_left_mosts.clear();
-    current_hessian.reset();
-    previous_hessian.reset();
 
     real_t norm, norm_goal = 0.0;
     norm = initial_norm = computeResidual(X, r);
@@ -1321,11 +1143,6 @@ class TrustRegion : public mfem::NewtonSolver {
       mfem::out << "TrustRegion iteration " << std::setw(3) << 0 << " : ||r|| = " << std::setw(13) << norm << "\n";
     }
 
-    SLIC_ERROR_ROOT_IF(nonlinear_options.trust_nonmonotone_window < 0,
-                       "TrustRegion requires trust_nonmonotone_window >= 0");
-    std::vector<double> work_objective_history;
-    pushWorkObjectiveHistory(work_objective_history, current_work_objective);
-
     prec->iterative_mode = false;
     tr_precond.iterative_mode = false;
 
@@ -1381,26 +1198,17 @@ class TrustRegion : public mfem::NewtonSolver {
         break;
       }
 
-      if (nonlinear_options.trust_use_jacobian_operator) {
-        SLIC_ERROR_ROOT_IF(!jacobian_operator_factory,
-                           "TrustRegion JacobianOperator mode requires a registered JacobianOperator factory.");
-        updateJacobianOperator(X);
-        updateDiagonalPreconditioner();
+      assembleJacobian(X);
+
+      if (it == 0 || (trResults.cg_iterations_count >= settings.max_cg_iterations ||
+                      cumulative_cg_iters_from_last_precond_update >= settings.max_cumulative_iteration)) {
+        auto preconditioner_update_start = Clock::now();
+        auto preconditioner_setup_start = Clock::now();
+        tr_precond.SetOperator(*grad);
+        preconditioner_setup_seconds += secondsSince(preconditioner_setup_start);
+        preconditioner_update_seconds += secondsSince(preconditioner_update_start);
         ++num_preconditioner_updates;
         cumulative_cg_iters_from_last_precond_update = 0;
-      } else {
-        assembleJacobian(X);
-
-        if (it == 0 || (trResults.cg_iterations_count >= settings.max_cg_iterations ||
-                        cumulative_cg_iters_from_last_precond_update >= settings.max_cumulative_iteration)) {
-          auto preconditioner_update_start = Clock::now();
-          auto preconditioner_setup_start = Clock::now();
-          tr_precond.SetOperator(*grad);
-          preconditioner_setup_seconds += secondsSince(preconditioner_setup_start);
-          preconditioner_update_seconds += secondsSince(preconditioner_update_start);
-          ++num_preconditioner_updates;
-          cumulative_cg_iters_from_last_precond_update = 0;
-        }
       }
 
       auto hess_vec_func = [&](const mfem::Vector& x_, mfem::Vector& v_) { hessVec(x_, v_); };
@@ -1481,8 +1289,6 @@ class TrustRegion : public mfem::NewtonSolver {
                                        ((d_norm > (1.0 - 1.0e-6) * tr_size) && lineSearchIter > 1));
         bool use_with_option2 = (subspace_option >= 2) && (d_norm > (1.0 - 1.0e-6) * tr_size);
         bool use_with_option3 = (subspace_option >= 3);
-        const bool allow_cubic_subspace =
-            trResults.interior_status == TrustRegionResults::Status::NegativeCurvature || use_with_option2;
 
         if (use_with_option1 || use_with_option2 || use_with_option3) {
           if (!have_computed_Hvs) {
@@ -1581,9 +1387,7 @@ class TrustRegion : public mfem::NewtonSolver {
               H_ds.push_back(&H_min_residual_direction);
             }
           }
-          solveTheSubspaceProblem(trResults.d, hess_vec_func, ds, H_ds, r, tr_size, num_leftmost, candidate_left_mosts,
-                                  trResults.d_old,
-                                  trResults.has_d_old ? &trResults.H_d_old_at_accept : nullptr, allow_cubic_subspace);
+          solveTheSubspaceProblem(trResults.d, hess_vec_func, ds, H_ds, r, tr_size, num_leftmost, candidate_left_mosts);
         }
 
         static constexpr double roundOffTol = 0.0;  // 1e-14;
@@ -1614,9 +1418,6 @@ class TrustRegion : public mfem::NewtonSolver {
           normPred = std::numeric_limits<double>::max();
         }
 
-        const double trial_work_objective = current_work_objective + realObjective;
-        last_nonmonotone_work_reference = nonmonotoneWorkReference(work_objective_history);
-
         if (normPred <= norm_goal) {
           trResults.d_old = trResults.d;
           trResults.H_d_old_at_accept = trResults.H_d;
@@ -1630,8 +1431,6 @@ class TrustRegion : public mfem::NewtonSolver {
           r = r_pred;
           vector_copy_scale_seconds += secondsSince(copy_start);
           norm = normPred;
-          current_work_objective = trial_work_objective;
-          pushWorkObjectiveHistory(work_objective_history, current_work_objective);
           line_search_seconds += secondsSince(line_search_start);
           if (print_level >= 2) {
             printTrustRegionInfo(realObjective, modelObjective, trResults.cg_iterations_count, tr_size, true);
@@ -1671,11 +1470,7 @@ class TrustRegion : public mfem::NewtonSolver {
         // modelRes = g + Jd
         // modelResNorm = np.linalg.norm(modelRes)
         // realResNorm = np.linalg.norm(gy)
-        const bool monotoneAccept = rho >= settings.eta1 && rho <= settings.eta4;
-        const bool nonmonotoneAccept =
-            nonlinear_options.trust_nonmonotone_window > 0 && modelObjective < 0.0 && rho <= settings.eta4 &&
-            trial_work_objective <= last_nonmonotone_work_reference + settings.eta1 * modelObjective;
-        bool willAccept = monotoneAccept || nonmonotoneAccept;  // or (rho >= -0 and realResNorm <= gNorm)
+        const bool willAccept = rho >= settings.eta1 && rho <= settings.eta4;
 
         if (print_level >= 2) {
           printTrustRegionInfo(realObjective, modelObjective, trResults.cg_iterations_count, tr_size, willAccept);
@@ -1691,17 +1486,11 @@ class TrustRegion : public mfem::NewtonSolver {
           if (!candidate_left_mosts.empty()) {
             left_mosts = std::move(candidate_left_mosts);
           }
-          if (nonmonotoneAccept && !monotoneAccept) {
-            ++num_nonmonotone_work_accepts;
-            ++num_monotone_work_would_reject;
-          }
           copy_start = Clock::now();
           X = x_pred;
           r = r_pred;
           vector_copy_scale_seconds += secondsSince(copy_start);
           norm = normPred;
-          current_work_objective = trial_work_objective;
-          pushWorkObjectiveHistory(work_objective_history, current_work_objective);
           line_search_seconds += secondsSince(line_search_start);
           break;
         }
@@ -1731,754 +1520,6 @@ class TrustRegion : public mfem::NewtonSolver {
   }
 };
 
-/**
- * @brief Skeleton for a nonlinear preconditioned conjugate-gradient block solver.
- *
- * The full algorithm is added in a follow-on chunk. This class establishes the Smith/MFEM integration points used by
- * that implementation: residual evaluation, Jacobian assembly, Hessian-vector products, preconditioning, counters, and
- * standard nonlinear convergence bookkeeping.
- */
-class PcgBlockSolver : public mfem::NewtonSolver {
- protected:
-  /// Trial solution vector
-  mutable mfem::Vector x_trial;
-  /// Trial residual vector
-  mutable mfem::Vector r_trial;
-  /// Scratch vector
-  mutable mfem::Vector scratch;
-
-  /// Nonlinear solution options
-  NonlinearSolverOptions nonlinear_options;
-
-  /// Preconditioner used by the PCG-block recurrence
-  Solver& pcg_precond;
-
-  /// Reconstructed Smith print level
-  mutable size_t print_level = 0;
-
- public:
-  /// Internal counter for hess-vecs
-  mutable size_t num_hess_vecs = 0;
-  /// Internal counter for preconditions
-  mutable size_t num_preconds = 0;
-  /// Internal counter for residuals
-  mutable size_t num_residuals = 0;
-  /// Internal counter for matrix assembles
-  mutable size_t num_jacobian_assembles = 0;
-  /// Internal counter for JacobianOperator evaluations
-  mutable size_t num_jacobian_operator_evals = 0;
-  /// Internal counter for direct diagonal assemblies
-  mutable size_t num_diagonal_assembles = 0;
-  /// Internal counter for preconditioner operator updates
-  mutable size_t num_preconditioner_updates = 0;
-  /// Internal counter for accepted prefix blocks
-  mutable size_t num_prefix_accepts = 0;
-  /// Internal counter for momentum resets
-  mutable size_t num_momentum_resets = 0;
-  /// Internal counter for nonzero PCG beta values
-  mutable size_t num_nonzero_beta = 0;
-  /// Internal counter for zero PCG beta values
-  mutable size_t num_zero_beta = 0;
-  /// Internal counter for accepted blocks
-  mutable size_t num_blocks = 0;
-  /// Internal counter for rejected blocks
-  mutable size_t num_block_rejects = 0;
-  /// Internal counter for Powell restarts
-  mutable size_t num_powell_restarts = 0;
-  /// Internal counter for descent-guard restarts
-  mutable size_t num_descent_restarts = 0;
-  /// Internal counter for non-positive curvature directions
-  mutable size_t num_negative_curvature = 0;
-  /// Internal counter for line-search backtracks
-  mutable size_t num_line_search_backtracks = 0;
-  /// Internal counter for positive-curvature steps capped by the trust radius
-  mutable size_t num_trust_capped_steps = 0;
-  /// Internal counter for accepted inner PCG steps
-  mutable size_t num_accepted_steps = 0;
-  /// Internal counter for trial inner PCG steps
-  mutable size_t num_trial_steps = 0;
-  /// Last trust scale used by the solver
-  mutable double final_h_scale = 1.0;
-  /// Last accepted block trust ratio
-  mutable double last_trust_ratio = 0.0;
-  /// Time spent evaluating residuals
-  mutable double residual_seconds = 0.0;
-  /// Time spent applying all Hessian-vector products
-  mutable double hess_vec_seconds = 0.0;
-  /// Time spent applying JacobianOperator Hessian-vector products
-  mutable double jacobian_operator_hess_vec_seconds = 0.0;
-  /// Time spent applying assembled Hessian-vector products
-  mutable double assembled_hess_vec_seconds = 0.0;
-  /// Time spent applying legacy matrix-free tangent products
-  mutable double matrix_free_hess_vec_seconds = 0.0;
-  /// Time spent applying preconditioners
-  mutable double preconditioner_seconds = 0.0;
-  /// Time spent evaluating JacobianOperator factories
-  mutable double jacobian_operator_eval_seconds = 0.0;
-  /// Time spent assembling sparse Jacobians
-  mutable double jacobian_assembly_seconds = 0.0;
-  /// Time spent directly assembling diagonals
-  mutable double diagonal_assembly_seconds = 0.0;
-  /// Time spent inverting direct diagonals
-  mutable double diagonal_invert_seconds = 0.0;
-  /// Time spent refreshing preconditioner data
-  mutable double preconditioner_update_seconds = 0.0;
-  /// Time spent in preconditioner SetOperator calls
-  mutable double preconditioner_setup_seconds = 0.0;
-
-  /// Optional matrix-free tangent action, y = J(x) dx
-  MatrixFreeTangentAction matrix_free_tangent_action;
-  /// Optional JacobianOperator factory
-  JacobianOperatorFactory jacobian_operator_factory;
-  /// Cached JacobianOperator for the current PCG block
-  mutable std::unique_ptr<JacobianOperator> current_jacobian_operator;
-  /// Owned sparse Jacobian assembled through the JacobianOperator fallback path
-  mutable std::unique_ptr<mfem::HypreParMatrix> assembled_jacobian_from_operator;
-  /// Inverted scalar diagonal preconditioner for the current PCG block
-  mutable mfem::Vector inverse_diagonal_preconditioner;
-  /// Whether the current PCG block should use the scalar diagonal preconditioner
-  mutable bool use_inverse_diagonal_preconditioner = false;
-
-#ifdef MFEM_USE_MPI
-  /// Constructor
-  PcgBlockSolver(MPI_Comm comm_, const NonlinearSolverOptions& nonlinear_opts, Solver& preconditioner)
-      : mfem::NewtonSolver(comm_), nonlinear_options(nonlinear_opts), pcg_precond(preconditioner)
-  {
-  }
-#endif
-
-  /// Assemble the Jacobian at x.
-  void assembleJacobian(const mfem::Vector& x) const
-  {
-    SMITH_MARK_FUNCTION;
-    auto start = Clock::now();
-    ++num_jacobian_assembles;
-    grad = &oper->GetGradient(x);
-    if (nonlinear_options.force_monolithic) {
-      auto* grad_blocked = dynamic_cast<mfem::BlockOperator*>(grad);
-      if (grad_blocked) grad = buildMonolithicMatrix(*grad_blocked).release();
-    }
-    jacobian_assembly_seconds += secondsSince(start);
-  }
-
-  /// Evaluate the nonlinear residual.
-  mfem::real_t computeResidual(const mfem::Vector& x, mfem::Vector& residual) const
-  {
-    SMITH_MARK_FUNCTION;
-    auto start = Clock::now();
-    ++num_residuals;
-    oper->Mult(x, residual);
-    const auto norm = Norm(residual);
-    residual_seconds += secondsSince(start);
-    return norm;
-  }
-
-  /// Set an optional matrix-free tangent action.
-  void setMatrixFreeTangentAction(MatrixFreeTangentAction tangent_action)
-  {
-    matrix_free_tangent_action = std::move(tangent_action);
-  }
-
-  /// Set an optional JacobianOperator factory.
-  void setJacobianOperator(JacobianOperatorFactory jacobian_operator)
-  {
-    jacobian_operator_factory = std::move(jacobian_operator);
-  }
-
-  /// Evaluate and cache the JacobianOperator at x.
-  void updateJacobianOperator(const mfem::Vector& x) const
-  {
-    SMITH_MARK_FUNCTION;
-    SLIC_ERROR_ROOT_IF(!jacobian_operator_factory, "No JacobianOperator factory is registered.");
-    auto start = Clock::now();
-    ++num_jacobian_operator_evals;
-    current_jacobian_operator = jacobian_operator_factory(x);
-    SLIC_ERROR_ROOT_IF(!current_jacobian_operator, "JacobianOperator factory returned a null operator.");
-    jacobian_operator_eval_seconds += secondsSince(start);
-  }
-
-  /// Assemble and invert the scalar diagonal preconditioner from the current JacobianOperator.
-  void updateDiagonalPreconditioner() const
-  {
-    SMITH_MARK_FUNCTION;
-    SLIC_ERROR_ROOT_IF(!current_jacobian_operator, "Cannot build diagonal preconditioner without a JacobianOperator.");
-
-    auto diagonal_start = Clock::now();
-    current_jacobian_operator->assembleDiagonal(inverse_diagonal_preconditioner);
-    diagonal_assembly_seconds += secondsSince(diagonal_start);
-    ++num_diagonal_assembles;
-
-    auto invert_start = Clock::now();
-    double max_abs_diag = 0.0;
-    for (int i = 0; i < inverse_diagonal_preconditioner.Size(); ++i) {
-      max_abs_diag = std::max(max_abs_diag, std::abs(inverse_diagonal_preconditioner[i]));
-    }
-
-    const double floor = nonlinear_options.pcg_diagonal_floor * max_abs_diag;
-    SLIC_ERROR_ROOT_IF(!(floor > 0.0), "Cannot invert a zero Jacobian diagonal for PCG-block preconditioning.");
-    for (int i = 0; i < inverse_diagonal_preconditioner.Size(); ++i) {
-      inverse_diagonal_preconditioner[i] = 1.0 / std::max(std::abs(inverse_diagonal_preconditioner[i]), floor);
-    }
-    diagonal_invert_seconds += secondsSince(invert_start);
-
-    use_inverse_diagonal_preconditioner = true;
-  }
-
-  /// Refresh the tangent and preconditioner used by the next PCG block attempt.
-  void refreshBlockOperators(const mfem::Vector& x) const
-  {
-    auto refresh_start = Clock::now();
-    if (jacobian_operator_factory) {
-      updateJacobianOperator(x);
-      ++num_preconditioner_updates;
-      if (nonlinear_options.pcg_use_jacobian_diagonal_preconditioner) {
-        updateDiagonalPreconditioner();
-      } else {
-        use_inverse_diagonal_preconditioner = false;
-        auto assembly_start = Clock::now();
-        ++num_jacobian_assembles;
-        assembled_jacobian_from_operator = current_jacobian_operator->assemble();
-        jacobian_assembly_seconds += secondsSince(assembly_start);
-        grad = assembled_jacobian_from_operator.get();
-        auto setup_start = Clock::now();
-        pcg_precond.SetOperator(*grad);
-        preconditioner_setup_seconds += secondsSince(setup_start);
-      }
-    } else {
-      SLIC_ERROR_ROOT_IF(nonlinear_options.pcg_use_jacobian_diagonal_preconditioner,
-                         "PCG-block diagonal preconditioning requires a registered JacobianOperator.");
-      current_jacobian_operator.reset();
-      use_inverse_diagonal_preconditioner = false;
-      assembleJacobian(x);
-      ++num_preconditioner_updates;
-      auto setup_start = Clock::now();
-      pcg_precond.SetOperator(*grad);
-      preconditioner_setup_seconds += secondsSince(setup_start);
-    }
-    preconditioner_update_seconds += secondsSince(refresh_start);
-  }
-
-  /// Apply the tangent at x to dx.
-  void hessVec(const mfem::Vector& x, const mfem::Vector& dx, mfem::Vector& y) const
-  {
-    SMITH_MARK_FUNCTION;
-    auto start = Clock::now();
-    ++num_hess_vecs;
-    if (current_jacobian_operator) {
-      current_jacobian_operator->Mult(dx, y);
-      const double seconds = secondsSince(start);
-      hess_vec_seconds += seconds;
-      jacobian_operator_hess_vec_seconds += seconds;
-    } else if (jacobian_operator_factory) {
-      updateJacobianOperator(x);
-      current_jacobian_operator->Mult(dx, y);
-      const double seconds = secondsSince(start);
-      hess_vec_seconds += seconds;
-      jacobian_operator_hess_vec_seconds += seconds;
-    } else if (matrix_free_tangent_action) {
-      matrix_free_tangent_action(x, dx, y);
-      const double seconds = secondsSince(start);
-      hess_vec_seconds += seconds;
-      matrix_free_hess_vec_seconds += seconds;
-    } else {
-      grad->Mult(dx, y);
-      const double seconds = secondsSince(start);
-      hess_vec_seconds += seconds;
-      assembled_hess_vec_seconds += seconds;
-    }
-  }
-
-  /// Apply the configured nonlinear PCG preconditioner.
-  void precond(const mfem::Vector& x, mfem::Vector& v) const
-  {
-    SMITH_MARK_FUNCTION;
-    auto start = Clock::now();
-    ++num_preconds;
-    if (use_inverse_diagonal_preconditioner) {
-      SLIC_ERROR_ROOT_IF(inverse_diagonal_preconditioner.Size() != x.Size(),
-                         "PCG-block diagonal preconditioner size does not match the residual vector.");
-      v.SetSize(x.Size());
-      for (int i = 0; i < x.Size(); ++i) {
-        v[i] = inverse_diagonal_preconditioner[i] * x[i];
-      }
-    } else {
-      pcg_precond.Mult(x, v);
-    }
-    preconditioner_seconds += secondsSince(start);
-  }
-
-  /// Return solver diagnostic counters.
-  PcgBlockDiagnostics diagnostics() const
-  {
-    return {.num_residuals = num_residuals,
-            .num_hess_vecs = num_hess_vecs,
-            .num_preconds = num_preconds,
-            .num_jacobian_assembles = num_jacobian_assembles,
-            .num_jacobian_operator_evals = num_jacobian_operator_evals,
-            .num_diagonal_assembles = num_diagonal_assembles,
-            .num_preconditioner_updates = num_preconditioner_updates,
-            .num_prefix_accepts = num_prefix_accepts,
-            .num_momentum_resets = num_momentum_resets,
-            .num_nonzero_beta = num_nonzero_beta,
-            .num_zero_beta = num_zero_beta,
-            .num_blocks = num_blocks,
-            .num_block_rejects = num_block_rejects,
-            .num_powell_restarts = num_powell_restarts,
-            .num_descent_restarts = num_descent_restarts,
-            .num_negative_curvature = num_negative_curvature,
-            .num_line_search_backtracks = num_line_search_backtracks,
-            .num_trust_capped_steps = num_trust_capped_steps,
-            .num_accepted_steps = num_accepted_steps,
-            .num_trial_steps = num_trial_steps,
-            .residual_seconds = residual_seconds,
-            .hess_vec_seconds = hess_vec_seconds,
-            .jacobian_operator_hess_vec_seconds = jacobian_operator_hess_vec_seconds,
-            .assembled_hess_vec_seconds = assembled_hess_vec_seconds,
-            .matrix_free_hess_vec_seconds = matrix_free_hess_vec_seconds,
-            .preconditioner_seconds = preconditioner_seconds,
-            .jacobian_operator_eval_seconds = jacobian_operator_eval_seconds,
-            .jacobian_assembly_seconds = jacobian_assembly_seconds,
-            .diagonal_assembly_seconds = diagonal_assembly_seconds,
-            .diagonal_invert_seconds = diagonal_invert_seconds,
-            .preconditioner_update_seconds = preconditioner_update_seconds,
-            .preconditioner_setup_seconds = preconditioner_setup_seconds,
-            .final_h_scale = final_h_scale,
-            .last_trust_ratio = last_trust_ratio};
-  }
-
-  /// @overload
-  void Mult(const mfem::Vector&, mfem::Vector& X) const
-  {
-    MFEM_ASSERT(oper != NULL, "the Operator is not set (use SetOperator).");
-    MFEM_ASSERT(prec != NULL, "the Solver is not set (use SetSolver).");
-
-    print_level = static_cast<size_t>(std::max(nonlinear_options.print_level, 0));
-    print_level = print_options.iterations ? std::max<size_t>(1, print_level) : print_level;
-    print_level = print_options.summary ? std::max<size_t>(2, print_level) : print_level;
-
-    num_hess_vecs = 0;
-    num_preconds = 0;
-    num_residuals = 0;
-    num_jacobian_assembles = 0;
-    num_jacobian_operator_evals = 0;
-    num_diagonal_assembles = 0;
-    num_preconditioner_updates = 0;
-    num_prefix_accepts = 0;
-    num_momentum_resets = 0;
-    num_nonzero_beta = 0;
-    num_zero_beta = 0;
-    num_blocks = 0;
-    num_block_rejects = 0;
-    num_powell_restarts = 0;
-    num_descent_restarts = 0;
-    num_negative_curvature = 0;
-    num_line_search_backtracks = 0;
-    num_trust_capped_steps = 0;
-    num_accepted_steps = 0;
-    num_trial_steps = 0;
-    final_h_scale = nonlinear_options.pcg_h_scale_init;
-    last_trust_ratio = 0.0;
-    residual_seconds = 0.0;
-    hess_vec_seconds = 0.0;
-    jacobian_operator_hess_vec_seconds = 0.0;
-    assembled_hess_vec_seconds = 0.0;
-    matrix_free_hess_vec_seconds = 0.0;
-    preconditioner_seconds = 0.0;
-    jacobian_operator_eval_seconds = 0.0;
-    jacobian_assembly_seconds = 0.0;
-    diagonal_assembly_seconds = 0.0;
-    diagonal_invert_seconds = 0.0;
-    preconditioner_update_seconds = 0.0;
-    preconditioner_setup_seconds = 0.0;
-    current_jacobian_operator.reset();
-    assembled_jacobian_from_operator.reset();
-    inverse_diagonal_preconditioner.SetSize(0);
-    use_inverse_diagonal_preconditioner = false;
-
-    SLIC_ERROR_ROOT_IF(nonlinear_options.pcg_block_len <= 0, "PcgBlock requires pcg_block_len > 0");
-    SLIC_ERROR_ROOT_IF(nonlinear_options.pcg_window <= 0, "PcgBlock requires pcg_window > 0");
-    SLIC_ERROR_ROOT_IF(nonlinear_options.pcg_ls_max_backtracks < 0, "PcgBlock requires pcg_ls_max_backtracks >= 0");
-    SLIC_ERROR_ROOT_IF(nonlinear_options.pcg_delta_avg_window <= 0, "PcgBlock requires pcg_delta_avg_window > 0");
-
-    mfem::real_t norm = computeResidual(X, r);
-    initial_norm = norm;
-    if (norm == 0.0) {
-      converged = true;
-      final_iter = 0;
-      final_norm = norm;
-      return;
-    }
-
-    const mfem::real_t norm_goal = std::max(rel_tol * initial_norm, abs_tol);
-
-    if (print_level == 1) {
-      mfem::out << "PcgBlock iteration " << std::setw(3) << 0 << " : ||r|| = " << std::setw(13) << norm << "\n";
-    }
-
-    pcg_precond.iterative_mode = false;
-
-    x_trial.SetSize(X.Size());
-    x_trial = 0.0;
-    r_trial.SetSize(X.Size());
-    r_trial = 0.0;
-    scratch.SetSize(X.Size());
-    scratch = 0.0;
-
-    mfem::Vector r_block(X.Size());
-    mfem::Vector r_candidate(X.Size());
-    mfem::Vector force(X.Size());
-    mfem::Vector z(X.Size());
-    mfem::Vector z_old(X.Size());
-    mfem::Vector p(X.Size());
-    mfem::Vector p_old(X.Size());
-    mfem::Vector Hp(X.Size());
-    mfem::Vector step(X.Size());
-    mfem::Vector x_candidate(X.Size());
-
-    bool have_momentum = false;
-    double rho_old = 0.0;
-    double h_scale = nonlinear_options.pcg_h_scale_init;
-    int retries_remaining = nonlinear_options.pcg_max_block_retries;
-    int it = 0;
-    double cumulative_work = 0.0;
-    std::vector<double> work_history{cumulative_work};
-    std::vector<double> accepted_step_norms;
-
-    auto append_bounded = [](std::vector<double>& history, double value, int max_size) {
-      history.push_back(value);
-      const auto bound = static_cast<size_t>(max_size);
-      if (history.size() > bound) {
-        const auto num_to_remove = static_cast<std::vector<double>::difference_type>(history.size() - bound);
-        history.erase(history.begin(), history.begin() + num_to_remove);
-      }
-    };
-
-    auto reset_momentum = [&]() {
-      have_momentum = false;
-      rho_old = 0.0;
-      p_old = 0.0;
-      z_old = 0.0;
-      ++num_momentum_resets;
-    };
-
-    auto window_max = [&](const std::vector<double>& history) {
-      const int window = nonlinear_options.pcg_window;
-      const auto begin = history.size() > static_cast<size_t>(window) ? history.end() - window : history.begin();
-      return *std::max_element(begin, history.end());
-    };
-
-    auto current_delta_ref = [&]() {
-      if (accepted_step_norms.empty()) {
-        return 0.0;
-      }
-      const int window = nonlinear_options.pcg_delta_avg_window;
-      const auto begin = accepted_step_norms.size() > static_cast<size_t>(window) ? accepted_step_norms.end() - window
-                                                                                  : accepted_step_norms.begin();
-      double sum = 0.0;
-      for (auto iter = begin; iter != accepted_step_norms.end(); ++iter) {
-        sum += *iter;
-      }
-      return sum / static_cast<double>(accepted_step_norms.end() - begin);
-    };
-
-    for (; true;) {
-      MFEM_ASSERT(mfem::IsFinite(norm), "norm = " << norm);
-      if (print_level >= 2) {
-        mfem::out << "PcgBlock iteration " << std::setw(3) << it << " : ||r|| = " << std::setw(13) << norm;
-        if (it > 0) {
-          mfem::out << ", ||r||/||r_0|| = " << std::setw(13) << (initial_norm != 0.0 ? norm / initial_norm : norm);
-        } else {
-          mfem::out << ", norm goal = " << std::setw(13) << norm_goal;
-        }
-        mfem::out << '\n';
-      }
-
-      if (print_level >= 1 && (norm != norm)) {
-        mfem::out << "Initial residual for PCG-block iteration is undefined/nan." << std::endl;
-        mfem::out << "PcgBlock: No convergence!\n";
-        converged = false;
-        break;
-      }
-
-      if (norm <= norm_goal && it >= nonlinear_options.min_iterations) {
-        converged = true;
-        break;
-      } else if (it >= max_iter) {
-        converged = false;
-        break;
-      } else if (retries_remaining <= 0 || h_scale < nonlinear_options.pcg_min_h_scale) {
-        converged = false;
-        break;
-      }
-
-      refreshBlockOperators(X);
-
-      r_block = r;
-      const double norm_block = norm;
-      bool block_finished = false;
-
-      while (!block_finished) {
-        x_trial = X;
-        r = r_block;
-        norm = norm_block;
-
-        double block_predicted = 0.0;
-        double block_actual = 0.0;
-        double block_delta_ref = current_delta_ref();
-        double block_trust_size = h_scale * (block_delta_ref > 0.0 ? block_delta_ref : 1.0);
-        double trial_cumulative_work = cumulative_work;
-        int trial_steps = 0;
-        bool trial_failed = false;
-        bool trial_ended_after_inner_failure = false;
-        std::vector<double> trial_step_norms;
-        auto trial_work_history = work_history;
-
-        for (int block_it = 0; block_it < nonlinear_options.pcg_block_len && it + trial_steps < max_iter; ++block_it) {
-          force = r;
-          force *= -1.0;
-          precond(force, z);
-          ++num_trial_steps;
-
-          const double rho = Dot(force, z);
-          if (!mfem::IsFinite(rho) || rho <= 0.0) {
-            trial_ended_after_inner_failure = trial_steps > 0;
-            trial_failed = trial_steps == 0;
-            break;
-          }
-
-          double beta = 0.0;
-          if (have_momentum) {
-            const double force_dot_z_old = Dot(force, z_old);
-            beta = std::max(0.0, (rho - force_dot_z_old) / rho_old);
-            if (std::abs(force_dot_z_old) > nonlinear_options.pcg_powell_eta * rho) {
-              beta = 0.0;
-              ++num_powell_restarts;
-            }
-          }
-
-          p = z;
-          if (have_momentum && beta != 0.0) {
-            p.Add(beta, p_old);
-          }
-
-          double force_dot_p = Dot(force, p);
-          if (force_dot_p <= nonlinear_options.pcg_eps_descent * rho) {
-            beta = 0.0;
-            p = z;
-            force_dot_p = rho;
-            ++num_descent_restarts;
-          }
-          if (beta == 0.0) {
-            ++num_zero_beta;
-          } else {
-            ++num_nonzero_beta;
-          }
-
-          hessVec(X, p, Hp);
-          const double pHp = Dot(p, Hp);
-
-          double alpha = 0.0;
-          double alpha_quad = std::numeric_limits<double>::quiet_NaN();
-          const bool positive_curvature = pHp > 0.0 && mfem::IsFinite(pHp);
-          if (positive_curvature) {
-            alpha_quad = force_dot_p / pHp;
-            alpha = alpha_quad;
-          } else {
-            ++num_negative_curvature;
-          }
-
-          const double p_norm = Norm(p);
-          double delta_ref = current_delta_ref();
-          if (delta_ref <= 0.0 && alpha > 0.0 && mfem::IsFinite(alpha) && p_norm > 0.0) {
-            delta_ref = alpha * p_norm;
-          } else if (delta_ref <= 0.0) {
-            delta_ref = 1.0;
-          }
-          block_delta_ref = delta_ref;
-          block_trust_size = h_scale * delta_ref;
-
-          const bool apply_trust_cap = !positive_curvature || h_scale < nonlinear_options.pcg_h_scale_init;
-          bool trust_capped = false;
-          if (apply_trust_cap && p_norm > 0.0) {
-            const double alpha_cap = h_scale * delta_ref / p_norm;
-            if (alpha > 0.0 && mfem::IsFinite(alpha)) {
-              if (alpha_cap < alpha) {
-                ++num_trust_capped_steps;
-                trust_capped = true;
-              }
-              alpha = std::min(alpha, alpha_cap);
-            } else {
-              alpha = alpha_cap;
-              trust_capped = true;
-            }
-          }
-
-          if (!(alpha > 0.0) || !mfem::IsFinite(alpha)) {
-            trial_ended_after_inner_failure = trial_steps > 0;
-            trial_failed = trial_steps == 0;
-            break;
-          }
-
-          bool accepted_step = false;
-          double accepted_work = 0.0;
-          double accepted_predicted = 0.0;
-          double accepted_step_norm = 0.0;
-          int accepted_ls_count = 0;
-
-          for (int ls = 0; ls <= nonlinear_options.pcg_ls_max_backtracks; ++ls) {
-            step = p;
-            step *= alpha;
-            add(x_trial, step, x_candidate);
-
-            const double norm_candidate = computeResidual(x_candidate, r_candidate);
-            const double work = -0.5 * Dot(r, step) - 0.5 * Dot(r_candidate, step);
-            const double cumulative_candidate = trial_cumulative_work + work;
-            const double work_ref = window_max(trial_work_history);
-            const bool finite_candidate = mfem::IsFinite(norm_candidate) && mfem::IsFinite(work);
-            const bool sufficient_work =
-                cumulative_candidate >= work_ref - nonlinear_options.pcg_ls_armijo_c * alpha * force_dot_p;
-
-            if (finite_candidate && (sufficient_work || norm_candidate <= norm_goal)) {
-              const double predicted = alpha * force_dot_p - 0.5 * alpha * alpha * pHp;
-              accepted_predicted = std::max(predicted, 0.0);
-              accepted_work = work;
-              accepted_step_norm = Norm(step);
-              accepted_ls_count = ls;
-              norm = norm_candidate;
-              accepted_step = true;
-              break;
-            }
-
-            alpha *= nonlinear_options.pcg_ls_shrink;
-          }
-
-          if (!accepted_step) {
-            trial_ended_after_inner_failure = trial_steps > 0;
-            trial_failed = trial_steps == 0;
-            break;
-          }
-
-          x_trial = x_candidate;
-          r = r_candidate;
-          trial_cumulative_work += accepted_work;
-          append_bounded(trial_work_history, trial_cumulative_work, nonlinear_options.pcg_window);
-          append_bounded(trial_step_norms, accepted_step_norm, nonlinear_options.pcg_delta_avg_window);
-          block_predicted += accepted_predicted;
-          block_actual += accepted_work;
-          num_line_search_backtracks += static_cast<size_t>(accepted_ls_count);
-
-          if (print_level >= 2) {
-            mfem::out << "  PcgBlock step " << std::setw(3) << (it + trial_steps + 1) << " : alpha = " << std::setw(13)
-                      << alpha << ", approx work = " << std::setw(13) << accepted_predicted
-                      << ", achieved work = " << std::setw(13) << accepted_work << ", trust size = " << std::setw(13)
-                      << block_trust_size << ", capped = " << trust_capped << ", ls = " << accepted_ls_count << '\n';
-          }
-
-          p_old = p;
-          z_old = z;
-          rho_old = rho;
-          have_momentum = true;
-          ++trial_steps;
-          ++num_accepted_steps;
-
-          if (norm <= norm_goal && it + trial_steps >= nonlinear_options.min_iterations) {
-            break;
-          }
-        }
-
-        double trust_ratio = 1.0;
-        if (block_predicted > nonlinear_options.pcg_eps_descent) {
-          trust_ratio = block_actual / block_predicted;
-        } else if (block_actual < 0.0) {
-          trust_ratio = -std::numeric_limits<double>::infinity();
-        }
-
-        const bool block_converged = norm <= norm_goal && it + trial_steps >= nonlinear_options.min_iterations;
-        const bool accept_block =
-            trial_steps > 0 && !trial_failed &&
-            (block_converged || (block_actual >= 0.0 && trust_ratio >= nonlinear_options.pcg_trust_eta_bad));
-
-        const double old_h_scale = h_scale;
-        const bool prefix_accept = accept_block && trial_ended_after_inner_failure;
-        bool reset_next_momentum = false;
-        if (accept_block) {
-          if (prefix_accept) {
-            ++num_prefix_accepts;
-          }
-          X = x_trial;
-          cumulative_work = trial_cumulative_work;
-          work_history = std::move(trial_work_history);
-          accepted_step_norms.insert(accepted_step_norms.end(), trial_step_norms.begin(), trial_step_norms.end());
-          if (accepted_step_norms.size() > static_cast<size_t>(nonlinear_options.pcg_delta_avg_window)) {
-            accepted_step_norms.erase(accepted_step_norms.begin(),
-                                      accepted_step_norms.end() - nonlinear_options.pcg_delta_avg_window);
-          }
-          it += trial_steps;
-          ++num_blocks;
-
-          if (trust_ratio < nonlinear_options.pcg_trust_eta_bad) {
-            h_scale = std::max(h_scale * nonlinear_options.pcg_shrink, nonlinear_options.pcg_min_h_scale);
-            reset_momentum();
-            reset_next_momentum = true;
-          } else if (trial_ended_after_inner_failure) {
-            reset_momentum();
-            reset_next_momentum = true;
-          } else if (trust_ratio >= nonlinear_options.pcg_trust_eta_good) {
-            h_scale = std::min(h_scale * nonlinear_options.pcg_growth, nonlinear_options.pcg_h_scale_init);
-          }
-          const double next_trust_size = h_scale * block_delta_ref;
-
-          if (print_level >= 2) {
-            mfem::out << "PcgBlock block accepted: steps = " << std::setw(3) << trial_steps
-                      << ", prefix = " << prefix_accept << ", approx work = " << std::setw(13) << block_predicted
-                      << ", achieved work = " << std::setw(13) << block_actual << ", rho = " << std::setw(13)
-                      << trust_ratio << ", h_scale = " << std::setw(13) << old_h_scale << " -> " << std::setw(13)
-                      << h_scale << ", trust size = " << std::setw(13) << block_trust_size << " -> " << std::setw(13)
-                      << next_trust_size << ", reset momentum = " << reset_next_momentum << '\n';
-          }
-          last_trust_ratio = trust_ratio;
-
-          block_finished = true;
-        } else {
-          r = r_block;
-          norm = norm_block;
-          h_scale *= nonlinear_options.pcg_shrink;
-          reset_momentum();
-          --retries_remaining;
-          ++num_block_rejects;
-          const double next_trust_size = h_scale * block_delta_ref;
-
-          if (print_level >= 2) {
-            mfem::out << "PcgBlock block rejected: steps = " << std::setw(3) << trial_steps
-                      << ", approx work = " << std::setw(13) << block_predicted << ", achieved work = " << std::setw(13)
-                      << block_actual << ", rho = " << std::setw(13) << trust_ratio << ", h_scale = " << std::setw(13)
-                      << old_h_scale << " -> " << std::setw(13) << h_scale << ", trust size = " << std::setw(13)
-                      << block_trust_size << " -> " << std::setw(13) << next_trust_size << ", reset momentum = 1"
-                      << ", retries left = " << retries_remaining << '\n';
-          }
-
-          if (retries_remaining <= 0 || h_scale < nonlinear_options.pcg_min_h_scale) {
-            block_finished = true;
-          } else {
-            refreshBlockOperators(X);
-          }
-        }
-      }
-    }
-
-    final_iter = it;
-    final_norm = norm;
-    final_h_scale = h_scale;
-
-    if (print_level == 1) {
-      mfem::out << "PcgBlock iteration " << std::setw(3) << final_iter << " : ||r|| = " << std::setw(13) << norm
-                << '\n';
-    }
-    if (!converged && print_level >= 1) {
-      mfem::out << "PcgBlock: No convergence!\n";
-    }
-  }
-};
 
 EquationSolver::EquationSolver(NonlinearSolverOptions nonlinear_opts, LinearSolverOptions lin_opts, MPI_Comm comm)
 {
@@ -2512,27 +1553,6 @@ void EquationSolver::setOperator(const mfem::Operator& op)
   }
 }
 
-void EquationSolver::setMatrixFreeTangentAction(MatrixFreeTangentAction tangent_action)
-{
-  auto* pcg_block = dynamic_cast<PcgBlockSolver*>(nonlin_solver_.get());
-  if (pcg_block) {
-    pcg_block->setMatrixFreeTangentAction(std::move(tangent_action));
-  }
-}
-
-void EquationSolver::setJacobianOperator(JacobianOperatorFactory jacobian_operator)
-{
-  auto* pcg_block = dynamic_cast<PcgBlockSolver*>(nonlin_solver_.get());
-  if (pcg_block) {
-    pcg_block->setJacobianOperator(std::move(jacobian_operator));
-    return;
-  }
-  auto* trust_region = dynamic_cast<TrustRegion*>(nonlin_solver_.get());
-  if (trust_region) {
-    trust_region->setJacobianOperator(std::move(jacobian_operator));
-  }
-}
-
 void EquationSolver::solve(mfem::Vector& x) const
 {
   mfem::Vector zero(x);
@@ -2542,15 +1562,6 @@ void EquationSolver::solve(mfem::Vector& x) const
   nonlin_solver_->Mult(zero, x);
 }
 
-std::optional<PcgBlockDiagnostics> EquationSolver::pcgBlockDiagnostics() const
-{
-  auto* pcg_block = dynamic_cast<const PcgBlockSolver*>(nonlin_solver_.get());
-  if (!pcg_block) {
-    return std::nullopt;
-  }
-  return pcg_block->diagnostics();
-}
-
 std::optional<TrustRegionDiagnostics> EquationSolver::trustRegionDiagnostics() const
 {
   auto* trust_region = dynamic_cast<const TrustRegion*>(nonlin_solver_.get());
@@ -2671,8 +1682,6 @@ std::unique_ptr<mfem::NewtonSolver> buildNonlinearSolver(NonlinearSolverOptions
     nonlinear_solver = std::make_unique<NewtonSolver>(comm, nonlinear_opts);
   } else if (nonlinear_opts.nonlin_solver == NonlinearSolver::TrustRegion) {
     nonlinear_solver = std::make_unique<TrustRegion>(comm, nonlinear_opts, linear_opts, prec);
-  } else if (nonlinear_opts.nonlin_solver == NonlinearSolver::PcgBlock) {
-    nonlinear_solver = std::make_unique<PcgBlockSolver>(comm, nonlinear_opts, prec);
 #ifdef SMITH_USE_PETSC
   } else if (nonlinear_opts.nonlin_solver == NonlinearSolver::PetscNewton) {
     nonlinear_solver = std::make_unique<mfem_ext::PetscNewtonSolver>(comm, nonlinear_opts);
@@ -2931,7 +1940,7 @@ void EquationSolver::defineInputFileSchema(axom::inlet::Container& container)
   nonlinear_container.addInt("max_iter", "Maximum iterations for the Newton solve.").defaultValue(500);
   nonlinear_container.addInt("print_level", "Nonlinear print level.").defaultValue(0);
   nonlinear_container
-      .addString("solver_type", "Solver type (Newton|NewtonLineSearch|TrustRegion|PcgBlock|KINFullStep|KINLineSearch)")
+      .addString("solver_type", "Solver type (Newton|NewtonLineSearch|TrustRegion|KINFullStep|KINLineSearch)")
       .defaultValue("Newton");
 }
 
@@ -3011,8 +2020,6 @@ smith::NonlinearSolverOptions FromInlet<smith::NonlinearSolverOptions>::operator
     options.nonlin_solver = smith::NonlinearSolver::NewtonLineSearch;
   } else if (solver_type == "TrustRegion") {
     options.nonlin_solver = smith::NonlinearSolver::TrustRegion;
-  } else if (solver_type == "PcgBlock") {
-    options.nonlin_solver = smith::NonlinearSolver::PcgBlock;
   } else if (solver_type == "KINFullStep") {
     options.nonlin_solver = smith::NonlinearSolver::KINFullStep;
   } else if (solver_type == "KINLineSearch") {
diff --git a/src/smith/numerics/equation_solver.hpp b/src/smith/numerics/equation_solver.hpp
index 6100fad73f..3ddf35edef 100644
--- a/src/smith/numerics/equation_solver.hpp
+++ b/src/smith/numerics/equation_solver.hpp
@@ -13,10 +13,8 @@
 #pragma once
 
 #include <cstddef>
-#include <functional>
 #include <memory>
 #include <optional>
-#include <type_traits>
 #include <variant>
 #include <utility>
 
@@ -30,152 +28,6 @@
 
 namespace smith {
 
-/**
- * @brief Solver-facing interface for Jacobian operations.
- *
- * A JacobianOperator represents the operations available on J(x) after differentiating a residual but before
- * necessarily assembling a sparse matrix. Concrete implementations may support matrix-free products, sparse assembly,
- * diagonal extraction, or all of them. Unsupported operations should throw.
- */
-class JacobianOperator : public mfem::Operator {
- public:
-  using mfem::Operator::Operator;
-
-  /// Assemble the sparse Jacobian representation.
-  virtual std::unique_ptr<mfem::HypreParMatrix> assemble()
-  {
-    SLIC_ERROR("This JacobianOperator does not support sparse assembly.");
-    return nullptr;
-  }
-
-  /// Assemble the scalar true-dof diagonal of the Jacobian.
-  virtual void assembleDiagonal(mfem::Vector&) const
-  {
-    SLIC_ERROR("This JacobianOperator does not support diagonal assembly.");
-  }
-};
-
-/**
- * @brief Adapter from a smith::functional Gradient object to the solver-facing JacobianOperator interface.
- */
-template <typename Gradient>
-class FunctionalJacobianOperator : public JacobianOperator {
-  using GradientT = std::remove_reference_t<Gradient>;
-
- public:
-  explicit FunctionalJacobianOperator(GradientT& gradient)
-      : JacobianOperator(gradient.Height(), gradient.Width()), gradient_(&gradient)
-  {
-  }
-
-  explicit FunctionalJacobianOperator(GradientT&& gradient)
-      : JacobianOperator(gradient.Height(), gradient.Width()),
-        owned_gradient_(std::make_unique<GradientT>(std::move(gradient))),
-        gradient_(owned_gradient_.get())
-  {
-  }
-
-  void Mult(const mfem::Vector& dx, mfem::Vector& y) const override { gradient_->Mult(dx, y); }
-
-  void AddMult(const mfem::Vector& dx, mfem::Vector& y, const double a = 1.0) const override
-  {
-    gradient_->AddMult(dx, y, a);
-  }
-
-  std::unique_ptr<mfem::HypreParMatrix> assemble() override { return gradient_->assemble(); }
-
-  void assembleDiagonal(mfem::Vector& diag) const override { gradient_->assembleDiagonal(diag); }
-
- private:
-  std::unique_ptr<GradientT> owned_gradient_;
-  GradientT* gradient_;
-};
-
-/**
- * @brief Matrix-free tangent action callback.
- *
- * The callback evaluates y = J(x) dx for the current nonlinear state x
- * without requiring EquationSolver to assemble J.
- */
-using MatrixFreeTangentAction = std::function<void(const mfem::Vector& x, const mfem::Vector& dx, mfem::Vector& y)>;
-
-/**
- * @brief Callback that evaluates and returns a JacobianOperator at the supplied nonlinear state.
- */
-using JacobianOperatorFactory = std::function<std::unique_ptr<JacobianOperator>(const mfem::Vector& x)>;
-
-/// Diagnostic counters for the nonlinear PCG-block solver
-struct PcgBlockDiagnostics {
-  /// Number of nonlinear residual evaluations
-  size_t num_residuals = 0;
-  /// Number of assembled Jacobian-vector products
-  size_t num_hess_vecs = 0;
-  /// Number of preconditioner applications
-  size_t num_preconds = 0;
-  /// Number of assembled Jacobians
-  size_t num_jacobian_assembles = 0;
-  /// Number of solver-facing JacobianOperator evaluations
-  size_t num_jacobian_operator_evals = 0;
-  /// Number of direct diagonal assemblies
-  size_t num_diagonal_assembles = 0;
-  /// Number of preconditioner operator updates
-  size_t num_preconditioner_updates = 0;
-  /// Number of accepted prefix blocks
-  size_t num_prefix_accepts = 0;
-  /// Number of momentum resets
-  size_t num_momentum_resets = 0;
-  /// Number of steps with nonzero PCG beta
-  size_t num_nonzero_beta = 0;
-  /// Number of steps with zero PCG beta
-  size_t num_zero_beta = 0;
-  /// Number of accepted blocks
-  size_t num_blocks = 0;
-  /// Number of rejected blocks
-  size_t num_block_rejects = 0;
-  /// Number of Powell restarts
-  size_t num_powell_restarts = 0;
-  /// Number of descent-guard restarts
-  size_t num_descent_restarts = 0;
-  /// Number of non-positive curvature directions
-  size_t num_negative_curvature = 0;
-  /// Number of line-search backtracks
-  size_t num_line_search_backtracks = 0;
-  /// Number of positive-curvature steps capped by the trust radius
-  size_t num_trust_capped_steps = 0;
-  /// Number of accepted inner PCG steps
-  size_t num_accepted_steps = 0;
-  /// Number of trial inner PCG steps
-  size_t num_trial_steps = 0;
-  /// Time spent evaluating nonlinear residuals
-  double residual_seconds = 0.0;
-  /// Time spent applying Jacobian-vector products
-  double hess_vec_seconds = 0.0;
-  /// Time spent applying JacobianOperator products
-  double jacobian_operator_hess_vec_seconds = 0.0;
-  /// Time spent applying assembled Jacobian products
-  double assembled_hess_vec_seconds = 0.0;
-  /// Time spent applying legacy matrix-free tangent products
-  double matrix_free_hess_vec_seconds = 0.0;
-  /// Time spent applying preconditioners
-  double preconditioner_seconds = 0.0;
-  /// Time spent evaluating JacobianOperator factories
-  double jacobian_operator_eval_seconds = 0.0;
-  /// Time spent assembling sparse Jacobians
-  double jacobian_assembly_seconds = 0.0;
-  /// Time spent directly assembling diagonals
-  double diagonal_assembly_seconds = 0.0;
-  /// Time spent inverting direct diagonals
-  double diagonal_invert_seconds = 0.0;
-  /// Time spent refreshing preconditioner data
-  double preconditioner_update_seconds = 0.0;
-  /// Time spent in preconditioner SetOperator calls
-  double preconditioner_setup_seconds = 0.0;
-  /// Last trust scale used by the solver
-  double final_h_scale = 1.0;
-  /// Last accepted block trust ratio
-  double last_trust_ratio = 0.0;
-};
-
 /// Diagnostic counters for the TrustRegion nonlinear solver
 struct TrustRegionDiagnostics {
   /// Number of nonlinear residual evaluations
@@ -192,10 +44,6 @@ struct TrustRegionDiagnostics {
   size_t num_preconds = 0;
   /// Number of assembled Jacobians
   size_t num_jacobian_assembles = 0;
-  /// Number of solver-facing JacobianOperator evaluations
-  size_t num_jacobian_operator_evals = 0;
-  /// Number of direct diagonal assemblies
-  size_t num_diagonal_assembles = 0;
   /// Number of trust-region model CG iterations
   size_t num_cg_iterations = 0;
   /// Number of subspace solves
@@ -216,18 +64,8 @@ struct TrustRegionDiagnostics {
   size_t num_subspace_solve_start_hess_vecs = 0;
   /// Number of quadratic subspace backend solves
   size_t num_quadratic_subspace_solves = 0;
-  /// Number of cubic subspace backend attempts
-  size_t num_cubic_subspace_attempts = 0;
-  /// Number of cubic subspace attempts that used the cubic candidate
-  size_t num_cubic_subspace_uses = 0;
-  /// Number of cubic subspace attempts that fell back to the quadratic candidate
-  size_t num_cubic_subspace_quadratic_fallbacks = 0;
   /// Number of preconditioner operator updates
   size_t num_preconditioner_updates = 0;
-  /// Number of nonmonotone accepted TrustRegion steps based on work surrogate
-  size_t num_nonmonotone_work_accepts = 0;
-  /// Number of accepted TrustRegion work-surrogate steps that monotone acceptance would have rejected
-  size_t num_monotone_work_would_reject = 0;
   /// Time spent evaluating nonlinear residuals
   double residual_seconds = 0.0;
   /// Time spent applying Jacobian-vector products
@@ -238,14 +76,6 @@ struct TrustRegionDiagnostics {
   double cauchy_hess_vec_seconds = 0.0;
   /// Time spent applying Hessian-vector products in line-search model checks
   double line_search_hess_vec_seconds = 0.0;
-  /// Time spent applying JacobianOperator products
-  double jacobian_operator_hess_vec_seconds = 0.0;
-  /// Time spent evaluating JacobianOperator factories
-  double jacobian_operator_eval_seconds = 0.0;
-  /// Time spent directly assembling diagonals
-  double diagonal_assembly_seconds = 0.0;
-  /// Time spent inverting direct diagonals
-  double diagonal_invert_seconds = 0.0;
   /// Time spent applying preconditioners
   double preconditioner_seconds = 0.0;
   /// Total time spent in the nonlinear solve
@@ -326,10 +156,6 @@ struct TrustRegionDiagnostics {
   double preconditioner_update_seconds = 0.0;
   /// Time spent in preconditioner SetOperator calls
   double preconditioner_setup_seconds = 0.0;
-  /// Last TrustRegion accumulated work-surrogate level used by nonmonotone acceptance
-  double last_work_objective = 0.0;
-  /// Last nonmonotone reference work-surrogate level
-  double last_nonmonotone_work_reference = 0.0;
 };
 
 /**
@@ -382,27 +208,6 @@ class EquationSolver {
    */
   void setOperator(const mfem::Operator& op);
 
-  /**
-   * @brief Sets an optional matrix-free tangent action for nonlinear solvers that can use J(x) dx directly.
-   *
-   * Solvers that do not support matrix-free tangent actions ignore this callback. Supported solvers retain their
-   * assembled-gradient fallback when no callback is set.
-   *
-   * @param[in] tangent_action Callback evaluating y = J(x) dx.
-   */
-  void setMatrixFreeTangentAction(MatrixFreeTangentAction tangent_action);
-
-  /**
-   * @brief Sets an optional JacobianOperator factory for nonlinear solvers that can use matrix-free Jacobian products.
-   *
-   * This is the preferred replacement for the narrower matrix-free tangent-action callback. During migration,
-   * PCG-block uses this callback first when it is registered and otherwise falls back to MatrixFreeTangentAction or
-   * assembled gradients.
-   *
-   * @param[in] jacobian_operator Callback evaluating and returning J(x).
-   */
-  void setJacobianOperator(JacobianOperatorFactory jacobian_operator);
-
   /**
    * Solves the system F(x) = 0
    * @param[in,out] x Solution to the system of nonlinear equations
@@ -421,12 +226,6 @@ class EquationSolver {
    */
   const mfem::NewtonSolver& nonlinearSolver() const { return *nonlin_solver_; }
 
-  /**
-   * Returns diagnostic counters when the nonlinear solver is PcgBlock.
-   * @return Optional PCG-block diagnostics; empty for other nonlinear solvers
-   */
-  std::optional<PcgBlockDiagnostics> pcgBlockDiagnostics() const;
-
   /**
    * Returns diagnostic counters when the nonlinear solver is TrustRegion.
    * @return Optional TrustRegion diagnostics; empty for other nonlinear solvers
diff --git a/src/smith/numerics/functional/functional.hpp b/src/smith/numerics/functional/functional.hpp
index 7e611182b8..dddeadc4d0 100644
--- a/src/smith/numerics/functional/functional.hpp
+++ b/src/smith/numerics/functional/functional.hpp
@@ -849,71 +849,6 @@ class Functional<test(trials...), exec> {
       return max_entries;
     }
 
-    void AssembleDiagonal(mfem::Vector& diag) const override
-    {
-      SLIC_ERROR_ROOT_IF(form_.test_function_space_.family != Family::H1 ||
-                             form_.trial_function_spaces_[which_argument].family != Family::H1,
-                         "Functional gradient diagonal assembly currently supports H1 test/trial spaces only.");
-      SLIC_ERROR_ROOT_IF(test_space_ != trial_space_,
-                         "Functional gradient diagonal assembly currently requires the same test/trial FE space.");
-      SLIC_ERROR_ROOT_IF(form_.output_L_.Size() != form_.input_L_[which_argument].Size(),
-                         "Functional gradient diagonal assembly requires square local operators.");
-
-      mfem::Vector local_diag(form_.output_L_.Size(), form_.mem_type);
-      local_diag = 0.0;
-
-      std::vector<double> K_elem_buffer(max_buffer_size());
-
-      for (auto& integral : form_.integrals_) {
-        // if this integral's derivative isn't identically zero
-        if (integral.functional_to_integral_index_.count(which_argument) > 0) {
-          Domain& dom = integral.domain_;
-
-          uint32_t id = integral.functional_to_integral_index_.at(which_argument);
-          const auto& G_test = dom.get_restriction(form_.test_function_space_);
-          const auto& G_trial = dom.get_restriction(form_.trial_function_spaces_[which_argument]);
-          for (const auto& [geom, calculate_element_matrices_func] : integral.element_gradient_[id]) {
-            const auto& test_restriction = G_test.restrictions.at(geom);
-            const auto& trial_restriction = G_trial.restrictions.at(geom);
-
-            CPUArrayView<double, 3> K_e(K_elem_buffer.data(), test_restriction.num_elements,
-                                        trial_restriction.nodes_per_elem * trial_restriction.components,
-                                        test_restriction.nodes_per_elem * test_restriction.components);
-            detail::zero_out(K_e);
-
-            calculate_element_matrices_func(K_e);
-
-            uint32_t rows_per_elem = uint32_t(test_restriction.nodes_per_elem * test_restriction.components);
-            uint32_t cols_per_elem = uint32_t(trial_restriction.nodes_per_elem * trial_restriction.components);
-
-            std::vector<DoF> test_vdofs(rows_per_elem);
-            std::vector<DoF> trial_vdofs(cols_per_elem);
-
-            for (uint32_t e = 0; e < test_restriction.num_elements; e++) {
-              test_restriction.GetElementVDofs(int(e), test_vdofs);
-              trial_restriction.GetElementVDofs(int(e), trial_vdofs);
-
-              for (uint32_t i = 0; i < cols_per_elem; i++) {
-                int col = int(trial_vdofs[i].index());
-
-                for (uint32_t j = 0; j < rows_per_elem; j++) {
-                  int row = int(test_vdofs[j].index());
-                  if (row == col) {
-                    local_diag(row) += K_e(e, i, j);
-                  }
-                }
-              }
-            }
-          }
-        }
-      }
-
-      diag.SetSize(Height(), form_.mem_type);
-      form_.P_test_->MultTranspose(local_diag, diag);
-    }
-
-    void assembleDiagonal(mfem::Vector& diag) const { AssembleDiagonal(diag); }
-
     std::unique_ptr<mfem::HypreParMatrix> assemble()
     {
       if (row_ptr.empty()) {
diff --git a/src/smith/numerics/functional/tests/functional_comparisons.cpp b/src/smith/numerics/functional/tests/functional_comparisons.cpp
index 031bb56ee5..eab6ca2bb4 100644
--- a/src/smith/numerics/functional/tests/functional_comparisons.cpp
+++ b/src/smith/numerics/functional/tests/functional_comparisons.cpp
@@ -187,15 +187,6 @@ void functional_test(mfem::ParMesh& mesh, H1<p> test, H1<p> trial, Dimension<dim
 
   std::unique_ptr<mfem::HypreParMatrix> J_func = assemble(drdU);
 
-  mfem::Vector diag_direct(U.Size());
-  drdU.assembleDiagonal(diag_direct);
-
-  mfem::Vector diag_assembled(U.Size());
-  J_func->GetDiag(diag_assembled);
-
-  mfem::Vector diag_diff(U.Size());
-  subtract(diag_direct, diag_assembled, diag_diff);
-
   // Compute the gradient action using standard MFEM and functional
   // mfem::Vector g1 = (*J_mfem) * U;
   mfem::Vector g1(U.Size());
@@ -221,7 +212,6 @@ void functional_test(mfem::ParMesh& mesh, H1<p> test, H1<p> trial, Dimension<dim
   }
 
   // Ensure the two methods generate the same result
-  EXPECT_NEAR(0.0, diag_diff.Norml2() / diag_assembled.Norml2(), 1.e-14);
   EXPECT_NEAR(0.0, diff1.Norml2() / g1.Norml2(), 1.e-14);
   EXPECT_NEAR(0.0, diff2.Norml2() / g1.Norml2(), 1.e-14);
 }
@@ -313,15 +303,6 @@ void functional_test(mfem::ParMesh& mesh, H1<p, dim> test, H1<p, dim> trial, Dim
 
   std::unique_ptr<mfem::HypreParMatrix> J_func = assemble(drdU);
 
-  mfem::Vector diag_direct(U.Size());
-  drdU.assembleDiagonal(diag_direct);
-
-  mfem::Vector diag_assembled(U.Size());
-  J_func->GetDiag(diag_assembled);
-
-  mfem::Vector diag_diff(U.Size());
-  subtract(diag_direct, diag_assembled, diag_diff);
-
   // mfem::Vector g1 = (*J_mfem) * U;
   mfem::Vector g1(U.Size());
   J_mfem->Mult(U, g1);
@@ -347,7 +328,6 @@ void functional_test(mfem::ParMesh& mesh, H1<p, dim> test, H1<p, dim> trial, Dim
     std::cout << "||g1-g3||/||g1||: " << diff2.Norml2() / g1.Norml2() << std::endl;
   }
 
-  EXPECT_NEAR(0., diag_diff.Norml2() / diag_assembled.Norml2(), 1.e-14);
   EXPECT_NEAR(0., diff1.Norml2() / g1.Norml2(), 1.e-14);
   EXPECT_NEAR(0., diff2.Norml2() / g1.Norml2(), 1.e-14);
 }
@@ -507,67 +487,6 @@ double time_on_slowest_rank(Function&& function)
 
 }  // namespace
 
-TEST(Elasticity, DiagonalAssemblyBenchmark)
-{
-  if (!run_diagonal_benchmark) {
-    GTEST_SKIP() << "Set --run-diagonal-benchmark to time direct diagonal assembly.";
-  }
-
-  static constexpr int dim = 3;
-  using test_space = H1<2, dim>;
-  using trial_space = H1<2, dim>;
-
-  auto [fespace, fec] = smith::generateParFiniteElementSpace<test_space>(mesh3D.get());
-  (void)fec;
-
-  mfem::ParGridFunction u_global(fespace.get());
-  int seed = 9;
-  u_global.Randomize(seed);
-
-  mfem::Vector U(fespace->TrueVSize());
-  u_global.GetTrueDofs(U);
-
-  Functional<test_space(trial_space), exec_space> residual(fespace.get(), {fespace.get()});
-  Domain domain = EntireDomain(*mesh3D);
-  residual.AddDomainIntegral(Dimension<dim>{}, DependsOn<0>{}, StressFunctor<dim>{}, domain);
-
-  auto [r, drdU] = residual(0.0, differentiate_wrt(U));
-
-  mfem::Vector diag_direct(U.Size());
-  mfem::Vector diag_assembled(U.Size());
-  drdU.assembleDiagonal(diag_direct);
-  std::unique_ptr<mfem::HypreParMatrix> J_warmup = assemble(drdU);
-  J_warmup->GetDiag(diag_assembled);
-
-  const int samples = std::max(diagonal_benchmark_samples, 1);
-  double direct_time = time_on_slowest_rank([&]() {
-    for (int sample = 0; sample < samples; sample++) {
-      drdU.assembleDiagonal(diag_direct);
-    }
-  });
-
-  double sparse_time = time_on_slowest_rank([&]() {
-    for (int sample = 0; sample < samples; sample++) {
-      std::unique_ptr<mfem::HypreParMatrix> J = assemble(drdU);
-      J->GetDiag(diag_assembled);
-    }
-  });
-
-  mfem::Vector diag_diff(U.Size());
-  subtract(diag_direct, diag_assembled, diag_diff);
-  EXPECT_NEAR(0.0, diag_diff.Norml2() / diag_assembled.Norml2(), 1.e-14);
-
-  auto [num_ranks, rank] = smith::getMPIInfo();
-  (void)num_ranks;
-  if (rank == 0) {
-    std::cout << "DiagonalAssemblyBenchmark direct_seconds=" << direct_time / samples
-              << " sparse_getdiag_seconds=" << sparse_time / samples << " speedup=" << sparse_time / direct_time
-              << std::endl;
-  }
-
-  EXPECT_GT(sparse_time / direct_time, 5.0);
-}
-
 // TODO: reenable these once hcurl implements of simplex elements is finished
 // TEST(Hcurl, 2DLinear) { functional_test(*mesh2D, Hcurl<1>{}, Hcurl<1>{}, Dimension<2>{}); }
 // TEST(Hcurl, 2DQuadratic) { functional_test(*mesh2D, Hcurl<2>{}, Hcurl<2>{}, Dimension<2>{}); }
diff --git a/src/smith/numerics/mfem_trust_region_subspace.cpp b/src/smith/numerics/mfem_trust_region_subspace.cpp
index 454cb81d2d..268f832703 100644
--- a/src/smith/numerics/mfem_trust_region_subspace.cpp
+++ b/src/smith/numerics/mfem_trust_region_subspace.cpp
@@ -93,50 +93,6 @@ std::pair<std::vector<const mfem::Vector*>, std::vector<const mfem::Vector*>> re
   return std::make_pair(directions_new, A_directions_new);
 }
 
-std::tuple<std::vector<const mfem::Vector*>, std::vector<const mfem::Vector*>, std::vector<const mfem::Vector*>>
-removeDependentDirectionTriples(std::vector<const mfem::Vector*> directions,
-                                std::vector<const mfem::Vector*> A_directions,
-                                std::vector<const mfem::Vector*> previous_A_directions)
-{
-  SMITH_MARK_FUNCTION;
-  MFEM_VERIFY(directions.size() == A_directions.size() && directions.size() == previous_A_directions.size(),
-              "Direction triple lists must have matching sizes.");
-
-  std::vector<double> norms;
-  size_t num_dirs = directions.size();
-
-  for (size_t i = 0; i < num_dirs; ++i) {
-    norms.push_back(std::sqrt(mfem::InnerProduct(MPI_COMM_WORLD, *directions[i], *directions[i])));
-  }
-
-  std::vector<std::pair<const mfem::Vector*, size_t>> kepts;
-  for (size_t i = 0; i < num_dirs; ++i) {
-    bool keepi = norms[i] != 0.0;
-    for (auto&& kept_and_j : kepts) {
-      size_t j = kept_and_j.second;
-      double dot_ij = mfem::InnerProduct(MPI_COMM_WORLD, *directions[i], *kept_and_j.first);
-      if (dot_ij > 0.999 * norms[i] * norms[j]) {
-        keepi = false;
-      }
-    }
-    if (keepi) {
-      kepts.emplace_back(std::make_pair(directions[i], i));
-    }
-  }
-
-  std::vector<const mfem::Vector*> directions_new;
-  std::vector<const mfem::Vector*> A_directions_new;
-  std::vector<const mfem::Vector*> previous_A_directions_new;
-
-  for (auto kept_and_j : kepts) {
-    directions_new.push_back(directions[kept_and_j.second]);
-    A_directions_new.push_back(A_directions[kept_and_j.second]);
-    previous_A_directions_new.push_back(previous_A_directions[kept_and_j.second]);
-  }
-
-  return std::make_tuple(directions_new, A_directions_new, previous_A_directions_new);
-}
-
 #ifdef MFEM_USE_LAPACK
 
 TrustRegionSubspaceResult solveSubspaceProblem(const std::vector<const mfem::Vector*>& directions,
@@ -568,7 +524,7 @@ TrustRegionSubspaceResult solveSubspaceProblem(const std::vector<const mfem::Vec
                                                const std::vector<const mfem::Vector*>& A_directions,
                                                const mfem::Vector& b, double delta, int num_leftmost)
 {
-#ifdef SMITH_USE_SLEPC
+#if defined(SMITH_USE_SLEPC) && defined(SMITH_TRUST_REGION_USE_PETSC_SUBSPACE)
   return solveSubspaceProblemPetsc(directions, A_directions, b, delta, num_leftmost);
 #else
   throw PetscException("MFEM trust-region subspace solve requires MFEM LAPACK support.");
diff --git a/src/smith/numerics/petsc_trust_region_subspace.cpp b/src/smith/numerics/petsc_trust_region_subspace.cpp
index aac63c7cd1..2368e06899 100644
--- a/src/smith/numerics/petsc_trust_region_subspace.cpp
+++ b/src/smith/numerics/petsc_trust_region_subspace.cpp
@@ -6,7 +6,7 @@
 
 #include "smith/numerics/trust_region_solver.hpp"
 
-#ifdef SMITH_USE_SLEPC
+#if defined(SMITH_USE_SLEPC) && defined(SMITH_TRUST_REGION_USE_PETSC_SUBSPACE)
 
 #include <iostream>
 
diff --git a/src/smith/numerics/solver_config.hpp b/src/smith/numerics/solver_config.hpp
index 27635aeda3..6cfdc53014 100644
--- a/src/smith/numerics/solver_config.hpp
+++ b/src/smith/numerics/solver_config.hpp
@@ -152,7 +152,6 @@ enum class NonlinearSolver
   LBFGS,                     /**< MFEM-native Limited memory BFGS */
   NewtonLineSearch,          /**< Custom solver using preconditioned earch direction with backtracking line search */
   TrustRegion,               /**< Custom solver using a trust region solver */
-  PcgBlock,                  /**< Custom nonlinear preconditioned conjugate-gradient block solver */
   KINFullStep,               /**< KINSOL Full Newton (Sundials must be enabled) */
   KINBacktrackingLineSearch, /**< KINSOL Newton with Backtracking Line Search (Sundials must be enabled) */
   KINPicard,                 /**< KINSOL Picard (Sundials must be enabled) */
@@ -175,8 +174,6 @@ inline std::string nonlinearName(const NonlinearSolver& s)
       return "NewtonLineSearch";
     case NonlinearSolver::TrustRegion:
       return "TrustRegion";
-    case NonlinearSolver::PcgBlock:
-      return "PcgBlock";
     case NonlinearSolver::KINFullStep:
       return "KINFullStep";
     case NonlinearSolver::KINBacktrackingLineSearch:
@@ -205,7 +202,6 @@ inline std::map<std::string, NonlinearSolver> nonlinearSolverMap = {
     {"LBFGS", NonlinearSolver::LBFGS},
     {"NewtonLineSearch", NonlinearSolver::NewtonLineSearch},
     {"TrustRegion", NonlinearSolver::TrustRegion},
-    {"PcgBlock", NonlinearSolver::PcgBlock},
     {"KINFullStep", NonlinearSolver::KINFullStep},
     {"KINBacktrackingLineSearch", NonlinearSolver::KINBacktrackingLineSearch},
     {"KINPicard", NonlinearSolver::KINPicard},
@@ -465,15 +461,6 @@ struct NonlinearSolverOptions {
   /// Scaling for the initial trust region size
   double trust_region_scaling = 0.1;
 
-  /// Nonmonotone TrustRegion acceptance window. Zero preserves monotone acceptance.
-  int trust_nonmonotone_window = 0;
-
-  /// Use JacobianOperator products and diagonal preconditioning in TrustRegion instead of assembled sparse products.
-  bool trust_use_jacobian_operator = false;
-
-  /// Use a dense cubic subspace model built from retained Hessian-vector changes.
-  bool trust_use_cubic_subspace = false;
-
   /// Option for how when the subspace solver should be utilized within trust-region solver
   SubSpaceOptions subspace_option = SubSpaceOptions::NEVER;
 
@@ -491,57 +478,6 @@ struct NonlinearSolverOptions {
 
   /// Should the gradient be converted to a monolithic matrix
   bool force_monolithic = false;
-
-  /// Number of speculative nonlinear PCG steps per accepted/rejected block
-  int pcg_block_len = 10;
-
-  /// Powell restart threshold for nonlinear PCG residual orthogonality
-  double pcg_powell_eta = 0.005;
-
-  /// Trust-ratio threshold below which the PCG-block trust scale shrinks
-  double pcg_trust_eta_bad = 0.1;
-
-  /// Trust-ratio threshold above which the PCG-block trust scale grows
-  double pcg_trust_eta_good = 0.75;
-
-  /// PCG-block trust-scale shrink factor
-  double pcg_shrink = 0.5;
-
-  /// PCG-block trust-scale growth factor
-  double pcg_growth = 1.25;
-
-  /// Initial PCG-block trust scale
-  double pcg_h_scale_init = 1.0;
-
-  /// Minimum PCG-block trust scale before declaring failure
-  double pcg_min_h_scale = 1e-8;
-
-  /// Maximum number of rejected PCG blocks before declaring failure
-  int pcg_max_block_retries = 20;
-
-  /// Nonmonotone cumulative gradient-work acceptance window
-  int pcg_window = 5;
-
-  /// Armijo coefficient for PCG-block inner step backtracking
-  double pcg_ls_armijo_c = 1e-4;
-
-  /// Maximum number of PCG-block inner step backtracks
-  int pcg_ls_max_backtracks = 8;
-
-  /// PCG-block inner step backtracking shrink factor
-  double pcg_ls_shrink = 0.5;
-
-  /// Descent and model denominator tolerance for PCG-block guards
-  double pcg_eps_descent = 1e-12;
-
-  /// Running-mean window for successful PCG-block trust-radius reference steps
-  int pcg_delta_avg_window = 5;
-
-  /// Use a direct scalar diagonal extracted from the JacobianOperator as the PCG-block preconditioner
-  bool pcg_use_jacobian_diagonal_preconditioner = false;
-
-  /// Relative floor used when inverting the absolute Jacobian diagonal for PCG-block diagonal preconditioning
-  double pcg_diagonal_floor = 1e-14;
 };
 // _nonlinear_options_end
 
diff --git a/src/smith/numerics/tests/test_equationsolver.cpp b/src/smith/numerics/tests/test_equationsolver.cpp
index a534acd8f1..edab4fd012 100644
--- a/src/smith/numerics/tests/test_equationsolver.cpp
+++ b/src/smith/numerics/tests/test_equationsolver.cpp
@@ -124,206 +124,6 @@ TEST_P(EquationSolverSuite, All)
   }
 }
 
-TEST(EquationSolver, PcgBlockUsesMatrixFreeTangentAction)
-{
-  auto mesh = mfem::Mesh::MakeCartesian2D(1, 1, mfem::Element::QUADRILATERAL);
-  auto pmesh = mfem::ParMesh(MPI_COMM_WORLD, mesh);
-
-  pmesh.EnsureNodes();
-  pmesh.ExchangeFaceNbrData();
-
-  constexpr int p = 1;
-  constexpr int dim = 2;
-  using test_space = H1<p>;
-  using trial_space = H1<p>;
-
-  auto [fes, fec] = smith::generateParFiniteElementSpace<test_space>(&pmesh);
-  (void)fec;
-
-  mfem::HypreParVector x_exact(fes.get());
-  mfem::HypreParVector x_computed(fes.get());
-  x_exact.Randomize(0);
-  x_computed = 0.0;
-
-  std::unique_ptr<mfem::HypreParMatrix> J;
-
-  Functional<test_space(trial_space)> residual(fes.get(), {fes.get()});
-  Domain domain = EntireDomain(pmesh);
-  residual.AddDomainIntegral(
-      Dimension<dim>{}, DependsOn<0>{},
-      [](double /*t*/, auto, auto scalar) {
-        auto [u, du_dx] = scalar;
-        return smith::tuple{u, du_dx};
-      },
-      domain);
-
-  StdFunctionOperator residual_opr(
-      fes->TrueVSize(),
-      [&x_exact, &residual](const mfem::Vector& x, mfem::Vector& r) {
-        constexpr double time = 0.0;
-        r = residual(time, x);
-        r -= residual(time, x_exact);
-      },
-      [&residual, &J](const mfem::Vector& x) -> mfem::Operator& {
-        constexpr double time = 0.0;
-        auto [val, grad] = residual(time, differentiate_wrt(x));
-        J = assemble(grad);
-        return *J;
-      });
-
-  const LinearSolverOptions lin_opts = {.linear_solver = LinearSolver::CG,
-                                        .preconditioner = Preconditioner::HypreJacobi,
-                                        .relative_tol = 1.0e-12,
-                                        .absolute_tol = 1.0e-14,
-                                        .max_iterations = 500,
-                                        .print_level = 0};
-
-  const NonlinearSolverOptions nonlin_opts = {.nonlin_solver = NonlinearSolver::PcgBlock,
-                                              .relative_tol = 1.0e-12,
-                                              .absolute_tol = 1.0e-14,
-                                              .max_iterations = 500,
-                                              .print_level = 0};
-
-  EquationSolver eq_solver(nonlin_opts, lin_opts);
-  eq_solver.setOperator(residual_opr);
-
-  int num_tangent_actions = 0;
-  eq_solver.setMatrixFreeTangentAction(
-      [&residual, &num_tangent_actions](const mfem::Vector& x, const mfem::Vector& dx, mfem::Vector& y) {
-        constexpr double time = 0.0;
-        auto [val, grad] = residual(time, differentiate_wrt(x));
-        grad.Mult(dx, y);
-        ++num_tangent_actions;
-      });
-
-  eq_solver.solve(x_computed);
-
-  const auto diagnostics = eq_solver.pcgBlockDiagnostics();
-  ASSERT_TRUE(diagnostics.has_value());
-  EXPECT_GT(num_tangent_actions, 0);
-  EXPECT_EQ(diagnostics->num_hess_vecs, static_cast<size_t>(num_tangent_actions));
-  EXPECT_TRUE(eq_solver.nonlinearSolver().GetConverged());
-  EXPECT_LT(x_computed.DistanceTo(x_exact.GetData()), 1.0e-10);
-}
-
-TEST(EquationSolver, PcgBlockUsesJacobianOperator)
-{
-  class MatrixJacobianOperator : public JacobianOperator {
-   public:
-    explicit MatrixJacobianOperator(std::unique_ptr<mfem::HypreParMatrix> matrix)
-        : JacobianOperator(matrix->Height(), matrix->Width()), matrix_(std::move(matrix))
-    {
-    }
-
-    void Mult(const mfem::Vector& dx, mfem::Vector& y) const override { matrix_->Mult(dx, y); }
-
-    std::unique_ptr<mfem::HypreParMatrix> assemble() override
-    {
-      return std::make_unique<mfem::HypreParMatrix>(*matrix_);
-    }
-
-    void assembleDiagonal(mfem::Vector& diag) const override { matrix_->GetDiag(diag); }
-
-   private:
-    std::unique_ptr<mfem::HypreParMatrix> matrix_;
-  };
-
-  auto mesh = mfem::Mesh::MakeCartesian2D(1, 1, mfem::Element::QUADRILATERAL);
-  auto pmesh = mfem::ParMesh(MPI_COMM_WORLD, mesh);
-
-  pmesh.EnsureNodes();
-  pmesh.ExchangeFaceNbrData();
-
-  constexpr int p = 1;
-  constexpr int dim = 2;
-  using test_space = H1<p>;
-  using trial_space = H1<p>;
-
-  auto [fes, fec] = smith::generateParFiniteElementSpace<test_space>(&pmesh);
-  (void)fec;
-
-  mfem::HypreParVector x_exact(fes.get());
-  mfem::HypreParVector x_computed(fes.get());
-  x_exact.Randomize(0);
-  x_computed = 0.0;
-
-  std::unique_ptr<mfem::HypreParMatrix> J;
-
-  Functional<test_space(trial_space)> residual(fes.get(), {fes.get()});
-  Domain domain = EntireDomain(pmesh);
-  residual.AddDomainIntegral(
-      Dimension<dim>{}, DependsOn<0>{},
-      [](double /*t*/, auto, auto scalar) {
-        auto [u, du_dx] = scalar;
-        return smith::tuple{u, du_dx};
-      },
-      domain);
-
-  {
-    constexpr double time = 0.0;
-    auto [val, grad] = residual(time, differentiate_wrt(x_exact));
-    FunctionalJacobianOperator<decltype(grad)> jacobian_operator(grad);
-
-    mfem::Vector dx(x_exact.Size());
-    mfem::Vector y_grad(x_exact.Size());
-    mfem::Vector y_operator(x_exact.Size());
-    dx.Randomize(1);
-    grad.Mult(dx, y_grad);
-    jacobian_operator.Mult(dx, y_operator);
-
-    EXPECT_LT(y_operator.DistanceTo(y_grad.GetData()), 1.0e-14);
-  }
-
-  StdFunctionOperator residual_opr(
-      fes->TrueVSize(),
-      [&x_exact, &residual](const mfem::Vector& x, mfem::Vector& r) {
-        constexpr double time = 0.0;
-        r = residual(time, x);
-        r -= residual(time, x_exact);
-      },
-      [&residual, &J](const mfem::Vector& x) -> mfem::Operator& {
-        constexpr double time = 0.0;
-        auto [val, grad] = residual(time, differentiate_wrt(x));
-        J = assemble(grad);
-        return *J;
-      });
-
-  const LinearSolverOptions lin_opts = {.linear_solver = LinearSolver::CG,
-                                        .preconditioner = Preconditioner::HypreJacobi,
-                                        .relative_tol = 1.0e-12,
-                                        .absolute_tol = 1.0e-14,
-                                        .max_iterations = 500,
-                                        .print_level = 0};
-
-  const NonlinearSolverOptions nonlin_opts = {.nonlin_solver = NonlinearSolver::PcgBlock,
-                                              .relative_tol = 1.0e-12,
-                                              .absolute_tol = 1.0e-14,
-                                              .max_iterations = 500,
-                                              .print_level = 0};
-
-  EquationSolver eq_solver(nonlin_opts, lin_opts);
-  eq_solver.setOperator(residual_opr);
-
-  int num_operator_evals = 0;
-  eq_solver.setJacobianOperator([&residual, &num_operator_evals](const mfem::Vector& x) {
-    constexpr double time = 0.0;
-    auto [val, grad] = residual(time, differentiate_wrt(x));
-    ++num_operator_evals;
-    return std::make_unique<MatrixJacobianOperator>(assemble(grad));
-  });
-
-  eq_solver.solve(x_computed);
-
-  const auto diagnostics = eq_solver.pcgBlockDiagnostics();
-  ASSERT_TRUE(diagnostics.has_value());
-  EXPECT_GT(num_operator_evals, 0);
-  EXPECT_EQ(diagnostics->num_jacobian_operator_evals, static_cast<size_t>(num_operator_evals));
-  EXPECT_GE(diagnostics->num_hess_vecs, diagnostics->num_jacobian_operator_evals);
-  EXPECT_EQ(diagnostics->num_diagonal_assembles, 0u);
-  EXPECT_TRUE(eq_solver.nonlinearSolver().GetConverged());
-  EXPECT_LT(x_computed.DistanceTo(x_exact.GetData()), 1.0e-10);
-}
-
 /**
  * @brief Nonlinear solvers to test. Always includes NonlinearSolver::Newton and NonlinearSolver::LBFGS
  * If SMITH_USE_SUNDIALS is set, adds: NonlinearSolver::KINFullStep, NonlinearSolver::KINBacktrackingLineSearch, and
diff --git a/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp b/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp
index 6e52393681..62c7730205 100644
--- a/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp
+++ b/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp
@@ -112,53 +112,7 @@ TEST(TrustRegionSubspaceMfem, RemoveDependentDirectionsDropsDuplicatesAndZero)
   expectNearVector(*hdirs_new[0], hd1, 0.0);
 }
 
-TEST(TrustRegionSubspaceMfem, RemoveDependentDirectionTriplesKeepsHistoryAligned)
-{
-  mfem::Vector d1(3);
-  mfem::Vector d2(3);
-  mfem::Vector d3(3);
-  mfem::Vector hd1(3);
-  mfem::Vector hd2(3);
-  mfem::Vector hd3(3);
-  mfem::Vector old_hd1(3);
-  mfem::Vector old_hd2(3);
-  mfem::Vector old_hd3(3);
-
-  d1 = 0.0;
-  d2 = 0.0;
-  d3 = 0.0;
-  hd1 = 0.0;
-  hd2 = 0.0;
-  hd3 = 0.0;
-  old_hd1 = 0.0;
-  old_hd2 = 0.0;
-  old_hd3 = 0.0;
 
-  d1[0] = 1.0;
-  d2 = d1;
-  d2 *= 2.0;
-  d3[2] = 1.0;
-  hd1[0] = 3.0;
-  hd2[0] = 6.0;
-  hd3[2] = 4.0;
-  old_hd1[0] = 2.0;
-  old_hd2[0] = 4.0;
-  old_hd3[2] = 5.0;
-
-  std::vector<const mfem::Vector*> dirs = {&d1, &d2, &d3};
-  std::vector<const mfem::Vector*> hdirs = {&hd1, &hd2, &hd3};
-  std::vector<const mfem::Vector*> old_hdirs = {&old_hd1, &old_hd2, &old_hd3};
-
-  auto [dirs_new, hdirs_new, old_hdirs_new] = smith::removeDependentDirectionTriples(dirs, hdirs, old_hdirs);
-
-  ASSERT_EQ(dirs_new.size(), 2);
-  expectNearVector(*dirs_new[0], d1, 0.0);
-  expectNearVector(*hdirs_new[0], hd1, 0.0);
-  expectNearVector(*old_hdirs_new[0], old_hd1, 0.0);
-  expectNearVector(*dirs_new[1], d3, 0.0);
-  expectNearVector(*hdirs_new[1], hd3, 0.0);
-  expectNearVector(*old_hdirs_new[1], old_hd3, 0.0);
-}
 
 TEST(TrustRegionSubspaceMfem, SolveHitsTrustRegionBoundary)
 {
@@ -236,261 +190,6 @@ TEST(TrustRegionSubspaceMfem, SolveHandlesZeroDirection)
   EXPECT_LT(energy, 0.0);
 }
 
-TEST(TrustRegionCubicSubspaceMfem, ZeroCubicMatchesInteriorQuadraticSolve)
-{
-  mfem::DenseMatrix A(2);
-  A = 0.0;
-  A(0, 0) = 4.0;
-  A(1, 1) = 2.0;
-
-  mfem::Vector b(2);
-  b[0] = 2.0;
-  b[1] = -1.0;
-
-  std::vector<mfem::DenseMatrix> cubic(2, mfem::DenseMatrix(2));
-  for (auto& matrix : cubic) {
-    matrix = 0.0;
-  }
-
-  auto [x, energy] = smith::solveDenseCubicTrustRegionProblemMfem(A, b, cubic, 10.0);
-
-  EXPECT_NEAR(x[0], 0.5, 1.0e-10);
-  EXPECT_NEAR(x[1], -0.5, 1.0e-10);
-  EXPECT_NEAR(energy, -0.75, 1.0e-10);
-}
-
-TEST(TrustRegionCubicSubspaceMfem, CubicTermChangesOneDimensionalMinimizer)
-{
-  mfem::DenseMatrix A(1);
-  A(0, 0) = 1.0;
-
-  mfem::Vector b(1);
-  b[0] = 1.0;
-
-  std::vector<mfem::DenseMatrix> cubic(1, mfem::DenseMatrix(1));
-  cubic[0](0, 0) = 6.0;
-
-  auto [x, energy] = smith::solveDenseCubicTrustRegionProblemMfem(A, b, cubic, 1.0);
-
-  const double expected = (-1.0 + std::sqrt(13.0)) / 6.0;
-  EXPECT_NEAR(x[0], expected, 2.0e-3);
-  EXPECT_NEAR(energy, 0.5 * expected * expected - expected + expected * expected * expected, 5.0e-6);
-}
-
-TEST(TrustRegionCubicSubspaceMfem, RespectsTrustRegionBoundary)
-{
-  mfem::DenseMatrix A(1);
-  A(0, 0) = 1.0;
-
-  mfem::Vector b(1);
-  b[0] = 10.0;
-
-  std::vector<mfem::DenseMatrix> cubic(1, mfem::DenseMatrix(1));
-  cubic[0] = 0.0;
-
-  auto [x, energy] = smith::solveDenseCubicTrustRegionProblemMfem(A, b, cubic, 0.25);
-
-  EXPECT_NEAR(x.Norml2(), 0.25, 1.0e-12);
-  EXPECT_NEAR(x[0], 0.25, 1.0e-12);
-  EXPECT_NEAR(energy, 0.5 * 0.25 * 0.25 - 10.0 * 0.25, 1.0e-12);
-}
-
-TEST(TrustRegionCubicSubspaceMfem, HistoryProjectedSubspaceSolveRuns)
-{
-  mfem::Vector e1(2);
-  mfem::Vector e2(2);
-  e1 = 0.0;
-  e2 = 0.0;
-  e1[0] = 1.0;
-  e2[1] = 1.0;
-
-  mfem::Vector h1(2);
-  mfem::Vector h2(2);
-  mfem::Vector old_h1(2);
-  mfem::Vector old_h2(2);
-  h1 = 0.0;
-  h2 = 0.0;
-  old_h1 = 0.0;
-  old_h2 = 0.0;
-  h1[0] = 2.0;
-  h2[1] = 3.0;
-  old_h1[0] = 1.0;
-  old_h2[1] = 3.0;
-
-  mfem::Vector previous_step(2);
-  previous_step = 0.0;
-  previous_step[0] = 1.0;
-
-  mfem::Vector b(2);
-  b[0] = 1.0;
-  b[1] = 0.25;
-
-  std::vector<const mfem::Vector*> directions = {&e1, &e2};
-  std::vector<const mfem::Vector*> h_directions = {&h1, &h2};
-  std::vector<const mfem::Vector*> old_h_directions = {&old_h1, &old_h2};
-
-  auto [x, leftvecs, leftvals, energy] =
-      smith::solveCubicSubspaceProblemMfem(directions, h_directions, old_h_directions, previous_step, b, 0.5, 1);
-
-  EXPECT_LE(x.Norml2(), 0.5 + 1.0e-12);
-  EXPECT_FALSE(leftvecs.empty());
-  EXPECT_EQ(leftvals.size(), 1);
-  EXPECT_LT(energy, 0.0);
-}
-
-TEST(TrustRegionCubicSubspaceMfem, FallsBackToQuadraticWhenCubicPredictionDoesNotImprove)
-{
-  mfem::Vector e1(1);
-  mfem::Vector h1(1);
-  mfem::Vector old_h1(1);
-  mfem::Vector previous_step(1);
-  mfem::Vector b(1);
-
-  e1[0] = 1.0;
-  h1[0] = 1.0;
-  old_h1[0] = 1.0;
-  previous_step[0] = 1.0;
-  b[0] = 1.0;
-
-  std::vector<const mfem::Vector*> directions = {&e1};
-  std::vector<const mfem::Vector*> h_directions = {&h1};
-  std::vector<const mfem::Vector*> old_h_directions = {&old_h1};
-
-  auto [cubic_x, cubic_leftvecs, cubic_leftvals, cubic_energy] =
-      smith::solveCubicSubspaceProblemMfem(directions, h_directions, old_h_directions, previous_step, b, 1.0, 1);
-  auto [quadratic_x, quadratic_leftvecs, quadratic_leftvals, quadratic_energy] =
-      smith::solveSubspaceProblemMfem(directions, h_directions, b, 1.0, 1);
-
-  expectNearVector(cubic_x, quadratic_x, 1.0e-12);
-  EXPECT_EQ(cubic_leftvecs.size(), quadratic_leftvecs.size());
-  EXPECT_EQ(cubic_leftvals.size(), quadratic_leftvals.size());
-  EXPECT_NEAR(cubic_energy, quadratic_energy, 1.0e-12);
-}
-
-TEST(TrustRegionCubicSubspaceMfem, PreviousStepSecantIsExactForCompatibleCubic)
-{
-  mfem::Vector e1(2);
-  mfem::Vector e2(2);
-  e1 = 0.0;
-  e2 = 0.0;
-  e1[0] = 1.0;
-  e2[1] = 1.0;
-
-  mfem::Vector h1(2);
-  mfem::Vector h2(2);
-  mfem::Vector old_h1(2);
-  mfem::Vector old_h2(2);
-  h1 = 0.0;
-  h2 = 0.0;
-  old_h1 = 0.0;
-  old_h2 = 0.0;
-  h1[0] = 1.0;
-  h2[1] = 1.0;
-  old_h1[0] = 7.0;
-  old_h2[1] = 1.0;
-
-  mfem::Vector previous_step(2);
-  previous_step = 0.0;
-  previous_step[0] = 1.0;
-
-  mfem::Vector b(2);
-  b = 0.0;
-  b[0] = 0.1;
-
-  std::vector<const mfem::Vector*> directions = {&e1, &e2};
-  std::vector<const mfem::Vector*> h_directions = {&h1, &h2};
-  std::vector<const mfem::Vector*> old_h_directions = {&old_h1, &old_h2};
-
-  bool used_cubic = false;
-  auto [x, leftvecs, leftvals, energy] =
-      smith::solveCubicSubspaceProblemMfem(directions, h_directions, old_h_directions, previous_step, b, 1.0, 1,
-                                           &used_cubic);
-
-  mfem::DenseMatrix A(2);
-  A = 0.0;
-  A(0, 0) = 1.0;
-  A(1, 1) = 1.0;
-  std::vector<mfem::DenseMatrix> cubic(2, mfem::DenseMatrix(2));
-  cubic[0] = 0.0;
-  cubic[1] = 0.0;
-  cubic[0](0, 0) = -6.0;
-  auto [expected_x, expected_energy] = smith::solveDenseCubicTrustRegionProblemMfem(A, b, cubic, 1.0);
-
-  EXPECT_TRUE(used_cubic);
-  expectNearVector(x, expected_x, 1.0e-12);
-  EXPECT_NEAR(energy, expected_energy, 1.0e-12);
-  EXPECT_FALSE(leftvecs.empty());
-  EXPECT_EQ(leftvals.size(), 1);
-}
-
-TEST(TrustRegionCubicSubspaceMfem, PreviousStepSecantIsExactForRotatedCompatibleCubic)
-{
-  mfem::Vector e1(2);
-  mfem::Vector e2(2);
-  e1 = 0.0;
-  e2 = 0.0;
-  e1[0] = 1.0;
-  e2[1] = 1.0;
-
-  constexpr double lambda = -6.0;
-  mfem::Vector previous_step(2);
-  previous_step[0] = 1.0;
-  previous_step[1] = 1.0;
-  mfem::Vector u(previous_step);
-  u /= u.Norml2();
-
-  mfem::DenseMatrix delta_h(2);
-  delta_h = 0.0;
-  for (int i = 0; i < 2; ++i) {
-    for (int j = 0; j < 2; ++j) {
-      delta_h(i, j) = lambda * previous_step.Norml2() * u[i] * u[j];
-    }
-  }
-
-  mfem::Vector h1(e1);
-  mfem::Vector h2(e2);
-  mfem::Vector old_h1(e1);
-  mfem::Vector old_h2(e2);
-  for (int i = 0; i < 2; ++i) {
-    old_h1[i] -= delta_h(i, 0);
-    old_h2[i] -= delta_h(i, 1);
-  }
-
-  mfem::Vector b(2);
-  b[0] = 0.1 * u[0];
-  b[1] = 0.1 * u[1];
-
-  std::vector<const mfem::Vector*> directions = {&e1, &e2};
-  std::vector<const mfem::Vector*> h_directions = {&h1, &h2};
-  std::vector<const mfem::Vector*> old_h_directions = {&old_h1, &old_h2};
-
-  bool used_cubic = false;
-  auto [x, leftvecs, leftvals, energy] =
-      smith::solveCubicSubspaceProblemMfem(directions, h_directions, old_h_directions, previous_step, b, 1.0, 1,
-                                           &used_cubic);
-
-  mfem::DenseMatrix A(2);
-  A = 0.0;
-  A(0, 0) = 1.0;
-  A(1, 1) = 1.0;
-  std::vector<mfem::DenseMatrix> cubic(2, mfem::DenseMatrix(2));
-  cubic[0] = 0.0;
-  cubic[1] = 0.0;
-  for (int k = 0; k < 2; ++k) {
-    for (int i = 0; i < 2; ++i) {
-      for (int j = 0; j < 2; ++j) {
-        cubic[size_t(k)](i, j) = lambda * u[k] * u[i] * u[j];
-      }
-    }
-  }
-  auto [expected_x, expected_energy] = smith::solveDenseCubicTrustRegionProblemMfem(A, b, cubic, 1.0);
-
-  EXPECT_TRUE(used_cubic);
-  expectNearVector(x, expected_x, 1.0e-12);
-  EXPECT_NEAR(energy, expected_energy, 1.0e-12);
-  EXPECT_FALSE(leftvecs.empty());
-  EXPECT_EQ(leftvals.size(), 1);
-}
 
 int main(int argc, char* argv[])
 {
diff --git a/src/smith/numerics/tests/test_trust_region_solver_petsc.cpp b/src/smith/numerics/tests/test_trust_region_solver_petsc.cpp
index 1e3eae5433..55c7a16f77 100644
--- a/src/smith/numerics/tests/test_trust_region_solver_petsc.cpp
+++ b/src/smith/numerics/tests/test_trust_region_solver_petsc.cpp
@@ -23,6 +23,8 @@
 #include "smith/physics/state/finite_element_vector.hpp"
 #include "smith/numerics/petsc_solvers.hpp"
 
+#ifdef SMITH_TRUST_REGION_USE_PETSC_SUBSPACE
+
 const std::string MESHTAG = "mesh";
 
 static constexpr int scalar_field_order = 1;
@@ -213,6 +215,8 @@ TEST_F(MeshFixture, MfemSubspaceSolveMatchesPetsc)
   MatDestroy(&A_parallel);
 }
 
+#endif  // SMITH_TRUST_REGION_USE_PETSC_SUBSPACE
+
 int main(int argc, char* argv[])
 {
   ::testing::InitGoogleTest(&argc, argv);
diff --git a/src/smith/numerics/trust_region_cubic_subspace.cpp b/src/smith/numerics/trust_region_cubic_subspace.cpp
deleted file mode 100644
index 2bbc86b16c..0000000000
--- a/src/smith/numerics/trust_region_cubic_subspace.cpp
+++ /dev/null
@@ -1,461 +0,0 @@
-// Copyright (c) Lawrence Livermore National Security, LLC and
-// other Smith Project Developers. See the top-level LICENSE file for
-// details.
-//
-// SPDX-License-Identifier: (BSD-3-Clause)
-
-#include "smith/numerics/trust_region_solver.hpp"
-
-#include <cmath>
-
-#include "smith/infrastructure/profiling.hpp"
-
-namespace smith {
-
-#ifdef MFEM_USE_LAPACK
-
-namespace {
-
-double dot(const mfem::Vector& a, const mfem::Vector& b)
-{
-  return a * b;
-}
-
-void symmetrize(mfem::DenseMatrix& A)
-{
-  MFEM_VERIFY(A.Height() == A.Width(), "symmetrize requires square matrix.");
-  for (int i = 0; i < A.Height(); ++i) {
-    for (int j = 0; j < i; ++j) {
-      const double value = 0.5 * (A(i, j) + A(j, i));
-      A(i, j) = value;
-      A(j, i) = value;
-    }
-  }
-}
-
-mfem::Vector matrixColumn(const mfem::DenseMatrix& A, int j)
-{
-  mfem::Vector col(A.Height());
-  for (int i = 0; i < A.Height(); ++i) {
-    col[i] = A(i, j);
-  }
-  return col;
-}
-
-mfem::DenseMatrix columnsToMatrix(const std::vector<mfem::Vector>& cols)
-{
-  mfem::DenseMatrix A(cols.empty() ? 0 : cols[0].Size(), static_cast<int>(cols.size()));
-  for (int j = 0; j < A.Width(); ++j) {
-    for (int i = 0; i < A.Height(); ++i) {
-      A(i, j) = cols[size_t(j)][i];
-    }
-  }
-  return A;
-}
-
-mfem::DenseMatrix denseDot(const std::vector<const mfem::Vector*>& s, const std::vector<const mfem::Vector*>& As)
-{
-  MFEM_VERIFY(s.size() == As.size(), "Dense dot requires matching direction counts.");
-  mfem::DenseMatrix result(static_cast<int>(s.size()));
-  for (int i = 0; i < result.Height(); ++i) {
-    for (int j = 0; j < result.Width(); ++j) {
-      result(i, j) = innerProduct(*s[size_t(i)], *As[size_t(j)], MPI_COMM_WORLD);
-    }
-  }
-  return result;
-}
-
-mfem::Vector denseDot(const std::vector<const mfem::Vector*>& s, const mfem::Vector& b)
-{
-  mfem::Vector result(static_cast<int>(s.size()));
-  for (int i = 0; i < result.Size(); ++i) {
-    result[i] = innerProduct(*s[size_t(i)], b, MPI_COMM_WORLD);
-  }
-  return result;
-}
-
-mfem::DenseMatrix orthonormalBasisTransform(const mfem::DenseMatrix& gram)
-{
-  mfem::DenseMatrix gram_copy(gram);
-  mfem::Vector evals;
-  mfem::DenseMatrix evecs;
-  gram_copy.Eigensystem(evals, evecs);
-
-  double trace_mag = 0.0;
-  for (int i = 0; i < evals.Size(); ++i) {
-    trace_mag += std::abs(evals[i]);
-  }
-
-  std::vector<mfem::Vector> kept_columns;
-  for (int i = 0; i < evals.Size(); ++i) {
-    if (evals[i] > 1e-9 * trace_mag) {
-      mfem::Vector col = matrixColumn(evecs, i);
-      col /= std::sqrt(evals[i]);
-      kept_columns.emplace_back(std::move(col));
-    }
-  }
-
-  return columnsToMatrix(kept_columns);
-}
-
-mfem::DenseMatrix tripleProduct(const mfem::DenseMatrix& L, const mfem::DenseMatrix& A, const mfem::DenseMatrix& R)
-{
-  mfem::DenseMatrix tmp(A.Height(), R.Width());
-  mfem::Mult(A, R, tmp);
-  mfem::DenseMatrix out(L.Width(), R.Width());
-  mfem::MultAtB(L, tmp, out);
-  return out;
-}
-
-mfem::Vector projectWithTranspose(const mfem::DenseMatrix& A, const mfem::Vector& x)
-{
-  mfem::Vector out(A.Width());
-  A.MultTranspose(x, out);
-  return out;
-}
-
-mfem::DenseMatrix orthonormalBasisWithFirstVector(const mfem::Vector& first)
-{
-  const int n = first.Size();
-  mfem::DenseMatrix Q(n);
-  Q = 0.0;
-
-  mfem::Vector q0(first);
-  q0 /= q0.Norml2();
-  for (int i = 0; i < n; ++i) {
-    Q(i, 0) = q0[i];
-  }
-
-  int col = 1;
-  for (int seed = 0; seed < n && col < n; ++seed) {
-    mfem::Vector candidate(n);
-    candidate = 0.0;
-    candidate[seed] = 1.0;
-    for (int j = 0; j < col; ++j) {
-      const mfem::Vector qj = matrixColumn(Q, j);
-      candidate.Add(-dot(candidate, qj), qj);
-    }
-    const double norm = candidate.Norml2();
-    if (norm > 1.0e-12) {
-      candidate /= norm;
-      for (int i = 0; i < n; ++i) {
-        Q(i, col) = candidate[i];
-      }
-      ++col;
-    }
-  }
-
-  MFEM_VERIFY(col == n, "Failed to build orthonormal basis for cubic tensor completion.");
-  return Q;
-}
-
-std::vector<mfem::DenseMatrix> completeSymmetricCubicTensor(const mfem::DenseMatrix& deltaA,
-                                                            const mfem::Vector& previous_step)
-{
-  const int n = previous_step.Size();
-  const double step_norm = previous_step.Norml2();
-  MFEM_VERIFY(step_norm > 0.0, "Cannot complete cubic tensor with zero previous step.");
-
-  const mfem::DenseMatrix Q = orthonormalBasisWithFirstVector(previous_step);
-  mfem::DenseMatrix delta_hat = tripleProduct(Q, deltaA, Q);
-  symmetrize(delta_hat);
-
-  std::vector<mfem::DenseMatrix> tensor_hat(static_cast<size_t>(n), mfem::DenseMatrix(n));
-  for (auto& matrix : tensor_hat) {
-    matrix = 0.0;
-  }
-
-  for (int i = 0; i < n; ++i) {
-    for (int j = 0; j < n; ++j) {
-      const double value = delta_hat(i, j) / step_norm;
-      tensor_hat[0](i, j) = value;
-      tensor_hat[size_t(i)](0, j) = value;
-      tensor_hat[size_t(i)](j, 0) = value;
-    }
-  }
-
-  std::vector<mfem::DenseMatrix> tensor(static_cast<size_t>(n), mfem::DenseMatrix(n));
-  for (auto& matrix : tensor) {
-    matrix = 0.0;
-  }
-
-  for (int a = 0; a < n; ++a) {
-    for (int b = 0; b < n; ++b) {
-      for (int c = 0; c < n; ++c) {
-        double value = 0.0;
-        for (int alpha = 0; alpha < n; ++alpha) {
-          for (int beta = 0; beta < n; ++beta) {
-            for (int gamma = 0; gamma < n; ++gamma) {
-              value += Q(a, alpha) * Q(b, beta) * Q(c, gamma) * tensor_hat[size_t(alpha)](beta, gamma);
-            }
-          }
-        }
-        tensor[size_t(a)](b, c) = value;
-      }
-    }
-  }
-
-  return tensor;
-}
-
-mfem::Vector combineDirections(const std::vector<const mfem::Vector*>& states, const mfem::Vector& coeffs)
-{
-  mfem::Vector out(*states[0]);
-  out = 0.0;
-  for (int i = 0; i < coeffs.Size(); ++i) {
-    out.Add(coeffs[i], *states[size_t(i)]);
-  }
-  return out;
-}
-
-void verifyCubicInputs(const mfem::DenseMatrix& A, const mfem::Vector& b, const std::vector<mfem::DenseMatrix>& cubic,
-                       double delta)
-{
-  MFEM_VERIFY(A.Height() == A.Width(), "Dense cubic trust-region matrix must be square.");
-  MFEM_VERIFY(A.Height() == b.Size(), "Dense cubic trust-region linear term has incompatible size.");
-  MFEM_VERIFY(delta >= 0.0, "Dense cubic trust-region radius must be nonnegative.");
-  MFEM_VERIFY(static_cast<int>(cubic.size()) == b.Size(), "Dense cubic tensor must have one matrix per dimension.");
-  for (const auto& matrix : cubic) {
-    MFEM_VERIFY(matrix.Height() == b.Size() && matrix.Width() == b.Size(),
-                "Dense cubic tensor matrix has incompatible size.");
-  }
-}
-
-double cubicEnergy(const mfem::DenseMatrix& A, const mfem::Vector& b, const std::vector<mfem::DenseMatrix>& cubic,
-                   const mfem::Vector& x)
-{
-  mfem::Vector Ax(x.Size());
-  A.Mult(x, Ax);
-  double energy = 0.5 * dot(x, Ax) - dot(x, b);
-  for (int k = 0; k < x.Size(); ++k) {
-    cubic[size_t(k)].Mult(x, Ax);
-    energy += (x[k] * dot(x, Ax)) / 6.0;
-  }
-  return energy;
-}
-
-mfem::Vector cubicGradient(const mfem::DenseMatrix& A, const mfem::Vector& b,
-                           const std::vector<mfem::DenseMatrix>& cubic, const mfem::Vector& x)
-{
-  mfem::Vector grad(x.Size());
-  A.Mult(x, grad);
-  grad -= b;
-
-  mfem::Vector tmp(x.Size());
-  for (int i = 0; i < x.Size(); ++i) {
-    double correction = 0.0;
-    cubic[size_t(i)].Mult(x, tmp);
-    correction += dot(x, tmp);
-    for (int k = 0; k < x.Size(); ++k) {
-      for (int j = 0; j < x.Size(); ++j) {
-        correction += x[k] * (cubic[size_t(k)](i, j) + cubic[size_t(k)](j, i)) * x[j];
-      }
-    }
-    grad[i] += correction / 6.0;
-  }
-
-  return grad;
-}
-
-void projectToBall(mfem::Vector& x, double delta)
-{
-  const double norm = x.Norml2();
-  if (norm > delta && norm > 0.0) {
-    x *= delta / norm;
-  }
-}
-
-mfem::Vector solveQuadraticCandidate(mfem::DenseMatrix A, const mfem::Vector& b, double delta)
-{
-  const int n = b.Size();
-  mfem::DenseMatrix shifted(A);
-  double trace = 0.0;
-  for (int i = 0; i < n; ++i) {
-    trace += std::abs(A(i, i));
-  }
-  const double regularization = std::max(1.0e-14, 1.0e-12 * trace / std::max(n, 1));
-  for (int i = 0; i < n; ++i) {
-    shifted(i, i) += regularization;
-  }
-
-  mfem::DenseMatrixInverse inv(shifted);
-  mfem::Vector x(n);
-  inv.Mult(b, x);
-  projectToBall(x, delta);
-  return x;
-}
-
-mfem::Vector projectedGradientSolve(const mfem::DenseMatrix& A, const mfem::Vector& b,
-                                    const std::vector<mfem::DenseMatrix>& cubic, mfem::Vector x, double delta)
-{
-  double energy = cubicEnergy(A, b, cubic, x);
-  constexpr int max_iters = 200;
-  constexpr double grad_tol = 1.0e-11;
-
-  for (int iter = 0; iter < max_iters; ++iter) {
-    mfem::Vector grad = cubicGradient(A, b, cubic, x);
-    if (grad.Norml2() <= grad_tol * std::max(1.0, b.Norml2())) {
-      break;
-    }
-
-    double step = 0.25;
-    bool accepted = false;
-    for (int ls = 0; ls < 30; ++ls) {
-      mfem::Vector trial(x);
-      trial.Add(-step, grad);
-      projectToBall(trial, delta);
-      const double trial_energy = cubicEnergy(A, b, cubic, trial);
-      if (trial_energy < energy - 1.0e-14) {
-        x = trial;
-        energy = trial_energy;
-        accepted = true;
-        break;
-      }
-      step *= 0.5;
-    }
-    if (!accepted) {
-      break;
-    }
-  }
-
-  return x;
-}
-
-}  // namespace
-
-DenseCubicTrustRegionResult solveDenseCubicTrustRegionProblemMfem(const mfem::DenseMatrix& A, const mfem::Vector& b,
-                                                                  const std::vector<mfem::DenseMatrix>& cubic,
-                                                                  double delta)
-{
-  SMITH_MARK_FUNCTION;
-  verifyCubicInputs(A, b, cubic, delta);
-
-  mfem::Vector best(b.Size());
-  best = 0.0;
-  double best_energy = cubicEnergy(A, b, cubic, best);
-  if (delta == 0.0 || b.Size() == 0) {
-    return std::make_tuple(best, best_energy);
-  }
-
-  std::vector<mfem::Vector> starts;
-  starts.emplace_back(best);
-  starts.emplace_back(solveQuadraticCandidate(A, b, delta));
-
-  mfem::Vector direction(b);
-  if (direction.Norml2() > 0.0) {
-    direction *= delta / direction.Norml2();
-    starts.emplace_back(direction);
-    direction *= -1.0;
-    starts.emplace_back(direction);
-  }
-
-  for (int i = 0; i < b.Size(); ++i) {
-    mfem::Vector axis(b.Size());
-    axis = 0.0;
-    axis[i] = delta;
-    starts.emplace_back(axis);
-    axis[i] = -delta;
-    starts.emplace_back(axis);
-  }
-
-  for (const auto& start : starts) {
-    mfem::Vector candidate = projectedGradientSolve(A, b, cubic, start, delta);
-    const double energy = cubicEnergy(A, b, cubic, candidate);
-    if (energy < best_energy) {
-      best = candidate;
-      best_energy = energy;
-    }
-  }
-
-  return std::make_tuple(best, best_energy);
-}
-
-TrustRegionSubspaceResult solveCubicSubspaceProblemMfem(
-    const std::vector<const mfem::Vector*>& directions, const std::vector<const mfem::Vector*>& A_directions,
-    const std::vector<const mfem::Vector*>& previous_A_directions, const mfem::Vector& previous_step,
-    const mfem::Vector& b, double delta, int num_leftmost, bool* used_cubic)
-{
-  SMITH_MARK_FUNCTION;
-  MFEM_VERIFY(directions.size() == A_directions.size(), "Cubic subspace directions and A_directions differ.");
-  MFEM_VERIFY(directions.size() == previous_A_directions.size(),
-              "Cubic subspace directions and previous_A_directions differ.");
-  MFEM_VERIFY(!directions.empty(), "Cubic subspace solve requires at least one direction.");
-
-  mfem::DenseMatrix ss = denseDot(directions, directions);
-  symmetrize(ss);
-  mfem::DenseMatrix T = orthonormalBasisTransform(ss);
-  MFEM_VERIFY(T.Width() > 0, "No independent directions in cubic MFEM subspace solve.");
-
-  mfem::DenseMatrix sAs = denseDot(directions, A_directions);
-  symmetrize(sAs);
-  mfem::DenseMatrix pAp = tripleProduct(T, sAs, T);
-  symmetrize(pAp);
-
-  mfem::DenseMatrix sDeltaA = denseDot(directions, previous_A_directions);
-  sDeltaA *= -1.0;
-  sDeltaA += sAs;
-  symmetrize(sDeltaA);
-  mfem::DenseMatrix pDeltaAp = tripleProduct(T, sDeltaA, T);
-  symmetrize(pDeltaAp);
-
-  mfem::Vector previous_coeffs = denseDot(directions, previous_step);
-  previous_coeffs = projectWithTranspose(T, previous_coeffs);
-  const double previous_norm_squared = dot(previous_coeffs, previous_coeffs);
-
-  std::vector<mfem::DenseMatrix> cubic(size_t(T.Width()), mfem::DenseMatrix(T.Width()));
-  for (auto& matrix : cubic) {
-    matrix = 0.0;
-  }
-  if (previous_norm_squared > 0.0) {
-    cubic = completeSymmetricCubicTensor(pDeltaAp, previous_coeffs);
-  }
-
-  const mfem::Vector sb = denseDot(directions, b);
-  const mfem::Vector pb = projectWithTranspose(T, sb);
-  auto [reduced_x, energy] = solveDenseCubicTrustRegionProblemMfem(pAp, pb, cubic, delta);
-
-  mfem::Vector coeffs(T.Height());
-  T.Mult(reduced_x, coeffs);
-  mfem::Vector sol = combineDirections(directions, coeffs);
-
-  auto [quadratic_sol, leftmosts, leftvals, quadratic_energy] =
-      solveSubspaceProblemMfem(directions, A_directions, b, delta, num_leftmost);
-  (void)quadratic_energy;
-
-  const mfem::Vector quadratic_s_coeffs = denseDot(directions, quadratic_sol);
-  const mfem::Vector quadratic_reduced_x = projectWithTranspose(T, quadratic_s_coeffs);
-  const double quadratic_cubic_energy = cubicEnergy(pAp, pb, cubic, quadratic_reduced_x);
-  if (quadratic_cubic_energy <= energy) {
-    if (used_cubic != nullptr) {
-      *used_cubic = false;
-    }
-    return std::make_tuple(quadratic_sol, leftmosts, leftvals, quadratic_cubic_energy);
-  }
-
-  if (used_cubic != nullptr) {
-    *used_cubic = true;
-  }
-  return std::make_tuple(sol, leftmosts, leftvals, energy);
-}
-
-#else
-
-DenseCubicTrustRegionResult solveDenseCubicTrustRegionProblemMfem(const mfem::DenseMatrix&, const mfem::Vector& b,
-                                                                  const std::vector<mfem::DenseMatrix>&, double)
-{
-  throw PetscException("MFEM dense cubic trust-region solve requires MFEM LAPACK support.");
-  return std::make_tuple(b, 0.0);
-}
-
-TrustRegionSubspaceResult solveCubicSubspaceProblemMfem(const std::vector<const mfem::Vector*>&,
-                                                        const std::vector<const mfem::Vector*>&,
-                                                        const std::vector<const mfem::Vector*>&,
-                                                        const mfem::Vector&, const mfem::Vector& b, double, int, bool*)
-{
-  throw PetscException("MFEM dense cubic trust-region solve requires MFEM LAPACK support.");
-  return std::make_tuple(b, std::vector<std::shared_ptr<mfem::Vector>> {}, std::vector<double> {}, 0.0);
-}
-
-#endif  // MFEM_USE_LAPACK
-
-}  // namespace smith
diff --git a/src/smith/numerics/trust_region_solver.hpp b/src/smith/numerics/trust_region_solver.hpp
index f076520f0e..960024b33d 100644
--- a/src/smith/numerics/trust_region_solver.hpp
+++ b/src/smith/numerics/trust_region_solver.hpp
@@ -66,8 +66,6 @@ void resetTrustRegionSubspaceTimings();
 
 TrustRegionSubspaceTimings trustRegionSubspaceTimings();
 
-using DenseCubicTrustRegionResult = std::tuple<mfem::Vector, double>;
-
 /// @brief computes the global size of mfem::Vector
 int globalSize(const mfem::Vector& parallel_v, const MPI_Comm& comm);
 
@@ -80,7 +78,7 @@ TrustRegionSubspaceResult solveSubspaceProblem(
     const std::vector<const mfem::Vector*>& directions, const std::vector<const mfem::Vector*>& A_directions,
     const mfem::Vector& b, double delta, int num_leftmost);
 
-#ifdef SMITH_USE_SLEPC
+#if defined(SMITH_USE_SLEPC) && defined(SMITH_TRUST_REGION_USE_PETSC_SUBSPACE)
 TrustRegionSubspaceResult solveSubspaceProblemPetsc(
     const std::vector<const mfem::Vector*>& directions, const std::vector<const mfem::Vector*>& A_directions,
     const mfem::Vector& b, double delta, int num_leftmost);
@@ -90,22 +88,7 @@ TrustRegionSubspaceResult solveSubspaceProblemMfem(
     const std::vector<const mfem::Vector*>& directions, const std::vector<const mfem::Vector*>& A_directions,
     const mfem::Vector& b, double delta, int num_leftmost);
 
-/// @brief solves a small dense cubic trust-region model
-///   1/2 x^T A x - b^T x + 1/6 sum_k x_k x^T cubic[k] x, ||x|| <= delta.
-DenseCubicTrustRegionResult solveDenseCubicTrustRegionProblemMfem(
-    const mfem::DenseMatrix& A, const mfem::Vector& b, const std::vector<mfem::DenseMatrix>& cubic, double delta);
-
-TrustRegionSubspaceResult solveCubicSubspaceProblemMfem(
-    const std::vector<const mfem::Vector*>& directions, const std::vector<const mfem::Vector*>& A_directions,
-    const std::vector<const mfem::Vector*>& previous_A_directions, const mfem::Vector& previous_step,
-    const mfem::Vector& b, double delta, int num_leftmost, bool* used_cubic = nullptr);
-
 std::pair<std::vector<const mfem::Vector*>, std::vector<const mfem::Vector*>> removeDependentDirections(
     std::vector<const mfem::Vector*> directions, std::vector<const mfem::Vector*> A_directions);
 
-std::tuple<std::vector<const mfem::Vector*>, std::vector<const mfem::Vector*>, std::vector<const mfem::Vector*>>
-removeDependentDirectionTriples(std::vector<const mfem::Vector*> directions,
-                                std::vector<const mfem::Vector*> A_directions,
-                                std::vector<const mfem::Vector*> previous_A_directions);
-
 }  // namespace smith
diff --git a/src/smith/physics/dfem_weak_form.hpp b/src/smith/physics/dfem_weak_form.hpp
index f55598039c..83a55d6ddd 100644
--- a/src/smith/physics/dfem_weak_form.hpp
+++ b/src/smith/physics/dfem_weak_form.hpp
@@ -213,18 +213,6 @@ class DfemWeakForm : public WeakForm {
     return std::make_unique<mfem::HypreParMatrix>();
   }
 
-  /// @overload
-  std::unique_ptr<JacobianOperator> jacobianOperator(
-      TimeInfo time_info, ConstFieldPtr /*shape_disp*/, const std::vector<ConstFieldPtr>& /*fields*/,
-      size_t /*input_col*/, const std::vector<ConstQuadratureFieldPtr>& /*quad_fields*/ = {}) const override
-  {
-    SLIC_ERROR_ROOT("DfemWeakForm does not support JacobianOperator construction");
-    dt_ = time_info.dt();
-    cycle_ = time_info.cycle();
-
-    return nullptr;
-  }
-
   /// @overload
   void jvp(TimeInfo time_info, ConstFieldPtr /*shape_disp*/, const std::vector<ConstFieldPtr>& /*fields*/,
            const std::vector<ConstQuadratureFieldPtr>& /*quad_fields*/, ConstFieldPtr /*v_shape_disp*/,
diff --git a/src/smith/physics/functional_weak_form.hpp b/src/smith/physics/functional_weak_form.hpp
index 8e99e71afc..5852a0388a 100644
--- a/src/smith/physics/functional_weak_form.hpp
+++ b/src/smith/physics/functional_weak_form.hpp
@@ -332,26 +332,6 @@ class FunctionalWeakForm<spatial_dim, OutputSpace, Parameters<InputSpaces...>,
     return J;
   }
 
-  /// @overload
-  std::unique_ptr<JacobianOperator> jacobianOperator(
-      TimeInfo time_info, ConstFieldPtr shape_disp, const std::vector<ConstFieldPtr>& fields, size_t input_col,
-      [[maybe_unused]] const std::vector<ConstQuadratureFieldPtr>& quad_fields = {}) const override
-  {
-    SLIC_ERROR_IF(input_col >= fields.size(), "Invalid JacobianOperator input column.");
-
-    dt_ = time_info.dt();
-    cycle_ = time_info.cycle();
-
-    auto jacs = jacobianFunctions(std::make_integer_sequence<int, sizeof...(input_indices)>{}, time_info.time(),
-                                  shape_disp, fields);
-    auto K = smith::get<DERIVATIVE>(jacs[input_col](time_info.time(), shape_disp, fields));
-
-    SLIC_ERROR_IF(K.Height() != K.Width(),
-                  "WeakForm::jacobianOperator currently supports square one-field derivatives only.");
-
-    return std::make_unique<FunctionalJacobianOperator<decltype(K)>>(std::move(K));
-  }
-
   /// @overload
   void jvp(TimeInfo time_info, ConstFieldPtr shape_disp, const std::vector<ConstFieldPtr>& fields,
            [[maybe_unused]] const std::vector<ConstQuadratureFieldPtr>& quad_fields,
diff --git a/src/smith/physics/solid_mechanics.hpp b/src/smith/physics/solid_mechanics.hpp
index 504538d4e6..a2d66cf8d5 100644
--- a/src/smith/physics/solid_mechanics.hpp
+++ b/src/smith/physics/solid_mechanics.hpp
@@ -56,17 +56,6 @@
 
 namespace smith {
 
-struct SolidMechanicsJacobianTimings {
-  size_t legacy_jacobian_evals = 0;
-  size_t jacobian_operator_evals = 0;
-  size_t jacobian_operator_assemblies = 0;
-  double legacy_derivative_seconds = 0.0;
-  double legacy_sparse_assembly_seconds = 0.0;
-  double legacy_essential_elimination_seconds = 0.0;
-  double jacobian_operator_derivative_seconds = 0.0;
-  double jacobian_operator_sparse_assembly_seconds = 0.0;
-  double jacobian_operator_essential_elimination_seconds = 0.0;
-};
 namespace solid_mechanics {
 
 namespace detail {
@@ -1065,126 +1054,16 @@ class SolidMechanics<order, dim, Parameters<parameter_space...>, std::integer_se
         // gradient of residual function
         [this](const mfem::Vector& u) -> mfem::Operator& {
           SMITH_MARK_FUNCTION;
-          using Clock = std::chrono::steady_clock;
-          auto seconds_since = [](Clock::time_point start) {
-            return std::chrono::duration_cast<std::chrono::duration<double>>(Clock::now() - start).count();
-          };
-          auto derivative_start = Clock::now();
           auto [r, drdu] = (*residual_)(time_, shapeDisplacement(), differentiate_wrt(u), acceleration_,
                                         *parameters_[parameter_indices].state...);
-          jacobian_timings_.legacy_derivative_seconds += seconds_since(derivative_start);
-          ++jacobian_timings_.legacy_jacobian_evals;
           J_.reset();
-          auto assembly_start = Clock::now();
           J_ = assemble(drdu);
-          jacobian_timings_.legacy_sparse_assembly_seconds += seconds_since(assembly_start);
           J_e_.reset();
-          auto elimination_start = Clock::now();
           J_e_ = bcs_.eliminateAllEssentialDofsFromMatrix(*J_);
-          jacobian_timings_.legacy_essential_elimination_seconds += seconds_since(elimination_start);
           return *J_;
         });
   }
 
-  /// @brief Matrix-free action of the quasistatic tangent with essential boundary conditions applied.
-  void quasistaticTangentAction(const mfem::Vector& u, const mfem::Vector& du, mfem::Vector& dr) const
-  {
-    SMITH_MARK_FUNCTION;
-
-    mfem::Vector du_interior(du);
-    du_interior.SetSubVector(bcs_.allEssentialTrueDofs(), 0.0);
-
-    auto [r, drdu] = (*residual_)(time_, shapeDisplacement(), differentiate_wrt(u), acceleration_,
-                                  *parameters_[parameter_indices].state...);
-    drdu.Mult(du_interior, dr);
-
-    const auto& constrained_dofs = bcs_.allEssentialTrueDofs();
-    for (int i = 0; i < constrained_dofs.Size(); ++i) {
-      const int dof = constrained_dofs[i];
-      dr[dof] = du[dof];
-    }
-  }
-
-  /// @brief Build a quasistatic JacobianOperator with essential boundary conditions applied.
-  std::unique_ptr<JacobianOperator> quasistaticJacobianOperator(const mfem::Vector& u) const
-  {
-    SMITH_MARK_FUNCTION;
-
-    using Clock = std::chrono::steady_clock;
-    auto seconds_since = [](Clock::time_point start) {
-      return std::chrono::duration_cast<std::chrono::duration<double>>(Clock::now() - start).count();
-    };
-    auto derivative_start = Clock::now();
-    auto [r, drdu] = (*residual_)(time_, shapeDisplacement(), differentiate_wrt(u), acceleration_,
-                                  *parameters_[parameter_indices].state...);
-    jacobian_timings_.jacobian_operator_derivative_seconds += seconds_since(derivative_start);
-    ++jacobian_timings_.jacobian_operator_evals;
-
-    using GradientT = std::remove_reference_t<decltype(drdu)>;
-
-    class QuasistaticJacobianOperator : public JacobianOperator {
-     public:
-      QuasistaticJacobianOperator(
-          const GradientT& gradient, const mfem::Array<int>& constrained_dofs,
-          std::function<std::unique_ptr<mfem::HypreParMatrix>(mfem::HypreParMatrix&)> eliminate_essential_dofs,
-          SolidMechanicsJacobianTimings& timings)
-          : JacobianOperator(gradient.Height(), gradient.Width()),
-            gradient_(gradient),
-            constrained_dofs_(constrained_dofs),
-            eliminate_essential_dofs_(std::move(eliminate_essential_dofs)),
-            timings_(timings)
-      {
-      }
-
-      void Mult(const mfem::Vector& du, mfem::Vector& dr) const override
-      {
-        mfem::Vector du_interior(du);
-        du_interior.SetSubVector(constrained_dofs_, 0.0);
-
-        gradient_.Mult(du_interior, dr);
-        for (int i = 0; i < constrained_dofs_.Size(); ++i) {
-          const int dof = constrained_dofs_[i];
-          dr[dof] = du[dof];
-        }
-      }
-
-      std::unique_ptr<mfem::HypreParMatrix> assemble() override
-      {
-        using AssemblyClock = std::chrono::steady_clock;
-        auto seconds_since = [](AssemblyClock::time_point start) {
-          return std::chrono::duration_cast<std::chrono::duration<double>>(AssemblyClock::now() - start).count();
-        };
-        auto assembly_start = AssemblyClock::now();
-        std::unique_ptr<mfem::HypreParMatrix> matrix = gradient_.assemble();
-        timings_.jacobian_operator_sparse_assembly_seconds += seconds_since(assembly_start);
-        auto elimination_start = AssemblyClock::now();
-        eliminate_essential_dofs_(*matrix);
-        timings_.jacobian_operator_essential_elimination_seconds += seconds_since(elimination_start);
-        ++timings_.jacobian_operator_assemblies;
-        return matrix;
-      }
-
-      void assembleDiagonal(mfem::Vector& diag) const override
-      {
-        gradient_.assembleDiagonal(diag);
-        for (int i = 0; i < constrained_dofs_.Size(); ++i) {
-          diag[constrained_dofs_[i]] = 1.0;
-        }
-      }
-
-     private:
-      GradientT gradient_;
-      mfem::Array<int> constrained_dofs_;
-      std::function<std::unique_ptr<mfem::HypreParMatrix>(mfem::HypreParMatrix&)> eliminate_essential_dofs_;
-      SolidMechanicsJacobianTimings& timings_;
-    };
-
-    return std::make_unique<QuasistaticJacobianOperator>(
-        drdu, bcs_.allEssentialTrueDofs(),
-        [this](mfem::HypreParMatrix& matrix) { return bcs_.eliminateAllEssentialDofsFromMatrix(matrix); },
-        jacobian_timings_);
-  }
-
   /**
    * @brief Return the assembled stiffness matrix
    *
@@ -1263,11 +1142,6 @@ class SolidMechanics<order, dim, Parameters<parameter_space...>, std::integer_se
 #endif
 
     nonlin_solver_->setOperator(*residual_with_bcs_);
-    if (is_quasistatic_) {
-      nonlin_solver_->setMatrixFreeTangentAction([this](const mfem::Vector& u, const mfem::Vector& du,
-                                                        mfem::Vector& dr) { quasistaticTangentAction(u, du, dr); });
-      nonlin_solver_->setJacobianOperator([this](const mfem::Vector& u) { return quasistaticJacobianOperator(u); });
-    }
 
     if (checkpoint_to_disk_) {
       outputStateToDisk();
@@ -1512,18 +1386,6 @@ class SolidMechanics<order, dim, Parameters<parameter_space...>, std::integer_se
   /// @brief getter for nodal forces (before zeroing-out essential dofs)
   const smith::FiniteElementDual& reactions() const { return reactions_; };
 
-  /// @brief Get the equation solver used by this physics module
-  smith::EquationSolver& equationSolver() { return *nonlin_solver_; }
-
-  /// @overload
-  const smith::EquationSolver& equationSolver() const { return *nonlin_solver_; }
-
-  /// @brief Return accumulated Jacobian construction timings for this physics object.
-  const SolidMechanicsJacobianTimings& jacobianTimings() const { return jacobian_timings_; }
-
-  /// @brief Reset accumulated Jacobian construction timings for this physics object.
-  void resetJacobianTimings() const { jacobian_timings_ = {}; }
-
  protected:
   /// The compile-time finite element trial space for displacement and velocity (H1 of order p)
   using trial = H1<order, dim>;
@@ -1592,9 +1454,6 @@ class SolidMechanics<order, dim, Parameters<parameter_space...>, std::integer_se
   /// because are associated with essential boundary conditions
   std::unique_ptr<mfem::HypreParMatrix> J_e_;
 
-  /// Accumulated timing diagnostics for quasistatic Jacobian construction paths.
-  mutable SolidMechanicsJacobianTimings jacobian_timings_;
-
   /// an intermediate variable used to store the predicted end-step displacement
   mfem::Vector predicted_displacement_;
 
diff --git a/src/smith/physics/tests/shallow_arch_buckling.cpp b/src/smith/physics/tests/shallow_arch_buckling.cpp
index a94a61bb63..4299c14874 100644
--- a/src/smith/physics/tests/shallow_arch_buckling.cpp
+++ b/src/smith/physics/tests/shallow_arch_buckling.cpp
@@ -31,16 +31,10 @@ constexpr double end_tol = 1.0e-8;
 constexpr double top_tol = 1.0e-8;
 std::string solver_name = "TrustRegion";
 int print_level = 2;
-int pcg_block_len = 10;
-double pcg_powell_eta = 0.005;
 int nonlinear_max_iterations = 300000;
-bool pcg_diagonal_preconditioner = false;
 int trust_subspace_option = static_cast<int>(SubSpaceOptions::NEVER);
 int trust_num_leftmost = 1;
 int trust_num_past_steps = 0;
-int trust_nonmonotone_window = 0;
-bool trust_use_jacobian_operator = false;
-bool trust_use_cubic_subspace = false;
 bool trust_use_solve_start_direction = false;
 bool trust_use_min_residual_direction = false;
 
@@ -52,12 +46,9 @@ NonlinearSolver selectedNonlinearSolver()
   if (solver_name == "TrustRegion") {
     return NonlinearSolver::TrustRegion;
   }
-  if (solver_name == "PcgBlock") {
-    return NonlinearSolver::PcgBlock;
-  }
 
   throw std::runtime_error("Unknown --solver value '" + solver_name +
-                           "'. Use NewtonLineSearch, TrustRegion, or PcgBlock.");
+                           "'. Use NewtonLineSearch or TrustRegion.");
 }
 
 void parseCommandLine(int& argc, char** argv)
@@ -69,29 +60,14 @@ void parseCommandLine(int& argc, char** argv)
       solver_name = arg.substr(std::string("--solver=").size());
     } else if (arg.rfind("--print-level=", 0) == 0) {
       print_level = std::stoi(arg.substr(std::string("--print-level=").size()));
-    } else if (arg.rfind("--pcg-block-len=", 0) == 0) {
-      pcg_block_len = std::stoi(arg.substr(std::string("--pcg-block-len=").size()));
-    } else if (arg.rfind("--pcg-powell-eta=", 0) == 0) {
-      pcg_powell_eta = std::stod(arg.substr(std::string("--pcg-powell-eta=").size()));
     } else if (arg.rfind("--nonlinear-max-iterations=", 0) == 0) {
       nonlinear_max_iterations = std::stoi(arg.substr(std::string("--nonlinear-max-iterations=").size()));
-    } else if (arg.rfind("--pcg-diagonal-preconditioner=", 0) == 0) {
-      const std::string value = arg.substr(std::string("--pcg-diagonal-preconditioner=").size());
-      pcg_diagonal_preconditioner = (value == "1" || value == "true" || value == "on");
     } else if (arg.rfind("--trust-subspace-option=", 0) == 0) {
       trust_subspace_option = std::stoi(arg.substr(std::string("--trust-subspace-option=").size()));
     } else if (arg.rfind("--trust-num-leftmost=", 0) == 0) {
       trust_num_leftmost = std::stoi(arg.substr(std::string("--trust-num-leftmost=").size()));
     } else if (arg.rfind("--trust-num-past-steps=", 0) == 0) {
       trust_num_past_steps = std::stoi(arg.substr(std::string("--trust-num-past-steps=").size()));
-    } else if (arg.rfind("--trust-nonmonotone-window=", 0) == 0) {
-      trust_nonmonotone_window = std::stoi(arg.substr(std::string("--trust-nonmonotone-window=").size()));
-    } else if (arg.rfind("--trust-use-jacobian-operator=", 0) == 0) {
-      const std::string value = arg.substr(std::string("--trust-use-jacobian-operator=").size());
-      trust_use_jacobian_operator = (value == "1" || value == "true" || value == "on");
-    } else if (arg.rfind("--trust-use-cubic-subspace=", 0) == 0) {
-      const std::string value = arg.substr(std::string("--trust-use-cubic-subspace=").size());
-      trust_use_cubic_subspace = (value == "1" || value == "true" || value == "on");
     } else if (arg.rfind("--trust-use-solve-start-direction=", 0) == 0) {
       const std::string value = arg.substr(std::string("--trust-use-solve-start-direction=").size());
       trust_use_solve_start_direction = (value == "1" || value == "true" || value == "on");
@@ -154,18 +130,11 @@ TEST(ShallowArchBuckling, CompressedThinBeamSnapThrough)
       .absolute_tol = 1.0e-10,
       .max_iterations = nonlinear_max_iterations,
       .print_level = print_level,
-      .trust_nonmonotone_window = trust_nonmonotone_window,
-      .trust_use_jacobian_operator = trust_use_jacobian_operator,
-      .trust_use_cubic_subspace = trust_use_cubic_subspace,
       .subspace_option = static_cast<SubSpaceOptions>(trust_subspace_option),
       .num_leftmost = trust_num_leftmost,
       .trust_num_past_steps = trust_num_past_steps,
       .trust_use_solve_start_direction = trust_use_solve_start_direction,
-      .trust_use_min_residual_direction = trust_use_min_residual_direction,
-      .pcg_block_len = pcg_block_len,
-      .pcg_powell_eta = pcg_powell_eta,
-      .pcg_max_block_retries = 40,
-      .pcg_use_jacobian_diagonal_preconditioner = pcg_diagonal_preconditioner};
+      .trust_use_min_residual_direction = trust_use_min_residual_direction};
 
   SolidMechanics<p, dim> solid(nonlinear_options, linear_options, solid_mechanics::default_quasistatic_options,
                                "compressed_beam", mesh);
@@ -197,164 +166,16 @@ TEST(ShallowArchBuckling, CompressedThinBeamSnapThrough)
     mfem::out << "Compressed thin beam snap-through run: solver = " << solver_name
               << ", trust_subspace_option = " << trust_subspace_option
               << ", trust_num_leftmost = " << trust_num_leftmost
-              << ", trust_num_past_steps = " << trust_num_past_steps
-              << ", trust_nonmonotone_window = " << trust_nonmonotone_window
-              << ", trust_use_jacobian_operator = " << trust_use_jacobian_operator
-              << ", trust_use_cubic_subspace = " << trust_use_cubic_subspace
-              << ", pcg_diagonal_preconditioner = " << pcg_diagonal_preconditioner << '\n';
+              << ", trust_num_past_steps = " << trust_num_past_steps << '\n';
   }
 
   constexpr int num_steps = 5;
-  int num_converged_steps = 0;
   for (int step = 0; step < num_steps; ++step) {
-    solid.resetJacobianTimings();
     solid.advanceTimestep(1.0 / num_steps);
-    const auto& nonlinear_solver = solid.equationSolver().nonlinearSolver();
-    if (nonlinear_solver.GetConverged()) {
-      ++num_converged_steps;
-    }
     if (rank == 0) {
-      mfem::out << "Load step " << step + 1 << "/" << num_steps
-                << ": converged = " << nonlinear_solver.GetConverged()
-                << ", nonlinear iterations = " << nonlinear_solver.GetNumIterations()
-                << ", final relative residual = " << nonlinear_solver.GetFinalRelNorm() << '\n';
+      mfem::out << "Load step " << step + 1 << "/" << num_steps << '\n';
     }
     solid.outputStateToDisk("shallow_arch_buckling");
-    if (rank == 0 && print_level >= 1) {
-      if (const auto diagnostics = solid.equationSolver().pcgBlockDiagnostics()) {
-        mfem::out << "  PCG diagnostics: residuals = " << diagnostics->num_residuals
-                  << ", hess-vecs = " << diagnostics->num_hess_vecs
-                  << ", preconditioner applications = " << diagnostics->num_preconds
-                  << ", Jacobian assemblies = " << diagnostics->num_jacobian_assembles
-                  << ", Jacobian operator evals = " << diagnostics->num_jacobian_operator_evals
-                  << ", diagonal assemblies = " << diagnostics->num_diagonal_assembles
-                  << ", preconditioner updates = " << diagnostics->num_preconditioner_updates
-                  << ", accepted blocks = " << diagnostics->num_blocks
-                  << ", accepted steps = " << diagnostics->num_accepted_steps
-                  << ", block rejects = " << diagnostics->num_block_rejects
-                  << ", prefix accepts = " << diagnostics->num_prefix_accepts
-                  << ", momentum resets = " << diagnostics->num_momentum_resets
-                  << ", nonzero beta = " << diagnostics->num_nonzero_beta
-                  << ", zero beta = " << diagnostics->num_zero_beta
-                  << ", Powell restarts = " << diagnostics->num_powell_restarts
-                  << ", descent restarts = " << diagnostics->num_descent_restarts
-                  << ", negative curvature = " << diagnostics->num_negative_curvature
-                  << ", trust capped steps = " << diagnostics->num_trust_capped_steps
-                  << ", line-search backtracks = " << diagnostics->num_line_search_backtracks
-                  << ", final h_scale = " << diagnostics->final_h_scale
-                  << ", last trust ratio = " << diagnostics->last_trust_ratio << '\n';
-        mfem::out << "  PCG timings: residual = " << diagnostics->residual_seconds
-                  << ", hess-vec = " << diagnostics->hess_vec_seconds
-                  << ", operator hess-vec = " << diagnostics->jacobian_operator_hess_vec_seconds
-                  << ", assembled hess-vec = " << diagnostics->assembled_hess_vec_seconds
-                  << ", matrix-free hess-vec = " << diagnostics->matrix_free_hess_vec_seconds
-                  << ", preconditioner = " << diagnostics->preconditioner_seconds
-                  << ", Jacobian operator eval = " << diagnostics->jacobian_operator_eval_seconds
-                  << ", Jacobian assembly = " << diagnostics->jacobian_assembly_seconds
-                  << ", diagonal assembly = " << diagnostics->diagonal_assembly_seconds
-                  << ", diagonal invert = " << diagnostics->diagonal_invert_seconds
-                  << ", preconditioner update = " << diagnostics->preconditioner_update_seconds
-                  << ", preconditioner setup = " << diagnostics->preconditioner_setup_seconds << '\n';
-      }
-      if (const auto diagnostics = solid.equationSolver().trustRegionDiagnostics()) {
-        const double operator_timed_seconds =
-            diagnostics->residual_seconds + diagnostics->hess_vec_seconds + diagnostics->preconditioner_seconds +
-            diagnostics->jacobian_operator_eval_seconds + diagnostics->diagonal_assembly_seconds +
-            diagnostics->diagonal_invert_seconds + diagnostics->jacobian_assembly_seconds +
-            diagnostics->preconditioner_update_seconds;
-        const double assembled_hess_vec_seconds =
-            diagnostics->hess_vec_seconds - diagnostics->jacobian_operator_hess_vec_seconds;
-        mfem::out << "  TrustRegion diagnostics: residuals = " << diagnostics->num_residuals
-                  << ", hess-vecs = " << diagnostics->num_hess_vecs
-                  << ", model hess-vecs = " << diagnostics->num_model_hess_vecs
-                  << ", cauchy hess-vecs = " << diagnostics->num_cauchy_hess_vecs
-                  << ", line-search hess-vecs = " << diagnostics->num_line_search_hess_vecs
-                  << ", preconditioner applications = " << diagnostics->num_preconds
-                  << ", Jacobian assemblies = " << diagnostics->num_jacobian_assembles
-                  << ", Jacobian operator evals = " << diagnostics->num_jacobian_operator_evals
-                  << ", diagonal assemblies = " << diagnostics->num_diagonal_assembles
-                  << ", CG iterations = " << diagnostics->num_cg_iterations
-                  << ", subspace solves = " << diagnostics->num_subspace_solves
-                  << ", subspace leftmost hess-vecs = " << diagnostics->num_subspace_leftmost_hess_vecs
-                  << ", subspace hess-vec batches = " << diagnostics->num_subspace_hess_vec_batches
-                  << ", subspace batched hess-vecs = " << diagnostics->num_subspace_batched_hess_vecs
-                  << ", subspace past-step vectors = " << diagnostics->num_subspace_past_step_vectors
-                  << ", subspace past-step hess-vecs = " << diagnostics->num_subspace_past_step_hess_vecs
-                  << ", quadratic subspace solves = " << diagnostics->num_quadratic_subspace_solves
-                  << ", cubic subspace attempts = " << diagnostics->num_cubic_subspace_attempts
-                  << ", cubic subspace uses = " << diagnostics->num_cubic_subspace_uses
-                  << ", cubic subspace quadratic fallbacks = " << diagnostics->num_cubic_subspace_quadratic_fallbacks
-                  << ", nonmonotone work accepts = " << diagnostics->num_nonmonotone_work_accepts
-                  << ", monotone work would reject = " << diagnostics->num_monotone_work_would_reject
-                  << ", preconditioner updates = " << diagnostics->num_preconditioner_updates << '\n';
-        mfem::out << "  TrustRegion timings: total = " << diagnostics->total_seconds
-                  << ", operator-timed = " << operator_timed_seconds << ", residual = " << diagnostics->residual_seconds
-                  << ", hess-vec = " << diagnostics->hess_vec_seconds
-                  << ", model hess-vec = " << diagnostics->model_hess_vec_seconds
-                  << ", cauchy hess-vec = " << diagnostics->cauchy_hess_vec_seconds
-                  << ", line-search hess-vec = " << diagnostics->line_search_hess_vec_seconds
-                  << ", operator hess-vec = " << diagnostics->jacobian_operator_hess_vec_seconds
-                  << ", assembled hess-vec = " << assembled_hess_vec_seconds
-                  << ", preconditioner = " << diagnostics->preconditioner_seconds
-                  << ", Jacobian operator eval = " << diagnostics->jacobian_operator_eval_seconds
-                  << ", diagonal assembly = " << diagnostics->diagonal_assembly_seconds
-                  << ", diagonal invert = " << diagnostics->diagonal_invert_seconds
-                  << ", model solve = " << diagnostics->model_solve_seconds
-                  << ", subspace = " << diagnostics->subspace_seconds
-                  << ", subspace leftmost = " << diagnostics->subspace_leftmost_seconds
-                  << ", subspace hess-vec batches = " << diagnostics->subspace_hess_vec_batch_seconds
-                  << ", subspace filter = " << diagnostics->subspace_filter_seconds
-                  << ", subspace backend = " << diagnostics->subspace_backend_seconds
-                  << ", subspace project A = " << diagnostics->subspace_project_A_seconds
-                  << ", subspace project gram = " << diagnostics->subspace_project_gram_seconds
-                  << ", subspace project b = " << diagnostics->subspace_project_b_seconds
-                  << ", subspace basis = " << diagnostics->subspace_basis_seconds
-                  << ", subspace reduced A = " << diagnostics->subspace_reduced_A_seconds
-                  << ", subspace dense eigensystem = " << diagnostics->subspace_dense_eigensystem_seconds
-                  << ", subspace dense trust solve = " << diagnostics->subspace_dense_trust_solve_seconds
-                  << ", subspace reconstruct solution = " << diagnostics->subspace_reconstruct_solution_seconds
-                  << ", subspace reconstruct leftmost = " << diagnostics->subspace_reconstruct_leftmost_seconds
-                  << ", subspace finalize = " << diagnostics->subspace_finalize_seconds
-                  << ", cauchy point = " << diagnostics->cauchy_point_seconds
-                  << ", dogleg = " << diagnostics->dogleg_seconds
-                  << ", line search = " << diagnostics->line_search_seconds << ", dot = " << diagnostics->dot_seconds
-                  << ", dot count = " << diagnostics->num_dot_products
-                  << ", dot reductions = " << diagnostics->num_dot_reductions
-                  << ", model dots = " << diagnostics->num_model_dot_products << " / " << diagnostics->model_dot_seconds
-                  << ", cauchy dots = " << diagnostics->num_cauchy_dot_products << " / "
-                  << diagnostics->cauchy_dot_seconds << ", dogleg dots = " << diagnostics->num_dogleg_dot_products
-                  << " / " << diagnostics->dogleg_dot_seconds
-                  << ", line-search dots = " << diagnostics->num_line_search_dot_products << " / "
-                  << diagnostics->line_search_dot_seconds << ", setup dots = " << diagnostics->num_setup_dot_products
-                  << " / " << diagnostics->setup_dot_seconds
-                  << ", vector update = " << diagnostics->vector_update_seconds
-                  << ", vector copy/scale = " << diagnostics->vector_copy_scale_seconds
-                  << ", projection = " << diagnostics->projection_seconds
-                  << ", Jacobian assembly = " << diagnostics->jacobian_assembly_seconds
-                  << ", preconditioner update = " << diagnostics->preconditioner_update_seconds
-                  << ", preconditioner setup = " << diagnostics->preconditioner_setup_seconds
-                  << ", work objective = " << diagnostics->last_work_objective
-                  << ", nonmonotone work reference = " << diagnostics->last_nonmonotone_work_reference << '\n';
-      }
-      const auto& jacobian_timings = solid.jacobianTimings();
-      mfem::out << "  Solid Jacobian timings: legacy evals = " << jacobian_timings.legacy_jacobian_evals
-                << ", legacy derivative = " << jacobian_timings.legacy_derivative_seconds
-                << ", legacy sparse assembly = " << jacobian_timings.legacy_sparse_assembly_seconds
-                << ", legacy EBC elimination = " << jacobian_timings.legacy_essential_elimination_seconds
-                << ", operator evals = " << jacobian_timings.jacobian_operator_evals
-                << ", operator assemblies = " << jacobian_timings.jacobian_operator_assemblies
-                << ", operator derivative = " << jacobian_timings.jacobian_operator_derivative_seconds
-                << ", operator sparse assembly = " << jacobian_timings.jacobian_operator_sparse_assembly_seconds
-                << ", operator EBC elimination = " << jacobian_timings.jacobian_operator_essential_elimination_seconds
-                << '\n';
-    }
-    if (!nonlinear_solver.GetConverged()) {
-      throw std::runtime_error("Nonlinear solve failed to converge at load step " + std::to_string(step + 1));
-    }
-  }
-
-  if (rank == 0) {
-    mfem::out << "Converged load steps: " << num_converged_steps << "/" << num_steps << '\n';
   }
 }
 
diff --git a/src/smith/physics/tests/solid.cpp b/src/smith/physics/tests/solid.cpp
index 44a68c4240..e48bed601f 100644
--- a/src/smith/physics/tests/solid.cpp
+++ b/src/smith/physics/tests/solid.cpp
@@ -236,66 +236,6 @@ TEST(SolidMechanics, 2DQuadParameterizedStatic) { functional_parameterized_solid
 
 TEST(SolidMechanics, 3DQuadStaticJ2) { functional_solid_test_static_J2(); }
 
-TEST(SolidMechanics, PcgBlockLinearElasticity)
-{
-  MPI_Barrier(MPI_COMM_WORLD);
-
-  constexpr int p = 1;
-  constexpr int dim = 2;
-  constexpr int serial_refinement = 1;
-  constexpr int parallel_refinement = 0;
-
-  axom::sidre::DataStore datastore;
-  smith::StateManager::initialize(datastore, "pcg_block_linear_elasticity");
-
-  std::string filename = SMITH_REPO_DIR "/data/meshes/square.mesh";
-  auto mesh =
-      std::make_shared<smith::Mesh>(buildMeshFromFile(filename), "mesh", serial_refinement, parallel_refinement);
-  mesh->addDomainOfBoundaryElements("fixed", by_attr<dim>(1));
-
-  smith::LinearSolverOptions linear_options{.linear_solver = LinearSolver::CG,
-                                            .preconditioner = Preconditioner::HypreL1Jacobi,
-                                            .relative_tol = 1.0e-14,
-                                            .absolute_tol = 1.0e-16,
-                                            .max_iterations = 500,
-                                            .print_level = 0};
-
-  smith::NonlinearSolverOptions nonlinear_options{.nonlin_solver = NonlinearSolver::PcgBlock,
-                                                  .relative_tol = 1.0e-12,
-                                                  .absolute_tol = 1.0e-14,
-                                                  .max_iterations = 200,
-                                                  .print_level = 0,
-                                                  .pcg_block_len = 10};
-
-  SolidMechanics<p, dim> solid(nonlinear_options, linear_options, solid_mechanics::default_quasistatic_options,
-                               "pcg_block_solid", mesh);
-
-  solid_mechanics::LinearIsotropic mat{.density = 1.0, .K = 0.5, .G = 1.0};
-  solid.setMaterial(mat, mesh->entireBody());
-  solid.setFixedBCs(mesh->domain("fixed"));
-
-  tensor<double, dim> constant_force{};
-  constant_force[0] = 0.1;
-  constant_force[1] = -0.05;
-  solid_mechanics::ConstantBodyForce<dim> force{constant_force};
-  solid.addBodyForce(force, mesh->entireBody());
-
-  solid.completeSetup();
-  solid.advanceTimestep(1.0);
-
-  const auto& nonlinear_solver = solid.equationSolver().nonlinearSolver();
-  const auto diagnostics = solid.equationSolver().pcgBlockDiagnostics();
-
-  ASSERT_TRUE(diagnostics.has_value());
-  EXPECT_TRUE(nonlinear_solver.GetConverged());
-  EXPECT_LE(nonlinear_solver.GetNumIterations(), solid.displacement().space().GlobalTrueVSize());
-  EXPECT_LT(nonlinear_solver.GetFinalRelNorm(), 1.0e-10);
-  EXPECT_EQ(diagnostics->num_block_rejects, 0u);
-  EXPECT_EQ(diagnostics->num_powell_restarts, 0u);
-  EXPECT_EQ(diagnostics->num_negative_curvature, 0u);
-  EXPECT_EQ(diagnostics->num_line_search_backtracks, 0u);
-}
-
 TEST(SolidMechanics, TDofBoundaryCondition)
 {
   /*
diff --git a/src/smith/physics/tests/solid_statics_patch.cpp b/src/smith/physics/tests/solid_statics_patch.cpp
index 2d09ab2cff..9ed9daa247 100644
--- a/src/smith/physics/tests/solid_statics_patch.cpp
+++ b/src/smith/physics/tests/solid_statics_patch.cpp
@@ -241,78 +241,6 @@ double solution_error(PatchBoundaryCondition bc)
   return computeL2Error(solid.displacement(), exact_solution_coef);
 }
 
-template <typename element_type>
-double pcg_block_solution_error(PatchBoundaryCondition bc)
-{
-  MPI_Barrier(MPI_COMM_WORLD);
-
-  axom::sidre::DataStore datastore;
-  smith::StateManager::initialize(datastore, "solid_static_pcg_block_solve");
-
-  constexpr int p = element_type::order;
-  constexpr int dim = dimension_of(element_type::geometry);
-
-  static_assert(dim == 2 || dim == 3, "Dimension must be 2 or 3 for solid test");
-
-  AffineSolution<dim> exact_displacement;
-
-  std::string meshdir = std::string(SMITH_REPO_DIR) + "/data/meshes/";
-  std::string filename;
-  switch (element_type::geometry) {
-    case mfem::Geometry::TRIANGLE:
-      filename = meshdir + "patch2D_tris.mesh";
-      break;
-    case mfem::Geometry::SQUARE:
-      filename = meshdir + "patch2D_quads.mesh";
-      break;
-    case mfem::Geometry::TETRAHEDRON:
-      filename = meshdir + "patch3D_tets.mesh";
-      break;
-    case mfem::Geometry::CUBE:
-      filename = meshdir + "patch3D_hexes.mesh";
-      break;
-    default:
-      SLIC_ERROR_ROOT("unsupported element type for patch test");
-      break;
-  }
-
-  auto mesh = std::make_shared<smith::Mesh>(buildMeshFromFile(filename), "mesh_tag");
-
-  smith::NonlinearSolverOptions nonlin_solver_options{.nonlin_solver = NonlinearSolver::PcgBlock,
-                                                      .relative_tol = 0.0,
-                                                      .absolute_tol = 5.0e-14,
-                                                      .max_iterations = 200,
-                                                      .print_level = 0,
-                                                      .pcg_block_len = 10,
-                                                      .pcg_ls_max_backtracks = 8};
-
-  auto equation_solver = std::make_unique<EquationSolver>(
-      nonlin_solver_options, smith::solid_mechanics::default_linear_options, mesh->getComm());
-
-  SolidMechanics<p, dim> solid(std::move(equation_solver), solid_mechanics::default_quasistatic_options, "solid", mesh);
-
-  solid_mechanics::NeoHookean mat{.density = 1.0, .K = 1.0, .G = 1.0};
-  solid.setMaterial(mat, mesh->entireBody());
-
-  mesh->addDomainOfBoundaryElements("essential_boundary", by_attr<dim>(essentialBoundaryAttributes<dim>(bc)));
-  exact_displacement.applyLoads(mat, solid, mesh->domain("essential_boundary"));
-
-  solid.completeSetup();
-  solid.advanceTimestep(1.0);
-
-  const auto& nonlinear_solver = solid.equationSolver().nonlinearSolver();
-  const auto diagnostics = solid.equationSolver().pcgBlockDiagnostics();
-  EXPECT_TRUE(nonlinear_solver.GetConverged());
-  EXPECT_LT(nonlinear_solver.GetFinalRelNorm(), 1.0e-10);
-  EXPECT_TRUE(diagnostics.has_value());
-  if (diagnostics.has_value()) {
-    EXPECT_GT(diagnostics->num_blocks, 0u);
-  }
-
-  mfem::VectorFunctionCoefficient exact_solution_coef(dim, exact_displacement);
-  return computeL2Error(solid.displacement(), exact_solution_coef);
-}
-
 /**
  * @brief Solve pressure-driven problem with 10% uniaxial strain and compare numerical solution to exact answer
  *
@@ -536,13 +464,6 @@ TEST(SolidMechanics, PatchTest2dQ1EssentialAndNaturalBcs)
   EXPECT_LT(quad_error, tol);
 }
 
-TEST(SolidMechanics, PcgBlockPatchTest2dQ1EssentialAndNaturalBcs)
-{
-  using quadrilateral = finite_element<mfem::Geometry::SQUARE, H1<LINEAR> >;
-  double quad_error = pcg_block_solution_error<quadrilateral>(PatchBoundaryCondition::EssentialAndNatural);
-  EXPECT_LT(quad_error, 1.0e-6);
-}
-
 TEST(SolidMechanics, PatchTest3dQ1EssentialAndNaturalBcs)
 {
   using tetrahedron = finite_element<mfem::Geometry::TETRAHEDRON, H1<LINEAR> >;
diff --git a/src/smith/physics/tests/test_functional_weak_form.cpp b/src/smith/physics/tests/test_functional_weak_form.cpp
index 0dc318ab82..61ea04e68d 100644
--- a/src/smith/physics/tests/test_functional_weak_form.cpp
+++ b/src/smith/physics/tests/test_functional_weak_form.cpp
@@ -247,38 +247,6 @@ TEST_F(WeakFormFixture, JvpConsistency)
   }
 }
 
-TEST_F(WeakFormFixture, JacobianOperatorConsistency)
-{
-  auto input_fields = getConstFieldPointers(states, params);
-  auto field_tangents = getConstFieldPointers(state_tangents, param_tangents);
-
-  std::vector<double> jacobian_weights(input_fields.size());
-  jacobian_weights[DISP] = 1.0;
-
-  auto J = weak_form->jacobian(time_info, shape_disp.get(), input_fields, jacobian_weights);
-  auto J_op = weak_form->jacobianOperator(time_info, shape_disp.get(), input_fields, DISP);
-
-  smith::FiniteElementDual jvp_slow(states[DISP].space(), "jvp_slow");
-  smith::FiniteElementDual jvp_op(states[DISP].space(), "jvp_op");
-  J->Mult(*field_tangents[DISP], jvp_slow);
-  J_op->Mult(*field_tangents[DISP], jvp_op);
-  EXPECT_NEAR(jvp_slow.Norml2(), jvp_op.Norml2(), 1e-12);
-
-  std::unique_ptr<mfem::HypreParMatrix> J_op_assembled = J_op->assemble();
-  smith::FiniteElementDual jvp_op_assembled(states[DISP].space(), "jvp_op_assembled");
-  J_op_assembled->Mult(*field_tangents[DISP], jvp_op_assembled);
-  EXPECT_NEAR(jvp_slow.Norml2(), jvp_op_assembled.Norml2(), 1e-12);
-
-  mfem::Vector diag_direct(J_op->Height());
-  mfem::Vector diag_assembled(J->Height());
-  J_op->assembleDiagonal(diag_direct);
-  J->GetDiag(diag_assembled);
-
-  mfem::Vector diag_diff(diag_direct.Size());
-  subtract(diag_direct, diag_assembled, diag_diff);
-  EXPECT_NEAR(0.0, diag_diff.Norml2() / diag_assembled.Norml2(), 1.e-14);
-}
-
 int main(int argc, char* argv[])
 {
   ::testing::InitGoogleTest(&argc, argv);
diff --git a/src/smith/physics/weak_form.hpp b/src/smith/physics/weak_form.hpp
index 1675545ba2..8bd7e48e8d 100644
--- a/src/smith/physics/weak_form.hpp
+++ b/src/smith/physics/weak_form.hpp
@@ -26,7 +26,6 @@ class HypreParMatrix;
 
 namespace smith {
 
-class JacobianOperator;
 class FiniteElementState;
 class FiniteElementDual;
 
@@ -71,22 +70,6 @@ class WeakForm {
       const std::vector<double>& field_argument_tangents,
       const std::vector<ConstQuadratureFieldPtr>& quad_fields = {}) const = 0;
 
-  /** @brief Derivative of the residual with respect to one field argument as a solver-facing JacobianOperator.
-   *
-   * The returned operator represents one derivative column, d{r}/d{fields}_field_argument_index. The first supported
-   * use case is the square solved-field derivative used by PCG-block tangent products and diagonal extraction.
-   *
-   * @param time_info time and timestep information
-   * @param shape_disp smith::FiniteElementState*, change in model coordinates relative to the initially read in mesh
-   * @param fields vector of smith::FiniteElementState*
-   * @param field_argument_index field argument to differentiate with respect to
-   * @param quad_fields vector of ConstQuadratureFieldPtr
-   * @return std::unique_ptr<JacobianOperator> returns d{r}/d{fields}_field_argument_index
-   */
-  virtual std::unique_ptr<JacobianOperator> jacobianOperator(
-      TimeInfo time_info, ConstFieldPtr shape_disp, const std::vector<ConstFieldPtr>& fields,
-      size_t field_argument_index, const std::vector<ConstQuadratureFieldPtr>& quad_fields = {}) const = 0;
-
   /**
    * @brief Jacobian-vector product, will overwrite any existing values in jvp_reactions
    * @param time_info time and timestep information

From db08e49d5d0471c61cebc6b4713cacd81c937e38 Mon Sep 17 00:00:00 2001
From: Michael Tupek <mrtupek@gmail.com>
Date: Fri, 8 May 2026 18:29:05 -0600
Subject: [PATCH 11/27] Clean up debug timings.

---
 src/smith/numerics/equation_solver.cpp        | 508 ++----------------
 src/smith/numerics/equation_solver.hpp        | 136 -----
 .../numerics/mfem_trust_region_subspace.cpp   |  55 --
 src/smith/numerics/trust_region_solver.hpp    |  21 -
 4 files changed, 46 insertions(+), 674 deletions(-)

diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp
index 4db0fc096d..d5712c9b30 100644
--- a/src/smith/numerics/equation_solver.cpp
+++ b/src/smith/numerics/equation_solver.cpp
@@ -6,7 +6,6 @@
 
 #include "smith/numerics/equation_solver.hpp"
 
-#include <chrono>
 #include <cstdlib>
 #include <functional>
 #include <iomanip>
@@ -26,17 +25,6 @@
 
 namespace smith {
 
-namespace {
-
-using Clock = std::chrono::steady_clock;
-
-double secondsSince(Clock::time_point start)
-{
-  return std::chrono::duration_cast<std::chrono::duration<double>>(Clock::now() - start).count();
-}
-
-}  // namespace
-
 /// Newton solver with a 2-way line-search.  Reverts to regular Newton if max_line_search_iterations is set to 0.
 class NewtonSolver : public mfem::NewtonSolver {
  protected:
@@ -377,114 +365,6 @@ class TrustRegion : public mfem::NewtonSolver {
   mutable size_t print_level = 0;
 
  public:
-  /// internal counter for hess-vecs
-  mutable size_t num_hess_vecs = 0;
-  /// internal counter for model CG hess-vecs
-  mutable size_t num_model_hess_vecs = 0;
-  /// internal counter for Cauchy-point hess-vecs
-  mutable size_t num_cauchy_hess_vecs = 0;
-  /// internal counter for line-search hess-vecs
-  mutable size_t num_line_search_hess_vecs = 0;
-  /// internal counter for preconditions
-  mutable size_t num_preconds = 0;
-  /// internal counter for residuals
-  mutable size_t num_residuals = 0;
-  /// internal counter for subspace solves
-  mutable size_t num_subspace_solves = 0;
-  /// internal counter for retained-leftmost Hessian-vector products used by subspace solves
-  mutable size_t num_subspace_leftmost_hess_vecs = 0;
-  /// internal counter for batched Hessian-vector groups used by subspace solves
-  mutable size_t num_subspace_hess_vec_batches = 0;
-  /// internal counter for Hessian-vector products inside subspace batches
-  mutable size_t num_subspace_batched_hess_vecs = 0;
-  /// internal counter for accepted-step history vectors added to subspace solves
-  mutable size_t num_subspace_past_step_vectors = 0;
-  /// internal counter for accepted-step history Hessian-vector products
-  mutable size_t num_subspace_past_step_hess_vecs = 0;
-  /// internal counter for nonlinear-solve-start directions added to subspace solves
-  mutable size_t num_subspace_solve_start_vectors = 0;
-  /// internal counter for nonlinear-solve-start Hessian-vector products
-  mutable size_t num_subspace_solve_start_hess_vecs = 0;
-  /// internal counter for quadratic subspace backend solves
-  mutable size_t num_quadratic_subspace_solves = 0;
-  /// internal counter for matrix assembles
-  mutable size_t num_jacobian_assembles = 0;
-  /// internal counter for model CG iterations
-  mutable size_t num_cg_iterations = 0;
-  /// internal counter for preconditioner operator updates
-  mutable size_t num_preconditioner_updates = 0;
-  /// time spent evaluating residuals
-  mutable double residual_seconds = 0.0;
-  /// time spent applying Hessian-vector products
-  mutable double hess_vec_seconds = 0.0;
-  /// time spent applying model CG Hessian-vector products
-  mutable double model_hess_vec_seconds = 0.0;
-  /// time spent applying Cauchy-point Hessian-vector products
-  mutable double cauchy_hess_vec_seconds = 0.0;
-  /// time spent applying line-search Hessian-vector products
-  mutable double line_search_hess_vec_seconds = 0.0;
-  /// time spent applying preconditioners
-  mutable double preconditioner_seconds = 0.0;
-  /// total time spent in the nonlinear solve
-  mutable double total_seconds = 0.0;
-  /// time spent solving trust-region model problems
-  mutable double model_solve_seconds = 0.0;
-  /// total time spent in trust-region subspace solves
-  mutable double subspace_seconds = 0.0;
-  /// time spent building retained leftmost subspace directions
-  mutable double subspace_leftmost_seconds = 0.0;
-  /// time spent in subspace Hessian-vector batches
-  mutable double subspace_hess_vec_batch_seconds = 0.0;
-  /// time spent removing dependent directions for subspace solves
-  mutable double subspace_filter_seconds = 0.0;
-  /// time spent in dense subspace backend assembly/solve work
-  mutable double subspace_backend_seconds = 0.0;
-  /// time spent in subspace postprocessing and model-energy comparison
-  mutable double subspace_finalize_seconds = 0.0;
-  /// time spent building the Cauchy point
-  mutable double cauchy_point_seconds = 0.0;
-  /// time spent constructing dogleg steps
-  mutable double dogleg_seconds = 0.0;
-  /// time spent in line-search and trust-radius acceptance logic
-  mutable double line_search_seconds = 0.0;
-  /// time spent in dot products
-  mutable double dot_seconds = 0.0;
-  /// number of dot products
-  mutable size_t num_dot_products = 0;
-  /// number of dot product batches/reductions
-  mutable size_t num_dot_reductions = 0;
-  /// number of dot products in trust-region model solves
-  mutable size_t num_model_dot_products = 0;
-  /// number of dot products in Cauchy-point construction
-  mutable size_t num_cauchy_dot_products = 0;
-  /// number of dot products in dogleg construction
-  mutable size_t num_dogleg_dot_products = 0;
-  /// number of dot products in line-search and acceptance logic
-  mutable size_t num_line_search_dot_products = 0;
-  /// number of setup dot products outside the main per-step kernels
-  mutable size_t num_setup_dot_products = 0;
-  /// time spent in trust-region model-solve dot products
-  mutable double model_dot_seconds = 0.0;
-  /// time spent in Cauchy-point dot products
-  mutable double cauchy_dot_seconds = 0.0;
-  /// time spent in dogleg dot products
-  mutable double dogleg_dot_seconds = 0.0;
-  /// time spent in line-search dot products
-  mutable double line_search_dot_seconds = 0.0;
-  /// time spent in setup dot products
-  mutable double setup_dot_seconds = 0.0;
-  /// time spent in vector add/update operations
-  mutable double vector_update_seconds = 0.0;
-  /// time spent in vector copies and scaling operations
-  mutable double vector_copy_scale_seconds = 0.0;
-  /// time spent in boundary projection operations
-  mutable double projection_seconds = 0.0;
-  /// time spent assembling Jacobians
-  mutable double jacobian_assembly_seconds = 0.0;
-  /// time spent refreshing preconditioners
-  mutable double preconditioner_update_seconds = 0.0;
-  /// time spent in preconditioner SetOperator calls
-  mutable double preconditioner_setup_seconds = 0.0;
 #ifdef MFEM_USE_MPI
   /// constructor
   TrustRegion(MPI_Comm comm_, const NonlinearSolverOptions& nonlinear_opts, const LinearSolverOptions& linear_opts,
@@ -494,32 +374,17 @@ class TrustRegion : public mfem::NewtonSolver {
   }
 #endif
 
-  /// Timed dot product with global and grouped accounting.
-  double timedDot(const mfem::Vector& a, const mfem::Vector& b, size_t& group_count, double& group_seconds) const
-  {
-    auto start = Clock::now();
-    const double value = Dot(a, b);
-    const double seconds = secondsSince(start);
-    ++num_dot_products;
-    ++num_dot_reductions;
-    ++group_count;
-    dot_seconds += seconds;
-    group_seconds += seconds;
-    return value;
-  }
-
-  /// Timed pair of dot products with one local vector pass and one MPI reduction when possible.
-  std::pair<double, double> timedDot2(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1,
-                                      const mfem::Vector& b1, size_t& group_count, double& group_seconds) const
+  /// Pair of dot products with one local vector pass and one MPI reduction when possible.
+  std::pair<double, double> dot2(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1,
+                                 const mfem::Vector& b1) const
   {
     if (dot_oper) {
-      return {timedDot(a0, b0, group_count, group_seconds), timedDot(a1, b1, group_count, group_seconds)};
+      return {Dot(a0, b0), Dot(a1, b1)};
     }
 
     MFEM_ASSERT(a0.Size() == b0.Size(), "Incompatible vector sizes.");
     MFEM_ASSERT(a1.Size() == b1.Size(), "Incompatible vector sizes.");
 
-    auto start = Clock::now();
     mfem::real_t products[2] = {0.0, 0.0};
     if (a0.Size() == a1.Size()) {
       for (int i = 0; i < a0.Size(); ++i) {
@@ -545,12 +410,6 @@ class TrustRegion : public mfem::NewtonSolver {
     }
 #endif
 
-    const double seconds = secondsSince(start);
-    num_dot_products += 2;
-    ++num_dot_reductions;
-    group_count += 2;
-    dot_seconds += seconds;
-    group_seconds += seconds;
     return {products[0], products[1]};
   }
 
@@ -561,16 +420,16 @@ class TrustRegion : public mfem::NewtonSolver {
     double v3 = 0.0;
   };
 
-  /// Timed four-dot batch with one local vector pass and one MPI reduction when possible.
-  Dot4Result timedDot4(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1, const mfem::Vector& b1,
-                       const mfem::Vector& a2, const mfem::Vector& b2, const mfem::Vector& a3,
-                       const mfem::Vector& b3, size_t& group_count, double& group_seconds) const
+  /// Four-dot batch with one local vector pass and one MPI reduction when possible.
+  Dot4Result dot4(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1, const mfem::Vector& b1,
+                  const mfem::Vector& a2, const mfem::Vector& b2, const mfem::Vector& a3,
+                  const mfem::Vector& b3) const
   {
     if (dot_oper) {
-      return {.v0 = timedDot(a0, b0, group_count, group_seconds),
-              .v1 = timedDot(a1, b1, group_count, group_seconds),
-              .v2 = timedDot(a2, b2, group_count, group_seconds),
-              .v3 = timedDot(a3, b3, group_count, group_seconds)};
+      return {.v0 = Dot(a0, b0),
+              .v1 = Dot(a1, b1),
+              .v2 = Dot(a2, b2),
+              .v3 = Dot(a3, b3)};
     }
 
     MFEM_ASSERT(a0.Size() == b0.Size(), "Incompatible vector sizes.");
@@ -580,7 +439,6 @@ class TrustRegion : public mfem::NewtonSolver {
     MFEM_ASSERT(a0.Size() == a1.Size() && a0.Size() == a2.Size() && a0.Size() == a3.Size(),
                 "timedDot4 currently requires equal vector sizes.");
 
-    auto start = Clock::now();
     mfem::real_t products[4] = {0.0, 0.0, 0.0, 0.0};
     for (int i = 0; i < a0.Size(); ++i) {
       products[0] += a0[i] * b0[i];
@@ -600,12 +458,6 @@ class TrustRegion : public mfem::NewtonSolver {
     }
 #endif
 
-    const double seconds = secondsSince(start);
-    num_dot_products += 4;
-    ++num_dot_reductions;
-    group_count += 4;
-    dot_seconds += seconds;
-    group_seconds += seconds;
     return {.v0 = products[0], .v1 = products[1], .v2 = products[2], .v3 = products[3]};
   }
 
@@ -618,40 +470,9 @@ class TrustRegion : public mfem::NewtonSolver {
       return;
     }
 
-    auto start = Clock::now();
-    ++num_subspace_hess_vec_batches;
-    num_subspace_batched_hess_vecs += inputs.size();
     for (size_t i = 0; i < inputs.size(); ++i) {
       hess_vec_func(*inputs[i], *outputs[i]);
     }
-    subspace_hess_vec_batch_seconds += secondsSince(start);
-  }
-
-  template <typename HessVecFunc>
-  void timedModelHessVec(HessVecFunc hess_vec_func, const mfem::Vector& input, mfem::Vector& output) const
-  {
-    auto start = Clock::now();
-    hess_vec_func(input, output);
-    model_hess_vec_seconds += secondsSince(start);
-    ++num_model_hess_vecs;
-  }
-
-  template <typename HessVecFunc>
-  void timedCauchyHessVec(HessVecFunc hess_vec_func, const mfem::Vector& input, mfem::Vector& output) const
-  {
-    auto start = Clock::now();
-    hess_vec_func(input, output);
-    cauchy_hess_vec_seconds += secondsSince(start);
-    ++num_cauchy_hess_vecs;
-  }
-
-  template <typename HessVecFunc>
-  void timedLineSearchHessVec(HessVecFunc hess_vec_func, const mfem::Vector& input, mfem::Vector& output) const
-  {
-    auto start = Clock::now();
-    hess_vec_func(input, output);
-    line_search_hess_vec_seconds += secondsSince(start);
-    ++num_line_search_hess_vecs;
   }
 
   void pushAcceptedStepHistory(const mfem::Vector& step) const
@@ -672,13 +493,11 @@ class TrustRegion : public mfem::NewtonSolver {
   void projectToBoundaryWithCoefs(mfem::Vector& z, const mfem::Vector& d, double delta, double zz, double zd,
                                   double dd) const
   {
-    auto start = Clock::now();
     // find z + tau d
     double deltadelta_m_zz = delta * delta - zz;
     if (deltadelta_m_zz == 0) return;  // already on boundary
     double tau = (std::sqrt(deltadelta_m_zz * dd + zd * zd) - zd) / dd;
     z.Add(tau, d);
-    projection_seconds += secondsSince(start);
   }
 
   /// solve the exact trust-region subspace problem with directions ds, and the leftmosts
@@ -691,9 +510,6 @@ class TrustRegion : public mfem::NewtonSolver {
                                [[maybe_unused]] std::vector<std::shared_ptr<mfem::Vector>>& candidate_left_mosts) const
   {
     SMITH_MARK_FUNCTION;
-    auto subspace_start = Clock::now();
-    ++num_subspace_solves;
-
     std::vector<const mfem::Vector*> directions;
     for (auto& d : ds) {
       directions.emplace_back(d);
@@ -719,20 +535,15 @@ class TrustRegion : public mfem::NewtonSolver {
     double energy_change;
 
     try {
-      auto backend_start = Clock::now();
-      ++num_quadratic_subspace_solves;
       std::tie(sol, leftvecs, leftvals, energy_change) =
           solveSubspaceProblem(directions, H_directions, b, delta, num_leftmost);
-      subspace_backend_seconds += secondsSince(backend_start);
     } catch (const std::exception& e) {
       if (print_level >= 1) {
         mfem::out << "subspace solve failed with " << e.what() << std::endl;
       }
-      subspace_seconds += secondsSince(subspace_start);
       return;
     }
 
-    auto finalize_start = Clock::now();
     candidate_left_mosts.clear();
     for (auto& lv : leftvecs) {
       candidate_left_mosts.emplace_back(std::move(lv));
@@ -750,55 +561,41 @@ class TrustRegion : public mfem::NewtonSolver {
     if (subspace_energy < base_energy) {
       z = sol;
     }
-    subspace_finalize_seconds += secondsSince(finalize_start);
-    subspace_seconds += secondsSince(subspace_start);
   }
 
   /// finds tau s.t. (z + tau*(y-z))^2 = trSize^2
   void projectToBoundaryBetweenWithCoefs(mfem::Vector& z, const mfem::Vector& y, double trSize, double zz, double zy,
                                          double yy) const
   {
-    auto start = Clock::now();
     double dd = yy - 2 * zy + zz;
     double zd = zy - zz;
     double tau = (std::sqrt((trSize * trSize - zz) * dd + zd * zd) - zd) / dd;
     z.Add(-tau, z);
     z.Add(tau, y);
-    projection_seconds += secondsSince(start);
   }
 
   /// take a dogleg step in direction s, solution norm must be within trSize
   void doglegStep(const mfem::Vector& cp, const mfem::Vector& newtonP, double trSize, mfem::Vector& s) const
   {
     SMITH_MARK_FUNCTION;
-    auto [cc, nn] = timedDot2(cp, cp, newtonP, newtonP, num_dogleg_dot_products, dogleg_dot_seconds);
+    auto [cc, nn] = dot2(cp, cp, newtonP, newtonP);
     double tt = trSize * trSize;
 
-    auto update_start = Clock::now();
     s = 0.0;
-    vector_copy_scale_seconds += secondsSince(update_start);
     if (cc >= tt) {
-      update_start = Clock::now();
       add(s, std::sqrt(tt / cc), cp, s);
-      vector_update_seconds += secondsSince(update_start);
-    } else if (cc > nn) {
+      } else if (cc > nn) {
       if (print_level >= 2) {
         mfem::out << "cp outside newton, preconditioner likely inaccurate\n";
       }
-      update_start = Clock::now();
       add(s, 1.0, cp, s);
-      vector_update_seconds += secondsSince(update_start);
-    } else if (nn > tt) {  // on the dogleg (we have nn >= cc, and tt >= cc)
-      update_start = Clock::now();
+      } else if (nn > tt) {  // on the dogleg (we have nn >= cc, and tt >= cc)
       add(s, 1.0, cp, s);
-      vector_update_seconds += secondsSince(update_start);
-      double cn = timedDot(cp, newtonP, num_dogleg_dot_products, dogleg_dot_seconds);
+        double cn = Dot(cp, newtonP);
       projectToBoundaryBetweenWithCoefs(s, newtonP, trSize, cc, cn, nn);
     } else {
-      update_start = Clock::now();
       s = newtonP;
-      vector_copy_scale_seconds += secondsSince(update_start);
-    }
+      }
   }
 
   /// compute the energy of the linearized system for a given solution vector z
@@ -806,11 +603,11 @@ class TrustRegion : public mfem::NewtonSolver {
   double computeEnergy(const mfem::Vector& r_local, const HessVecFunc& H, const mfem::Vector& z) const
   {
     SMITH_MARK_FUNCTION;
-    double rz = timedDot(r_local, z, num_line_search_dot_products, line_search_dot_seconds);
+    double rz = Dot(r_local, z);
     mfem::Vector tmp(r_local);
     tmp = 0.0;
     H(z, tmp);
-    return rz + 0.5 * timedDot(z, tmp, num_line_search_dot_products, line_search_dot_seconds);
+    return rz + 0.5 * Dot(z, tmp);
   }
 
   /// Minimize quadratic sub-problem given residual vector, the action of the stiffness and a preconditioner
@@ -840,36 +637,30 @@ class TrustRegion : public mfem::NewtonSolver {
       return;
     }
 
-    auto copy_start = Clock::now();
     rCurrent = r0;
-    vector_copy_scale_seconds += secondsSince(copy_start);
     precond(rCurrent, Pr);
 
     // d = -Pr
-    copy_start = Clock::now();
     d = Pr;
     d *= -1.0;
 
     z = 0.0;
-    vector_copy_scale_seconds += secondsSince(copy_start);
     double zz = 0.;
-    double rPr = timedDot(rCurrent, Pr, num_model_dot_products, model_dot_seconds);
+    double rPr = Dot(rCurrent, Pr);
 
     // std::cout << "initial energy = " << computeEnergy(r0, hess_vec_func, z) << std::endl;
 
     for (cgIter = 1; cgIter <= settings.max_cg_iterations; ++cgIter) {
       hess_vec_func(d, Hd);
-      const auto dots = timedDot4(d, rCurrent, d, Hd, z, d, d, d, num_model_dot_products, model_dot_seconds);
+      const auto dots = dot4(d, rCurrent, d, Hd, z, d, d, d);
       double descent_check = dots.v0;
       double curvature = dots.v1;
       double zd = dots.v2;
       double dd = dots.v3;
       if (descent_check > 0) {
-        copy_start = Clock::now();
         d *= -1;
         Hd *= -1;
-        vector_copy_scale_seconds += secondsSince(copy_start);
-        results.interior_status = TrustRegionResults::Status::NonDescentDirection;
+            results.interior_status = TrustRegionResults::Status::NonDescentDirection;
         descent_check *= -1.0;
         curvature *= -1.0;
         zd *= -1.0;
@@ -892,14 +683,10 @@ class TrustRegion : public mfem::NewtonSolver {
       auto& zPred = Pr;  // re-use Pr memory.
                          // This predicted step will no longer be used by the time Pr is, so we can avoid an extra
                          // vector floating around
-      auto update_start = Clock::now();
-      add(z, alphaCg, d, zPred);
-      vector_update_seconds += secondsSince(update_start);
-
-      copy_start = Clock::now();
+        add(z, alphaCg, d, zPred);
+  
       z = zPred;
-      vector_copy_scale_seconds += secondsSince(copy_start);
-
+  
       if (results.interior_status == TrustRegionResults::Status::NonDescentDirection) {
         if (print_level >= 2) {
           mfem::out << "Found a non descent direction\n";
@@ -907,23 +694,19 @@ class TrustRegion : public mfem::NewtonSolver {
         return;
       }
 
-      update_start = Clock::now();
       add(rCurrent, alphaCg, Hd, rCurrent);
-      vector_update_seconds += secondsSince(update_start);
-
+  
       precond(rCurrent, Pr);
       auto [rPrNp1, r_current_norm_squared] =
-          timedDot2(rCurrent, Pr, rCurrent, rCurrent, num_model_dot_products, model_dot_seconds);
+          dot2(rCurrent, Pr, rCurrent, rCurrent);
       if (r_current_norm_squared <= cg_tol_squared && cgIter >= settings.min_cg_iterations) {
         return;
       }
 
       double beta = rPrNp1 / rPr;
       rPr = rPrNp1;
-      update_start = Clock::now();
       add(-1.0, Pr, beta, d, d);
-      vector_update_seconds += secondsSince(update_start);
-
+  
       zz = zzNp1;
     }
     cgIter--;  // if all cg iterations are taken, correct for output
@@ -947,185 +730,46 @@ class TrustRegion : public mfem::NewtonSolver {
   void assembleJacobian(const mfem::Vector& x) const
   {
     SMITH_MARK_FUNCTION;
-    auto start = Clock::now();
-    ++num_jacobian_assembles;
     grad = &oper->GetGradient(x);
     if (nonlinear_options.force_monolithic) {
       auto* grad_blocked = dynamic_cast<mfem::BlockOperator*>(grad);
       if (grad_blocked) grad = buildMonolithicMatrix(*grad_blocked).release();
     }
-    jacobian_assembly_seconds += secondsSince(start);
   }
 
   /// evaluate the nonlinear residual
   mfem::real_t computeResidual(const mfem::Vector& x_, mfem::Vector& r_) const
   {
     SMITH_MARK_FUNCTION;
-    auto start = Clock::now();
-    ++num_residuals;
     oper->Mult(x_, r_);
-    const auto norm = Norm(r_);
-    residual_seconds += secondsSince(start);
-    return norm;
+    return Norm(r_);
   }
 
   /// apply the action of the current Jacobian representation to a vector
   void hessVec(const mfem::Vector& x_, mfem::Vector& v_) const
   {
     SMITH_MARK_FUNCTION;
-    auto start = Clock::now();
-    ++num_hess_vecs;
     grad->Mult(x_, v_);
-    hess_vec_seconds += secondsSince(start);
   }
 
   /// apply trust region specific preconditioner
   void precond(const mfem::Vector& x_, mfem::Vector& v_) const
   {
     SMITH_MARK_FUNCTION;
-    auto start = Clock::now();
-    ++num_preconds;
     tr_precond.Mult(x_, v_);
-    preconditioner_seconds += secondsSince(start);
   };
 
-  /// Return solver diagnostic counters.
-  TrustRegionDiagnostics diagnostics() const
-  {
-    return {.num_residuals = num_residuals,
-            .num_hess_vecs = num_hess_vecs,
-            .num_model_hess_vecs = num_model_hess_vecs,
-            .num_cauchy_hess_vecs = num_cauchy_hess_vecs,
-            .num_line_search_hess_vecs = num_line_search_hess_vecs,
-            .num_preconds = num_preconds,
-            .num_jacobian_assembles = num_jacobian_assembles,
-            .num_cg_iterations = num_cg_iterations,
-            .num_subspace_solves = num_subspace_solves,
-            .num_subspace_leftmost_hess_vecs = num_subspace_leftmost_hess_vecs,
-            .num_subspace_hess_vec_batches = num_subspace_hess_vec_batches,
-            .num_subspace_batched_hess_vecs = num_subspace_batched_hess_vecs,
-            .num_subspace_past_step_vectors = num_subspace_past_step_vectors,
-            .num_subspace_past_step_hess_vecs = num_subspace_past_step_hess_vecs,
-            .num_subspace_solve_start_vectors = num_subspace_solve_start_vectors,
-            .num_subspace_solve_start_hess_vecs = num_subspace_solve_start_hess_vecs,
-            .num_quadratic_subspace_solves = num_quadratic_subspace_solves,
-            .num_preconditioner_updates = num_preconditioner_updates,
-            .residual_seconds = residual_seconds,
-            .hess_vec_seconds = hess_vec_seconds,
-            .model_hess_vec_seconds = model_hess_vec_seconds,
-            .cauchy_hess_vec_seconds = cauchy_hess_vec_seconds,
-            .line_search_hess_vec_seconds = line_search_hess_vec_seconds,
-            .preconditioner_seconds = preconditioner_seconds,
-            .total_seconds = total_seconds,
-            .model_solve_seconds = model_solve_seconds,
-            .subspace_seconds = subspace_seconds,
-            .subspace_leftmost_seconds = subspace_leftmost_seconds,
-            .subspace_hess_vec_batch_seconds = subspace_hess_vec_batch_seconds,
-            .subspace_filter_seconds = subspace_filter_seconds,
-            .subspace_backend_seconds = subspace_backend_seconds,
-            .subspace_project_A_seconds = trustRegionSubspaceTimings().project_A_seconds,
-            .subspace_project_gram_seconds = trustRegionSubspaceTimings().project_gram_seconds,
-            .subspace_project_b_seconds = trustRegionSubspaceTimings().project_b_seconds,
-            .subspace_basis_seconds = trustRegionSubspaceTimings().basis_seconds,
-            .subspace_reduced_A_seconds = trustRegionSubspaceTimings().reduced_A_seconds,
-            .subspace_dense_eigensystem_seconds = trustRegionSubspaceTimings().dense_eigensystem_seconds,
-            .subspace_dense_trust_solve_seconds = trustRegionSubspaceTimings().dense_trust_solve_seconds,
-            .subspace_reconstruct_solution_seconds = trustRegionSubspaceTimings().reconstruct_solution_seconds,
-            .subspace_reconstruct_leftmost_seconds = trustRegionSubspaceTimings().reconstruct_leftmost_seconds,
-            .subspace_finalize_seconds = subspace_finalize_seconds,
-            .cauchy_point_seconds = cauchy_point_seconds,
-            .dogleg_seconds = dogleg_seconds,
-            .line_search_seconds = line_search_seconds,
-            .dot_seconds = dot_seconds,
-            .num_dot_products = num_dot_products,
-            .num_dot_reductions = num_dot_reductions,
-            .num_model_dot_products = num_model_dot_products,
-            .num_cauchy_dot_products = num_cauchy_dot_products,
-            .num_dogleg_dot_products = num_dogleg_dot_products,
-            .num_line_search_dot_products = num_line_search_dot_products,
-            .num_setup_dot_products = num_setup_dot_products,
-            .model_dot_seconds = model_dot_seconds,
-            .cauchy_dot_seconds = cauchy_dot_seconds,
-            .dogleg_dot_seconds = dogleg_dot_seconds,
-            .line_search_dot_seconds = line_search_dot_seconds,
-            .setup_dot_seconds = setup_dot_seconds,
-            .vector_update_seconds = vector_update_seconds,
-            .vector_copy_scale_seconds = vector_copy_scale_seconds,
-            .projection_seconds = projection_seconds,
-            .jacobian_assembly_seconds = jacobian_assembly_seconds,
-            .preconditioner_update_seconds = preconditioner_update_seconds,
-            .preconditioner_setup_seconds = preconditioner_setup_seconds};
-  }
-
   /// @overload
   void Mult(const mfem::Vector&, mfem::Vector& X) const
   {
     MFEM_ASSERT(oper != NULL, "the Operator is not set (use SetOperator).");
     MFEM_ASSERT(prec != NULL, "the Solver is not set (use SetSolver).");
-    auto total_start = Clock::now();
-
     print_level = static_cast<size_t>(std::max(nonlinear_options.print_level, 0));
     print_level = print_options.iterations ? std::max<size_t>(1, print_level) : print_level;
     print_level = print_options.summary ? std::max<size_t>(2, print_level) : print_level;
 
     using real_t = mfem::real_t;
 
-    num_hess_vecs = 0;
-    num_model_hess_vecs = 0;
-    num_cauchy_hess_vecs = 0;
-    num_line_search_hess_vecs = 0;
-    num_preconds = 0;
-    num_residuals = 0;
-    num_subspace_solves = 0;
-    num_subspace_leftmost_hess_vecs = 0;
-    num_subspace_hess_vec_batches = 0;
-    num_subspace_batched_hess_vecs = 0;
-    num_subspace_past_step_vectors = 0;
-    num_subspace_past_step_hess_vecs = 0;
-    num_subspace_solve_start_vectors = 0;
-    num_subspace_solve_start_hess_vecs = 0;
-    num_quadratic_subspace_solves = 0;
-    num_jacobian_assembles = 0;
-    num_cg_iterations = 0;
-    num_preconditioner_updates = 0;
-    residual_seconds = 0.0;
-    hess_vec_seconds = 0.0;
-    model_hess_vec_seconds = 0.0;
-    cauchy_hess_vec_seconds = 0.0;
-    line_search_hess_vec_seconds = 0.0;
-    preconditioner_seconds = 0.0;
-    total_seconds = 0.0;
-    model_solve_seconds = 0.0;
-    subspace_seconds = 0.0;
-    subspace_leftmost_seconds = 0.0;
-    subspace_hess_vec_batch_seconds = 0.0;
-    subspace_filter_seconds = 0.0;
-    subspace_backend_seconds = 0.0;
-    subspace_finalize_seconds = 0.0;
-    cauchy_point_seconds = 0.0;
-    dogleg_seconds = 0.0;
-    line_search_seconds = 0.0;
-    dot_seconds = 0.0;
-    num_dot_products = 0;
-    num_dot_reductions = 0;
-    num_model_dot_products = 0;
-    num_cauchy_dot_products = 0;
-    num_dogleg_dot_products = 0;
-    num_line_search_dot_products = 0;
-    num_setup_dot_products = 0;
-    model_dot_seconds = 0.0;
-    cauchy_dot_seconds = 0.0;
-    dogleg_dot_seconds = 0.0;
-    line_search_dot_seconds = 0.0;
-    setup_dot_seconds = 0.0;
-    vector_update_seconds = 0.0;
-    vector_copy_scale_seconds = 0.0;
-    projection_seconds = 0.0;
-    jacobian_assembly_seconds = 0.0;
-    preconditioner_update_seconds = 0.0;
-    preconditioner_setup_seconds = 0.0;
-    accepted_step_history.clear();
-    resetTrustRegionSubspaceTimings();
     solve_start_x.SetSize(X.Size());
     solve_start_x = X;
     min_residual_x.SetSize(X.Size());
@@ -1163,11 +807,9 @@ class TrustRegion : public mfem::NewtonSolver {
     int subspace_option = nonlinear_options.subspace_option;
     int num_leftmost = nonlinear_options.num_leftmost;
 
-    auto copy_start = Clock::now();
     scratch = 1.0;
-    vector_copy_scale_seconds += secondsSince(copy_start);
     double tr_size = nonlinear_options.trust_region_scaling *
-                     std::sqrt(timedDot(scratch, scratch, num_setup_dot_products, setup_dot_seconds));
+                     std::sqrt(Dot(scratch, scratch));
     size_t cumulative_cg_iters_from_last_precond_update = 0;
 
     int it = 0;
@@ -1202,12 +844,7 @@ class TrustRegion : public mfem::NewtonSolver {
 
       if (it == 0 || (trResults.cg_iterations_count >= settings.max_cg_iterations ||
                       cumulative_cg_iters_from_last_precond_update >= settings.max_cumulative_iteration)) {
-        auto preconditioner_update_start = Clock::now();
-        auto preconditioner_setup_start = Clock::now();
         tr_precond.SetOperator(*grad);
-        preconditioner_setup_seconds += secondsSince(preconditioner_setup_start);
-        preconditioner_update_seconds += secondsSince(preconditioner_update_start);
-        ++num_preconditioner_updates;
         cumulative_cg_iters_from_last_precond_update = 0;
       }
 
@@ -1218,28 +855,22 @@ class TrustRegion : public mfem::NewtonSolver {
       trResults.reset();
 
       {
-        auto cauchy_start = Clock::now();
-        timedCauchyHessVec(hess_vec_func, r, trResults.H_d);
-        const double gKg = timedDot(r, trResults.H_d, num_cauchy_dot_products, cauchy_dot_seconds);
+        hess_vec_func(r, trResults.H_d);
+        const double gKg = Dot(r, trResults.H_d);
         const double residual_norm_squared = norm * norm;
         if (gKg > 0) {
           const double alphaCp = -residual_norm_squared / gKg;
-          auto update_start = Clock::now();
-          add(trResults.cauchy_point, alphaCp, r, trResults.cauchy_point);
-          vector_update_seconds += secondsSince(update_start);
-          cauchyPointNormSquared =
-              timedDot(trResults.cauchy_point, trResults.cauchy_point, num_cauchy_dot_products, cauchy_dot_seconds);
+                add(trResults.cauchy_point, alphaCp, r, trResults.cauchy_point);
+                cauchyPointNormSquared =
+              Dot(trResults.cauchy_point, trResults.cauchy_point);
         } else {
           const double alphaTr = -tr_size / norm;
-          auto update_start = Clock::now();
-          add(trResults.cauchy_point, alphaTr, r, trResults.cauchy_point);
-          vector_update_seconds += secondsSince(update_start);
-          if (print_level >= 2) {
+                add(trResults.cauchy_point, alphaTr, r, trResults.cauchy_point);
+                if (print_level >= 2) {
             mfem::out << "Negative curvature un-preconditioned cauchy point direction found."
                       << "\n";
           }
         }
-        cauchy_point_seconds += secondsSince(cauchy_start);
       }
 
       if (cauchyPointNormSquared >= tr_size * tr_size) {
@@ -1254,16 +885,10 @@ class TrustRegion : public mfem::NewtonSolver {
         trResults.interior_status = TrustRegionResults::Status::OnBoundary;
       } else {
         settings.cg_tol = std::max(0.5 * norm_goal, 5e-5 * norm);
-        auto model_start = Clock::now();
-        auto model_hess_vec_func = [&](const mfem::Vector& x_, mfem::Vector& v_) {
-          timedModelHessVec(hess_vec_func, x_, v_);
-        };
-        solveTrustRegionModelProblem(r, scratch, model_hess_vec_func, precond_func, settings, tr_size, trResults,
+        solveTrustRegionModelProblem(r, scratch, hess_vec_func, precond_func, settings, tr_size, trResults,
                                      norm * norm);
-        model_solve_seconds += secondsSince(model_start);
       }
       cumulative_cg_iters_from_last_precond_update += trResults.cg_iterations_count;
-      num_cg_iterations += trResults.cg_iterations_count;
 
       bool have_computed_Hvs = false;
       bool have_computed_H_left_mosts = false;
@@ -1271,17 +896,13 @@ class TrustRegion : public mfem::NewtonSolver {
 
       int lineSearchIter = 0;
       while (lineSearchIter <= nonlinear_options.max_line_search_iterations) {
-        auto line_search_start = Clock::now();
         ++lineSearchIter;
 
-        auto dogleg_start = Clock::now();
         doglegStep(trResults.cauchy_point, trResults.z, tr_size, trResults.d);
-        dogleg_seconds += secondsSince(dogleg_start);
-
         const bool check_subspace_boundary = subspace_option >= 1;
         const double d_norm =
             check_subspace_boundary
-                ? std::sqrt(timedDot(trResults.d, trResults.d, num_line_search_dot_products, line_search_dot_seconds))
+                ? std::sqrt(Dot(trResults.d, trResults.d))
                 : 0.0;
         bool use_with_option1 =
             (subspace_option >= 1) && (trResults.interior_status == TrustRegionResults::Status::NonDescentDirection ||
@@ -1306,7 +927,6 @@ class TrustRegion : public mfem::NewtonSolver {
 
           if (!have_computed_H_left_mosts) {
             have_computed_H_left_mosts = true;
-            auto leftmost_start = Clock::now();
             previous_H_left_mosts = H_left_mosts;
             H_left_mosts.clear();
             std::vector<const mfem::Vector*> leftmost_inputs;
@@ -1315,9 +935,7 @@ class TrustRegion : public mfem::NewtonSolver {
               H_left_mosts.emplace_back(std::make_shared<mfem::Vector>(*left));
               leftmost_inputs.push_back(left.get());
               leftmost_outputs.push_back(H_left_mosts.back().get());
-              ++num_subspace_leftmost_hess_vecs;
             }
-            subspace_leftmost_seconds += secondsSince(leftmost_start);
             batchedSubspaceHessVec(hess_vec_func, leftmost_inputs, leftmost_outputs);
           }
 
@@ -1344,8 +962,6 @@ class TrustRegion : public mfem::NewtonSolver {
             past_step_outputs.push_back(&H_past_steps.back());
           }
           if (!past_step_inputs.empty()) {
-            num_subspace_past_step_vectors += past_step_inputs.size();
-            num_subspace_past_step_hess_vecs += past_step_inputs.size();
             batchedSubspaceHessVec(hess_vec_func, past_step_inputs, past_step_outputs);
             for (size_t i = 0; i < past_step_inputs.size(); ++i) {
               ds.push_back(past_step_inputs[i]);
@@ -1362,8 +978,6 @@ class TrustRegion : public mfem::NewtonSolver {
               H_solve_start_direction.SetSize(X.Size());
               std::vector<const mfem::Vector*> solve_start_inputs{&solve_start_direction};
               std::vector<mfem::Vector*> solve_start_outputs{&H_solve_start_direction};
-              ++num_subspace_solve_start_vectors;
-              ++num_subspace_solve_start_hess_vecs;
               batchedSubspaceHessVec(hess_vec_func, solve_start_inputs, solve_start_outputs);
               ds.push_back(&solve_start_direction);
               H_ds.push_back(&H_solve_start_direction);
@@ -1380,8 +994,6 @@ class TrustRegion : public mfem::NewtonSolver {
               std::vector<const mfem::Vector*> min_res_inputs{&min_residual_direction};
               std::vector<mfem::Vector*> min_res_outputs{&H_min_residual_direction};
               // Reusing solve_start counters for now
-              ++num_subspace_solve_start_vectors;
-              ++num_subspace_solve_start_hess_vecs;
               batchedSubspaceHessVec(hess_vec_func, min_res_inputs, min_res_outputs);
               ds.push_back(&min_residual_direction);
               H_ds.push_back(&H_min_residual_direction);
@@ -1392,15 +1004,12 @@ class TrustRegion : public mfem::NewtonSolver {
 
         static constexpr double roundOffTol = 0.0;  // 1e-14;
 
-        timedLineSearchHessVec(hess_vec_func, trResults.d, trResults.H_d);
-        const auto [dHd, rd] = timedDot2(trResults.d, trResults.H_d, r, trResults.d, num_line_search_dot_products,
-                                         line_search_dot_seconds);
+        hess_vec_func(trResults.d, trResults.H_d);
+        const auto [dHd, rd] = dot2(trResults.d, trResults.H_d, r, trResults.d);
         double modelObjective = rd + 0.5 * dHd - roundOffTol;
 
-        auto update_start = Clock::now();
-        add(X, trResults.d, x_pred);
-        vector_update_seconds += secondsSince(update_start);
-
+            add(X, trResults.d, x_pred);
+    
         double realObjective = std::numeric_limits<double>::max();
         double normPred = std::numeric_limits<double>::max();
         try {
@@ -1410,7 +1019,7 @@ class TrustRegion : public mfem::NewtonSolver {
             min_residual_x = x_pred;
           }
           double obj1 =
-              0.5 * (rd + timedDot(r_pred, trResults.d, num_line_search_dot_products, line_search_dot_seconds)) -
+              0.5 * (rd + Dot(r_pred, trResults.d)) -
               roundOffTol;
           realObjective = obj1;
         } catch (const std::exception&) {
@@ -1426,12 +1035,9 @@ class TrustRegion : public mfem::NewtonSolver {
           if (!candidate_left_mosts.empty()) {
             left_mosts = std::move(candidate_left_mosts);
           }
-          copy_start = Clock::now();
           X = x_pred;
           r = r_pred;
-          vector_copy_scale_seconds += secondsSince(copy_start);
-          norm = normPred;
-          line_search_seconds += secondsSince(line_search_start);
+                norm = normPred;
           if (print_level >= 2) {
             printTrustRegionInfo(realObjective, modelObjective, trResults.cg_iterations_count, tr_size, true);
             trResults.cg_iterations_count =
@@ -1486,15 +1092,11 @@ class TrustRegion : public mfem::NewtonSolver {
           if (!candidate_left_mosts.empty()) {
             left_mosts = std::move(candidate_left_mosts);
           }
-          copy_start = Clock::now();
           X = x_pred;
           r = r_pred;
-          vector_copy_scale_seconds += secondsSince(copy_start);
-          norm = normPred;
-          line_search_seconds += secondsSince(line_search_start);
+                norm = normPred;
           break;
         }
-        line_search_seconds += secondsSince(line_search_start);
       }
     }
 
@@ -1508,15 +1110,6 @@ class TrustRegion : public mfem::NewtonSolver {
     if (!converged && print_level >= 1) {  // (print_options.summary || print_options.warnings)) {
       mfem::out << "TrustRegion: No convergence!\n";
     }
-
-    if (false && print_level >= 2) {
-      mfem::out << "num hess vecs = " << num_hess_vecs << "\n";
-      mfem::out << "num preconds = " << num_preconds << "\n";
-      mfem::out << "num residuals = " << num_residuals << "\n";
-      mfem::out << "num subspace solves = " << num_subspace_solves << "\n";
-      mfem::out << "num jacobian_assembles = " << num_jacobian_assembles << "\n";
-    }
-    total_seconds = secondsSince(total_start);
   }
 };
 
@@ -1562,15 +1155,6 @@ void EquationSolver::solve(mfem::Vector& x) const
   nonlin_solver_->Mult(zero, x);
 }
 
-std::optional<TrustRegionDiagnostics> EquationSolver::trustRegionDiagnostics() const
-{
-  auto* trust_region = dynamic_cast<const TrustRegion*>(nonlin_solver_.get());
-  if (!trust_region) {
-    return std::nullopt;
-  }
-  return trust_region->diagnostics();
-}
-
 void SuperLUSolver::Mult(const mfem::Vector& input, mfem::Vector& output) const
 {
   SLIC_ERROR_ROOT_IF(!superlu_mat_, "Operator must be set prior to solving with SuperLU");
diff --git a/src/smith/numerics/equation_solver.hpp b/src/smith/numerics/equation_solver.hpp
index 3ddf35edef..a1fb36a788 100644
--- a/src/smith/numerics/equation_solver.hpp
+++ b/src/smith/numerics/equation_solver.hpp
@@ -28,136 +28,6 @@
 
 namespace smith {
 
-/// Diagnostic counters for the TrustRegion nonlinear solver
-struct TrustRegionDiagnostics {
-  /// Number of nonlinear residual evaluations
-  size_t num_residuals = 0;
-  /// Number of Jacobian-vector products
-  size_t num_hess_vecs = 0;
-  /// Number of Hessian-vector products in model CG solves
-  size_t num_model_hess_vecs = 0;
-  /// Number of Hessian-vector products in Cauchy-point construction
-  size_t num_cauchy_hess_vecs = 0;
-  /// Number of Hessian-vector products in line-search model checks
-  size_t num_line_search_hess_vecs = 0;
-  /// Number of preconditioner applications
-  size_t num_preconds = 0;
-  /// Number of assembled Jacobians
-  size_t num_jacobian_assembles = 0;
-  /// Number of trust-region model CG iterations
-  size_t num_cg_iterations = 0;
-  /// Number of subspace solves
-  size_t num_subspace_solves = 0;
-  /// Number of retained-leftmost Hessian-vector products for subspace solves
-  size_t num_subspace_leftmost_hess_vecs = 0;
-  /// Number of batched Hessian-vector groups used for subspace solves
-  size_t num_subspace_hess_vec_batches = 0;
-  /// Number of Hessian-vector products inside subspace batches
-  size_t num_subspace_batched_hess_vecs = 0;
-  /// Number of accepted-step history vectors added to subspace solves
-  size_t num_subspace_past_step_vectors = 0;
-  /// Number of Hessian-vector products for accepted-step history vectors
-  size_t num_subspace_past_step_hess_vecs = 0;
-  /// Number of nonlinear-solve-start directions added to subspace solves
-  size_t num_subspace_solve_start_vectors = 0;
-  /// Number of Hessian-vector products for nonlinear-solve-start directions
-  size_t num_subspace_solve_start_hess_vecs = 0;
-  /// Number of quadratic subspace backend solves
-  size_t num_quadratic_subspace_solves = 0;
-  /// Number of preconditioner operator updates
-  size_t num_preconditioner_updates = 0;
-  /// Time spent evaluating nonlinear residuals
-  double residual_seconds = 0.0;
-  /// Time spent applying Jacobian-vector products
-  double hess_vec_seconds = 0.0;
-  /// Time spent applying Hessian-vector products in model CG solves
-  double model_hess_vec_seconds = 0.0;
-  /// Time spent applying Hessian-vector products in Cauchy-point construction
-  double cauchy_hess_vec_seconds = 0.0;
-  /// Time spent applying Hessian-vector products in line-search model checks
-  double line_search_hess_vec_seconds = 0.0;
-  /// Time spent applying preconditioners
-  double preconditioner_seconds = 0.0;
-  /// Total time spent in the nonlinear solve
-  double total_seconds = 0.0;
-  /// Time spent solving trust-region model problems
-  double model_solve_seconds = 0.0;
-  /// Total time spent in trust-region subspace solves
-  double subspace_seconds = 0.0;
-  /// Time spent building/applying retained leftmost directions for subspace solves
-  double subspace_leftmost_seconds = 0.0;
-  /// Time spent in subspace Hessian-vector batches
-  double subspace_hess_vec_batch_seconds = 0.0;
-  /// Time spent removing dependent directions before subspace solves
-  double subspace_filter_seconds = 0.0;
-  /// Time spent in dense subspace backend assembly/solve work
-  double subspace_backend_seconds = 0.0;
-  /// Time spent projecting dense subspace Hessian
-  double subspace_project_A_seconds = 0.0;
-  /// Time spent projecting dense subspace Gram matrix
-  double subspace_project_gram_seconds = 0.0;
-  /// Time spent projecting dense subspace gradient
-  double subspace_project_b_seconds = 0.0;
-  /// Time spent building dense subspace orthonormal basis
-  double subspace_basis_seconds = 0.0;
-  /// Time spent forming reduced dense Hessian
-  double subspace_reduced_A_seconds = 0.0;
-  /// Time spent in dense subspace eigensystems
-  double subspace_dense_eigensystem_seconds = 0.0;
-  /// Time spent in dense trust-region solve outside eigensystems
-  double subspace_dense_trust_solve_seconds = 0.0;
-  /// Time spent reconstructing full-space subspace solution
-  double subspace_reconstruct_solution_seconds = 0.0;
-  /// Time spent reconstructing retained leftmost vectors
-  double subspace_reconstruct_leftmost_seconds = 0.0;
-  /// Time spent in subspace postprocessing and model-energy comparison
-  double subspace_finalize_seconds = 0.0;
-  /// Time spent building the Cauchy point
-  double cauchy_point_seconds = 0.0;
-  /// Time spent in dogleg step construction
-  double dogleg_seconds = 0.0;
-  /// Time spent in line-search and trust-radius acceptance logic
-  double line_search_seconds = 0.0;
-  /// Time spent in TrustRegion dot products
-  double dot_seconds = 0.0;
-  /// Number of TrustRegion dot products
-  size_t num_dot_products = 0;
-  /// Number of TrustRegion dot batches/reductions
-  size_t num_dot_reductions = 0;
-  /// Number of dot products in trust-region model solves
-  size_t num_model_dot_products = 0;
-  /// Number of dot products in Cauchy-point construction
-  size_t num_cauchy_dot_products = 0;
-  /// Number of dot products in dogleg construction
-  size_t num_dogleg_dot_products = 0;
-  /// Number of dot products in line-search and acceptance logic
-  size_t num_line_search_dot_products = 0;
-  /// Number of setup dot products outside the main per-step kernels
-  size_t num_setup_dot_products = 0;
-  /// Time spent in trust-region model-solve dot products
-  double model_dot_seconds = 0.0;
-  /// Time spent in Cauchy-point dot products
-  double cauchy_dot_seconds = 0.0;
-  /// Time spent in dogleg dot products
-  double dogleg_dot_seconds = 0.0;
-  /// Time spent in line-search dot products
-  double line_search_dot_seconds = 0.0;
-  /// Time spent in setup dot products
-  double setup_dot_seconds = 0.0;
-  /// Time spent in TrustRegion vector add/update operations
-  double vector_update_seconds = 0.0;
-  /// Time spent in TrustRegion vector copies and scaling operations
-  double vector_copy_scale_seconds = 0.0;
-  /// Time spent in TrustRegion boundary projection operations
-  double projection_seconds = 0.0;
-  /// Time spent assembling sparse Jacobians
-  double jacobian_assembly_seconds = 0.0;
-  /// Time spent refreshing preconditioner data
-  double preconditioner_update_seconds = 0.0;
-  /// Time spent in preconditioner SetOperator calls
-  double preconditioner_setup_seconds = 0.0;
-};
-
 /**
  * @brief This class manages the objects typically required to solve a nonlinear set of equations arising from
  * discretization of a PDE of the form F(x) = 0. Specifically, it has
@@ -226,12 +96,6 @@ class EquationSolver {
    */
   const mfem::NewtonSolver& nonlinearSolver() const { return *nonlin_solver_; }
 
-  /**
-   * Returns diagnostic counters when the nonlinear solver is TrustRegion.
-   * @return Optional TrustRegion diagnostics; empty for other nonlinear solvers
-   */
-  std::optional<TrustRegionDiagnostics> trustRegionDiagnostics() const;
-
   /**
    * Returns the underlying linear solver object
    * @return A non-owning reference to the underlying linear solver
diff --git a/src/smith/numerics/mfem_trust_region_subspace.cpp b/src/smith/numerics/mfem_trust_region_subspace.cpp
index 268f832703..19d1e9c147 100644
--- a/src/smith/numerics/mfem_trust_region_subspace.cpp
+++ b/src/smith/numerics/mfem_trust_region_subspace.cpp
@@ -7,7 +7,6 @@
 #include "smith/numerics/trust_region_solver.hpp"
 
 #include <algorithm>
-#include <chrono>
 #include <cmath>
 #include <vector>
 
@@ -17,31 +16,8 @@ namespace smith {
 
 namespace {
 
-using Clock = std::chrono::steady_clock;
-
-double secondsSince(Clock::time_point start)
-{
-  return std::chrono::duration_cast<std::chrono::duration<double>>(Clock::now() - start).count();
-}
-
-TrustRegionSubspaceTimings& mutableTrustRegionSubspaceTimings()
-{
-  static TrustRegionSubspaceTimings timings;
-  return timings;
-}
-
 }  // namespace
 
-void resetTrustRegionSubspaceTimings()
-{
-  mutableTrustRegionSubspaceTimings() = TrustRegionSubspaceTimings {};
-}
-
-TrustRegionSubspaceTimings trustRegionSubspaceTimings()
-{
-  return mutableTrustRegionSubspaceTimings();
-}
-
 int globalSize(const mfem::Vector& parallel_v, const MPI_Comm& comm)
 {
   int local_size = parallel_v.Size();
@@ -284,7 +260,6 @@ mfem::DenseMatrix columnsToMatrix(const std::vector<mfem::Vector>& cols)
 std::tuple<mfem::Vector, std::vector<mfem::Vector>, std::vector<double>, bool> exactTrustRegionSolve(
     mfem::DenseMatrix A, const mfem::Vector& b, double delta, int num_leftmost)
 {
-  auto dense_solve_start = Clock::now();
   if (A.Height() != A.Width()) {
     throw PetscException("Exact trust region solver requires square matrices");
   }
@@ -294,10 +269,7 @@ std::tuple<mfem::Vector, std::vector<mfem::Vector>, std::vector<double>, bool> e
 
   mfem::Vector sigs;
   mfem::DenseMatrix V;
-  auto eig_start = Clock::now();
   A.Eigensystem(sigs, V);
-  mutableTrustRegionSubspaceTimings().dense_eigensystem_seconds += secondsSince(eig_start);
-
   std::vector<mfem::Vector> leftmosts;
   std::vector<double> minsigs;
   const int num_leftmost_possible = std::min(num_leftmost, sigs.Size());
@@ -320,7 +292,6 @@ std::tuple<mfem::Vector, std::vector<mfem::Vector>, std::vector<double>, bool> e
   const double eps = 1e-12 * sigScale;
 
   if ((minSig >= eps) && (norm(bvOverSigs) <= delta)) {
-    mutableTrustRegionSubspaceTimings().dense_trust_solve_seconds += secondsSince(dense_solve_start);
     return std::make_tuple(solveDense(A, b), leftmosts, minsigs, true);
   }
 
@@ -351,7 +322,6 @@ std::tuple<mfem::Vector, std::vector<mfem::Vector>, std::vector<double>, bool> e
     const double e1 = quadraticEnergy(A, b, x1);
     const double e2 = quadraticEnergy(A, b, x2);
 
-    mutableTrustRegionSubspaceTimings().dense_trust_solve_seconds += secondsSince(dense_solve_start);
     return std::make_tuple(e1 < e2 ? x1 : x2, leftmosts, minsigs, true);
   }
 
@@ -391,7 +361,6 @@ std::tuple<mfem::Vector, std::vector<mfem::Vector>, std::vector<double>, bool> e
 
   x *= (e2 < e1 ? -delta : delta) / norm(x);
 
-  mutableTrustRegionSubspaceTimings().dense_trust_solve_seconds += secondsSince(dense_solve_start);
   return std::make_tuple(x, leftmosts, minsigs, success);
 }
 
@@ -452,15 +421,8 @@ TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector<const mfem:
                                                    const mfem::Vector& b, double delta, int num_leftmost)
 {
   SMITH_MARK_FUNCTION;
-  auto& timings = mutableTrustRegionSubspaceTimings();
-  ++timings.num_solves;
-  timings.total_input_dim += states.size();
-  timings.max_input_dim = std::max(timings.max_input_dim, states.size());
-
-  auto project_A_start = Clock::now();
   SubspaceProjections projections = denseSubspaceProjections(states, Astates, b);
   mfem::DenseMatrix& sAs = projections.sAs;
-  timings.project_A_seconds += secondsSince(project_A_start);
   symmetrize(sAs);
 
   for (int i = 0; i < sAs.Height(); ++i) {
@@ -471,50 +433,33 @@ TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector<const mfem:
     }
   }
 
-  auto project_gram_start = Clock::now();
   mfem::DenseMatrix& ss = projections.ss;
-  timings.project_gram_seconds += secondsSince(project_gram_start);
   symmetrize(ss);
 
   double trace_mag = 0.0;
-  auto basis_start = Clock::now();
   mfem::DenseMatrix T = orthonormalBasisTransform(ss, trace_mag);
-  timings.basis_seconds += secondsSince(basis_start);
   if (T.Width() == 0) {
     throw PetscException("No independent directions in MFEM subspace solve.");
   }
-  timings.total_reduced_dim += static_cast<size_t>(T.Width());
-  timings.max_reduced_dim = std::max(timings.max_reduced_dim, static_cast<size_t>(T.Width()));
-
-  auto reduced_A_start = Clock::now();
   mfem::DenseMatrix pAp = tripleProduct(T, sAs, T);
-  timings.reduced_A_seconds += secondsSince(reduced_A_start);
   symmetrize(pAp);
 
-  auto project_b_start = Clock::now();
   const mfem::Vector& sb = projections.sb;
-  timings.project_b_seconds += secondsSince(project_b_start);
   const mfem::Vector pb = projectWithTranspose(T, sb);
 
   auto [reduced_x, leftvecs, leftvals, success] = exactTrustRegionSolve(pAp, pb, delta, num_leftmost);
   (void)success;
   const double energy = quadraticEnergy(pAp, pb, reduced_x);
 
-  auto reconstruct_solution_start = Clock::now();
   mfem::Vector coeffs(T.Height());
   T.Mult(reduced_x, coeffs);
   mfem::Vector sol = combineDirections(states, coeffs);
-  timings.reconstruct_solution_seconds += secondsSince(reconstruct_solution_start);
-
-  auto reconstruct_leftmost_start = Clock::now();
   std::vector<std::shared_ptr<mfem::Vector>> leftmosts;
   for (const auto& leftvec : leftvecs) {
     mfem::Vector left_coeffs(T.Height());
     T.Mult(leftvec, left_coeffs);
     leftmosts.emplace_back(std::make_shared<mfem::Vector>(combineDirections(states, left_coeffs)));
   }
-  timings.reconstruct_leftmost_seconds += secondsSince(reconstruct_leftmost_start);
-
   return std::make_tuple(sol, leftmosts, leftvals, energy);
 }
 
diff --git a/src/smith/numerics/trust_region_solver.hpp b/src/smith/numerics/trust_region_solver.hpp
index 960024b33d..2f5290b93f 100644
--- a/src/smith/numerics/trust_region_solver.hpp
+++ b/src/smith/numerics/trust_region_solver.hpp
@@ -45,27 +45,6 @@ enum class TrustRegionSubspaceBackend {
 using TrustRegionSubspaceResult =
     std::tuple<mfem::Vector, std::vector<std::shared_ptr<mfem::Vector>>, std::vector<double>, double>;
 
-struct TrustRegionSubspaceTimings {
-  size_t num_solves = 0;
-  size_t total_input_dim = 0;
-  size_t total_reduced_dim = 0;
-  size_t max_input_dim = 0;
-  size_t max_reduced_dim = 0;
-  double project_A_seconds = 0.0;
-  double project_gram_seconds = 0.0;
-  double project_b_seconds = 0.0;
-  double basis_seconds = 0.0;
-  double reduced_A_seconds = 0.0;
-  double dense_eigensystem_seconds = 0.0;
-  double dense_trust_solve_seconds = 0.0;
-  double reconstruct_solution_seconds = 0.0;
-  double reconstruct_leftmost_seconds = 0.0;
-};
-
-void resetTrustRegionSubspaceTimings();
-
-TrustRegionSubspaceTimings trustRegionSubspaceTimings();
-
 /// @brief computes the global size of mfem::Vector
 int globalSize(const mfem::Vector& parallel_v, const MPI_Comm& comm);
 

From f5b87e72d467c273b22c69ac685ee443fa599797 Mon Sep 17 00:00:00 2001
From: Michael Tupek <mrtupek@gmail.com>
Date: Sat, 9 May 2026 11:19:40 -0600
Subject: [PATCH 12/27] Some cleanup of the subspace improvements.

---
 .../numerics/mfem_trust_region_subspace.cpp   | 55 +++++++++++--------
 .../physics/tests/shallow_arch_buckling.cpp   |  1 +
 2 files changed, 32 insertions(+), 24 deletions(-)

diff --git a/src/smith/numerics/mfem_trust_region_subspace.cpp b/src/smith/numerics/mfem_trust_region_subspace.cpp
index 19d1e9c147..2f4bf90713 100644
--- a/src/smith/numerics/mfem_trust_region_subspace.cpp
+++ b/src/smith/numerics/mfem_trust_region_subspace.cpp
@@ -14,10 +14,6 @@
 
 namespace smith {
 
-namespace {
-
-}  // namespace
-
 int globalSize(const mfem::Vector& parallel_v, const MPI_Comm& comm)
 {
   int local_size = parallel_v.Size();
@@ -153,8 +149,8 @@ struct SubspaceProjections {
   mfem::Vector sb;
 };
 
-SubspaceProjections denseSubspaceProjections(const std::vector<const mfem::Vector*>& states,
-                                             const std::vector<const mfem::Vector*>& Astates, const mfem::Vector& b)
+void checkProjectionInputs(const std::vector<const mfem::Vector*>& states,
+                           const std::vector<const mfem::Vector*>& Astates, const mfem::Vector& b)
 {
   MFEM_VERIFY(states.size() == Astates.size(),
               "Search directions and their linear operator result must have same number of columns");
@@ -167,7 +163,13 @@ SubspaceProjections denseSubspaceProjections(const std::vector<const mfem::Vecto
     MFEM_VERIFY(Astates[size_t(j)]->Size() == vector_size, "Subspace Hessian-vector sizes differ.");
   }
   MFEM_VERIFY(b.Size() == vector_size, "Subspace right-hand-side size differs.");
+}
 
+SubspaceProjections globalSubspaceProjectionFromLocalInnerProducts(const std::vector<const mfem::Vector*>& states,
+                                                                   const std::vector<const mfem::Vector*>& Astates,
+                                                                   const mfem::Vector& b)
+{
+  const int n = static_cast<int>(states.size());
   const int triangular_size = n * (n + 1) / 2;
   const auto triangular_index = [n](int i, int j) {
     return i * n - (i * (i - 1)) / 2 + (j - i);
@@ -176,32 +178,30 @@ SubspaceProjections denseSubspaceProjections(const std::vector<const mfem::Vecto
   const int ss_offset = triangular_size;
   const int sb_offset = 2 * triangular_size;
   const int buffer_size = 2 * triangular_size + n;
-  std::vector<mfem::real_t> local(size_t(buffer_size), 0.0);
-  std::vector<mfem::real_t> global(size_t(buffer_size), 0.0);
-
-  for (int k = 0; k < vector_size; ++k) {
-    const double b_k = b[k];
-    for (int i = 0; i < n; ++i) {
-      const double s_i = (*states[size_t(i)])[k];
-      local[size_t(sb_offset + i)] += s_i * b_k;
-      for (int j = i; j < n; ++j) {
-        const size_t ij = size_t(triangular_index(i, j));
-        local[size_t(sAs_offset) + ij] += s_i * (*Astates[size_t(j)])[k];
-        local[size_t(ss_offset) + ij] += s_i * (*states[size_t(j)])[k];
-      }
+  std::vector<mfem::real_t> local_projection_entries(size_t(buffer_size), 0.0);
+  std::vector<mfem::real_t> global_projection_entries(size_t(buffer_size), 0.0);
+
+  for (int i = 0; i < n; ++i) {
+    local_projection_entries[size_t(sb_offset + i)] = mfem::InnerProduct(*states[size_t(i)], b);
+    for (int j = i; j < n; ++j) {
+      const size_t ij = size_t(triangular_index(i, j));
+      local_projection_entries[size_t(sAs_offset) + ij] =
+          mfem::InnerProduct(*states[size_t(i)], *Astates[size_t(j)]);
+      local_projection_entries[size_t(ss_offset) + ij] = mfem::InnerProduct(*states[size_t(i)], *states[size_t(j)]);
     }
   }
 
-  MPI_Allreduce(local.data(), global.data(), buffer_size, MFEM_MPI_REAL_T, MPI_SUM, MPI_COMM_WORLD);
+  MPI_Allreduce(local_projection_entries.data(), global_projection_entries.data(), buffer_size, MFEM_MPI_REAL_T,
+                MPI_SUM, MPI_COMM_WORLD);
 
   SubspaceProjections projections{mfem::DenseMatrix(n), mfem::DenseMatrix(n), mfem::Vector(n)};
   for (int i = 0; i < n; ++i) {
-    projections.sb[i] = global[size_t(sb_offset + i)];
+    projections.sb[i] = global_projection_entries[size_t(sb_offset + i)];
     for (int j = i; j < n; ++j) {
       const size_t ij = size_t(triangular_index(i, j));
-      projections.sAs(i, j) = global[size_t(sAs_offset) + ij];
+      projections.sAs(i, j) = global_projection_entries[size_t(sAs_offset) + ij];
       projections.sAs(j, i) = projections.sAs(i, j);
-      projections.ss(i, j) = global[size_t(ss_offset) + ij];
+      projections.ss(i, j) = global_projection_entries[size_t(ss_offset) + ij];
       projections.ss(j, i) = projections.ss(i, j);
     }
   }
@@ -209,6 +209,13 @@ SubspaceProjections denseSubspaceProjections(const std::vector<const mfem::Vecto
   return projections;
 }
 
+SubspaceProjections projectSubspaceGlobally(const std::vector<const mfem::Vector*>& states,
+                                            const std::vector<const mfem::Vector*>& Astates, const mfem::Vector& b)
+{
+  checkProjectionInputs(states, Astates, b);
+  return globalSubspaceProjectionFromLocalInnerProducts(states, Astates, b);
+}
+
 mfem::Vector solveDense(const mfem::DenseMatrix& A, const mfem::Vector& b)
 {
   mfem::DenseMatrix A_copy(A);
@@ -421,7 +428,7 @@ TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector<const mfem:
                                                    const mfem::Vector& b, double delta, int num_leftmost)
 {
   SMITH_MARK_FUNCTION;
-  SubspaceProjections projections = denseSubspaceProjections(states, Astates, b);
+  SubspaceProjections projections = projectSubspaceGlobally(states, Astates, b);
   mfem::DenseMatrix& sAs = projections.sAs;
   symmetrize(sAs);
 
diff --git a/src/smith/physics/tests/shallow_arch_buckling.cpp b/src/smith/physics/tests/shallow_arch_buckling.cpp
index 4299c14874..6b1d324712 100644
--- a/src/smith/physics/tests/shallow_arch_buckling.cpp
+++ b/src/smith/physics/tests/shallow_arch_buckling.cpp
@@ -177,6 +177,7 @@ TEST(ShallowArchBuckling, CompressedThinBeamSnapThrough)
     }
     solid.outputStateToDisk("shallow_arch_buckling");
   }
+
 }
 
 }  // namespace smith

From 933acffe20581c2686543fd726999124b1ac8059 Mon Sep 17 00:00:00 2001
From: Michael Tupek <mrtupek@gmail.com>
Date: Sat, 9 May 2026 11:48:00 -0600
Subject: [PATCH 13/27] Control when printing occurs better.

---
 src/smith/numerics/equation_solver.cpp | 44 ++++++++++++++++++++++++--
 1 file changed, 41 insertions(+), 3 deletions(-)

diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp
index d5712c9b30..0a194f50ba 100644
--- a/src/smith/numerics/equation_solver.cpp
+++ b/src/smith/numerics/equation_solver.cpp
@@ -25,6 +25,26 @@
 
 namespace smith {
 
+namespace {
+
+#ifdef MFEM_USE_MPI
+size_t rootOnlyPrintLevel(size_t level, MPI_Comm comm)
+{
+  if (level > 0 && comm != MPI_COMM_NULL) {
+    int rank = 0;
+    MPI_Comm_rank(comm, &rank);
+    if (rank != 0) {
+      return 0;
+    }
+  }
+  return level;
+}
+#else
+size_t rootOnlyPrintLevel(size_t level) { return level; }
+#endif
+
+}  // namespace
+
 /// Newton solver with a 2-way line-search.  Reverts to regular Newton if max_line_search_iterations is set to 0.
 class NewtonSolver : public mfem::NewtonSolver {
  protected:
@@ -97,6 +117,12 @@ class NewtonSolver : public mfem::NewtonSolver {
     print_level = static_cast<size_t>(std::max(nonlinear_options.print_level, 0));
     print_level = print_options.iterations ? std::max<size_t>(1, print_level) : print_level;
     print_level = print_options.summary ? std::max<size_t>(2, print_level) : print_level;
+    print_level = rootOnlyPrintLevel(print_level
+#ifdef MFEM_USE_MPI
+                                     ,
+                                     GetComm()
+#endif
+    );
 
     using real_t = mfem::real_t;
 
@@ -553,9 +579,15 @@ class TrustRegion : public mfem::NewtonSolver {
     double subspace_energy = computeEnergy(g, hess_vec_func, sol);
 
     if (print_level >= 2) {
-      double leftval = leftvals.size() ? leftvals[0] : 1.0;
-      mfem::out << "Energy using subspace solver from: " << base_energy << ", to: " << subspace_energy << " / "
-                << energy_change << ".  Min eig: " << leftval << std::endl;
+      int rank = 0;
+#ifdef MFEM_USE_MPI
+      MPI_Comm_rank(GetComm(), &rank);
+#endif
+      if (rank == 0) {
+        double leftval = leftvals.size() ? leftvals[0] : 1.0;
+        mfem::out << "Energy using subspace solver from: " << base_energy << ", to: " << subspace_energy << " / "
+                  << energy_change << ".  Min eig: " << leftval << std::endl;
+      }
     }
 
     if (subspace_energy < base_energy) {
@@ -767,6 +799,12 @@ class TrustRegion : public mfem::NewtonSolver {
     print_level = static_cast<size_t>(std::max(nonlinear_options.print_level, 0));
     print_level = print_options.iterations ? std::max<size_t>(1, print_level) : print_level;
     print_level = print_options.summary ? std::max<size_t>(2, print_level) : print_level;
+    print_level = rootOnlyPrintLevel(print_level
+#ifdef MFEM_USE_MPI
+                                     ,
+                                     GetComm()
+#endif
+    );
 
     using real_t = mfem::real_t;
 

From f79f0b2ef8cb19e35b457274542f4bb557f7038d Mon Sep 17 00:00:00 2001
From: Michael Tupek <mrtupek@gmail.com>
Date: Sat, 9 May 2026 11:49:25 -0600
Subject: [PATCH 14/27] Reformat TrustRegion code; drop rank check in subspace print.

---
 src/smith/numerics/equation_solver.cpp | 79 ++++++++++----------------
 1 file changed, 30 insertions(+), 49 deletions(-)

diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp
index 0a194f50ba..57bb75dcb1 100644
--- a/src/smith/numerics/equation_solver.cpp
+++ b/src/smith/numerics/equation_solver.cpp
@@ -343,9 +343,9 @@ struct TrustRegionResults {
 /// trust region printing utility function
 void printTrustRegionInfo(double realWork, double modelObjective, size_t cgIters, double trSize, bool willAccept)
 {
-  mfem::out << "real work = " << std::setw(13) << realWork << ", model energy = " << std::setw(13)
-            << modelObjective << ", cg iter = " << std::setw(7) << cgIters << ", next tr size = " << std::setw(8)
-            << trSize << ", accepting = " << willAccept << std::endl;
+  mfem::out << "real work = " << std::setw(13) << realWork << ", model energy = " << std::setw(13) << modelObjective
+            << ", cg iter = " << std::setw(7) << cgIters << ", next tr size = " << std::setw(8) << trSize
+            << ", accepting = " << willAccept << std::endl;
 }
 
 /**
@@ -448,14 +448,10 @@ class TrustRegion : public mfem::NewtonSolver {
 
   /// Four-dot batch with one local vector pass and one MPI reduction when possible.
   Dot4Result dot4(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1, const mfem::Vector& b1,
-                  const mfem::Vector& a2, const mfem::Vector& b2, const mfem::Vector& a3,
-                  const mfem::Vector& b3) const
+                  const mfem::Vector& a2, const mfem::Vector& b2, const mfem::Vector& a3, const mfem::Vector& b3) const
   {
     if (dot_oper) {
-      return {.v0 = Dot(a0, b0),
-              .v1 = Dot(a1, b1),
-              .v2 = Dot(a2, b2),
-              .v3 = Dot(a3, b3)};
+      return {.v0 = Dot(a0, b0), .v1 = Dot(a1, b1), .v2 = Dot(a2, b2), .v3 = Dot(a3, b3)};
     }
 
     MFEM_ASSERT(a0.Size() == b0.Size(), "Incompatible vector sizes.");
@@ -579,15 +575,9 @@ class TrustRegion : public mfem::NewtonSolver {
     double subspace_energy = computeEnergy(g, hess_vec_func, sol);
 
     if (print_level >= 2) {
-      int rank = 0;
-#ifdef MFEM_USE_MPI
-      MPI_Comm_rank(GetComm(), &rank);
-#endif
-      if (rank == 0) {
-        double leftval = leftvals.size() ? leftvals[0] : 1.0;
-        mfem::out << "Energy using subspace solver from: " << base_energy << ", to: " << subspace_energy << " / "
-                  << energy_change << ".  Min eig: " << leftval << std::endl;
-      }
+      double leftval = leftvals.size() ? leftvals[0] : 1.0;
+      mfem::out << "Energy using subspace solver from: " << base_energy << ", to: " << subspace_energy << " / "
+                << energy_change << ".  Min eig: " << leftval << std::endl;
     }
 
     if (subspace_energy < base_energy) {
@@ -616,18 +606,18 @@ class TrustRegion : public mfem::NewtonSolver {
     s = 0.0;
     if (cc >= tt) {
       add(s, std::sqrt(tt / cc), cp, s);
-      } else if (cc > nn) {
+    } else if (cc > nn) {
       if (print_level >= 2) {
         mfem::out << "cp outside newton, preconditioner likely inaccurate\n";
       }
       add(s, 1.0, cp, s);
-      } else if (nn > tt) {  // on the dogleg (we have nn >= cc, and tt >= cc)
+    } else if (nn > tt) {  // on the dogleg (we have nn >= cc, and tt >= cc)
       add(s, 1.0, cp, s);
-        double cn = Dot(cp, newtonP);
+      double cn = Dot(cp, newtonP);
       projectToBoundaryBetweenWithCoefs(s, newtonP, trSize, cc, cn, nn);
     } else {
       s = newtonP;
-      }
+    }
   }
 
   /// compute the energy of the linearized system for a given solution vector z
@@ -692,7 +682,7 @@ class TrustRegion : public mfem::NewtonSolver {
       if (descent_check > 0) {
         d *= -1;
         Hd *= -1;
-            results.interior_status = TrustRegionResults::Status::NonDescentDirection;
+        results.interior_status = TrustRegionResults::Status::NonDescentDirection;
         descent_check *= -1.0;
         curvature *= -1.0;
         zd *= -1.0;
@@ -715,10 +705,10 @@ class TrustRegion : public mfem::NewtonSolver {
       auto& zPred = Pr;  // re-use Pr memory.
                          // This predicted step will no longer be used by the time Pr is, so we can avoid an extra
                          // vector floating around
-        add(z, alphaCg, d, zPred);
-  
+      add(z, alphaCg, d, zPred);
+
       z = zPred;
-  
+
       if (results.interior_status == TrustRegionResults::Status::NonDescentDirection) {
         if (print_level >= 2) {
           mfem::out << "Found a non descent direction\n";
@@ -727,10 +717,9 @@ class TrustRegion : public mfem::NewtonSolver {
       }
 
       add(rCurrent, alphaCg, Hd, rCurrent);
-  
+
       precond(rCurrent, Pr);
-      auto [rPrNp1, r_current_norm_squared] =
-          dot2(rCurrent, Pr, rCurrent, rCurrent);
+      auto [rPrNp1, r_current_norm_squared] = dot2(rCurrent, Pr, rCurrent, rCurrent);
       if (r_current_norm_squared <= cg_tol_squared && cgIter >= settings.min_cg_iterations) {
         return;
       }
@@ -738,7 +727,7 @@ class TrustRegion : public mfem::NewtonSolver {
       double beta = rPrNp1 / rPr;
       rPr = rPrNp1;
       add(-1.0, Pr, beta, d, d);
-  
+
       zz = zzNp1;
     }
     cgIter--;  // if all cg iterations are taken, correct for output
@@ -846,8 +835,7 @@ class TrustRegion : public mfem::NewtonSolver {
     int num_leftmost = nonlinear_options.num_leftmost;
 
     scratch = 1.0;
-    double tr_size = nonlinear_options.trust_region_scaling *
-                     std::sqrt(Dot(scratch, scratch));
+    double tr_size = nonlinear_options.trust_region_scaling * std::sqrt(Dot(scratch, scratch));
     size_t cumulative_cg_iters_from_last_precond_update = 0;
 
     int it = 0;
@@ -898,13 +886,12 @@ class TrustRegion : public mfem::NewtonSolver {
         const double residual_norm_squared = norm * norm;
         if (gKg > 0) {
           const double alphaCp = -residual_norm_squared / gKg;
-                add(trResults.cauchy_point, alphaCp, r, trResults.cauchy_point);
-                cauchyPointNormSquared =
-              Dot(trResults.cauchy_point, trResults.cauchy_point);
+          add(trResults.cauchy_point, alphaCp, r, trResults.cauchy_point);
+          cauchyPointNormSquared = Dot(trResults.cauchy_point, trResults.cauchy_point);
         } else {
           const double alphaTr = -tr_size / norm;
-                add(trResults.cauchy_point, alphaTr, r, trResults.cauchy_point);
-                if (print_level >= 2) {
+          add(trResults.cauchy_point, alphaTr, r, trResults.cauchy_point);
+          if (print_level >= 2) {
             mfem::out << "Negative curvature un-preconditioned cauchy point direction found."
                       << "\n";
           }
@@ -938,10 +925,7 @@ class TrustRegion : public mfem::NewtonSolver {
 
         doglegStep(trResults.cauchy_point, trResults.z, tr_size, trResults.d);
         const bool check_subspace_boundary = subspace_option >= 1;
-        const double d_norm =
-            check_subspace_boundary
-                ? std::sqrt(Dot(trResults.d, trResults.d))
-                : 0.0;
+        const double d_norm = check_subspace_boundary ? std::sqrt(Dot(trResults.d, trResults.d)) : 0.0;
         bool use_with_option1 =
             (subspace_option >= 1) && (trResults.interior_status == TrustRegionResults::Status::NonDescentDirection ||
                                        trResults.interior_status == TrustRegionResults::Status::NegativeCurvature ||
@@ -1046,8 +1030,8 @@ class TrustRegion : public mfem::NewtonSolver {
         const auto [dHd, rd] = dot2(trResults.d, trResults.H_d, r, trResults.d);
         double modelObjective = rd + 0.5 * dHd - roundOffTol;
 
-            add(X, trResults.d, x_pred);
-    
+        add(X, trResults.d, x_pred);
+
         double realObjective = std::numeric_limits<double>::max();
         double normPred = std::numeric_limits<double>::max();
         try {
@@ -1056,9 +1040,7 @@ class TrustRegion : public mfem::NewtonSolver {
             min_residual_norm = normPred;
             min_residual_x = x_pred;
           }
-          double obj1 =
-              0.5 * (rd + Dot(r_pred, trResults.d)) -
-              roundOffTol;
+          double obj1 = 0.5 * (rd + Dot(r_pred, trResults.d)) - roundOffTol;
           realObjective = obj1;
         } catch (const std::exception&) {
           realObjective = std::numeric_limits<double>::max();
@@ -1075,7 +1057,7 @@ class TrustRegion : public mfem::NewtonSolver {
           }
           X = x_pred;
           r = r_pred;
-                norm = normPred;
+          norm = normPred;
           if (print_level >= 2) {
             printTrustRegionInfo(realObjective, modelObjective, trResults.cg_iterations_count, tr_size, true);
             trResults.cg_iterations_count =
@@ -1132,7 +1114,7 @@ class TrustRegion : public mfem::NewtonSolver {
           }
           X = x_pred;
           r = r_pred;
-                norm = normPred;
+          norm = normPred;
           break;
         }
       }
@@ -1151,7 +1133,6 @@ class TrustRegion : public mfem::NewtonSolver {
   }
 };
 
-
 EquationSolver::EquationSolver(NonlinearSolverOptions nonlinear_opts, LinearSolverOptions lin_opts, MPI_Comm comm)
 {
   auto [lin_solver, preconditioner] = buildLinearSolverAndPreconditioner(lin_opts, comm);

From a589e33f0adbd58f1c078db7867d450c964186da Mon Sep 17 00:00:00 2001
From: Michael Tupek <mrtupek@gmail.com>
Date: Sat, 9 May 2026 17:16:12 -0600
Subject: [PATCH 15/27] Pull trust-region into its own file.

---
 src/smith/numerics/CMakeLists.txt             |   2 +
 src/smith/numerics/equation_solver.cpp        | 353 +++++-------------
 .../numerics/mfem_trust_region_subspace.cpp   | 166 ++++----
 .../numerics/petsc_trust_region_subspace.cpp  |   4 +-
 src/smith/numerics/steihaug_toint_cg.cpp      | 134 +++++++
 src/smith/numerics/steihaug_toint_cg.hpp      | 140 +++++++
 src/smith/numerics/tests/CMakeLists.txt       |   1 +
 .../numerics/tests/test_steihaug_toint_cg.cpp | 133 +++++++
 .../tests/test_trust_region_solver_mfem.cpp   |  48 +--
 .../tests/test_trust_region_solver_petsc.cpp  |   8 +-
 src/smith/numerics/trust_region_solver.hpp    |   9 +-
 .../physics/tests/shallow_arch_buckling.cpp   |  10 +-
 12 files changed, 586 insertions(+), 422 deletions(-)
 create mode 100644 src/smith/numerics/steihaug_toint_cg.cpp
 create mode 100644 src/smith/numerics/steihaug_toint_cg.hpp
 create mode 100644 src/smith/numerics/tests/test_steihaug_toint_cg.cpp

diff --git a/src/smith/numerics/CMakeLists.txt b/src/smith/numerics/CMakeLists.txt
index 8bc793fedd..f3031866ef 100644
--- a/src/smith/numerics/CMakeLists.txt
+++ b/src/smith/numerics/CMakeLists.txt
@@ -8,6 +8,7 @@ add_subdirectory(functional)
 
 set(numerics_headers
     equation_solver.hpp
+    steihaug_toint_cg.hpp
     odes.hpp
     solver_config.hpp
     stdfunction_operator.hpp
@@ -19,6 +20,7 @@ set(numerics_headers
 
 set(numerics_sources
     equation_solver.cpp
+    steihaug_toint_cg.cpp
     petsc_trust_region_subspace.cpp
     mfem_trust_region_subspace.cpp
     odes.cpp
diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp
index 57bb75dcb1..cb82935b5d 100644
--- a/src/smith/numerics/equation_solver.cpp
+++ b/src/smith/numerics/equation_solver.cpp
@@ -5,8 +5,11 @@
 // SPDX-License-Identifier: (BSD-3-Clause)
 
 #include "smith/numerics/equation_solver.hpp"
+#include "smith/numerics/steihaug_toint_cg.hpp"
 
+#include <array>
 #include <cstdlib>
+#include <deque>
 #include <functional>
 #include <iomanip>
 #include <iostream>
@@ -243,102 +246,7 @@ class NewtonSolver : public mfem::NewtonSolver {
   }
 };
 
-/// Internal structure for storing trust region settings
-struct TrustRegionSettings {
-  /// cg tol
-  double cg_tol = 1e-8;
-  /// min cg iters
-  size_t min_cg_iterations = 0;  //
-  /// max cg iters should be around # of system dofs
-  size_t max_cg_iterations = 10000;  //
-  /// max cumulative iterations
-  size_t max_cumulative_iteration = 1;
-  /// minimum trust region size
-  double min_tr_size = 1e-13;
-  /// trust region decrease factor
-  double t1 = 0.25;
-  /// trust region increase factor
-  double t2 = 1.75;
-  /// worse case energy drop ratio.  trust region accepted if energy drop is better than this.
-  double eta1 = 1e-9;
-  /// non-ideal energy drop ratio.  trust region decreases if energy drop is worse than this.
-  double eta2 = 0.1;
-  /// ideal energy drop ratio.  trust region increases if energy drop is better than this.
-  double eta3 = 0.6;
-  /// parameter limiting how fast the energy can drop relative to the prediction (in case the energy surrogate is poor)
-  double eta4 = 4.2;
-};
-
-/// Internal structure for storing trust region stateful data
-struct TrustRegionResults {
-  /// Constructor takes the size of the solution vector
-  TrustRegionResults(int size)
-  {
-    z.SetSize(size);
-    H_z.SetSize(size);
-    d_old.SetSize(size);
-    H_d_old.SetSize(size);
-    H_d_old_at_accept.SetSize(size);
-    d.SetSize(size);
-    H_d.SetSize(size);
-    Pr.SetSize(size);
-    cauchy_point.SetSize(size);
-    H_cauchy_point.SetSize(size);
-    z = 0.0;
-    H_z = 0.0;
-    d_old = 0.0;
-    H_d_old = 0.0;
-    H_d_old_at_accept = 0.0;
-    d = 0.0;
-    H_d = 0.0;
-    Pr = 0.0;
-    cauchy_point = 0.0;
-    H_cauchy_point = 0.0;
-  }
-
-  /// resets trust region results for a new outer iteration
-  void reset()
-  {
-    z = 0.0;
-    cauchy_point = 0.0;
-  }
-
-  /// enumerates the possible final status of the trust region steps
-  enum class Status
-  {
-    Interior,
-    NegativeCurvature,
-    OnBoundary,
-    NonDescentDirection
-  };
 
-  /// step direction
-  mfem::Vector z;
-  /// action of hessian on current step z
-  mfem::Vector H_z;
-  /// old step direction
-  mfem::Vector d_old;
-  /// action of hessian on previous step z_old
-  mfem::Vector H_d_old;
-  /// action of previous accepted hessian on previous step z_old
-  mfem::Vector H_d_old_at_accept;
-  /// true after at least one accepted line-search step has populated d_old
-  bool has_d_old = false;
-  /// incrementalCG direction
-  mfem::Vector d;
-  /// action of hessian on direction d
-  mfem::Vector H_d;
-  /// preconditioned residual
-  mfem::Vector Pr;
-  /// cauchy point
-  mfem::Vector cauchy_point;
-  /// action of hessian on direction of cauchy point
-  mfem::Vector H_cauchy_point;
-  /// specifies if step is interior, exterior, negative curvature, etc.
-  Status interior_status = Status::Interior;
-  /// iteration counter
-  size_t cg_iterations_count = 0;
-};
 
 /// trust region printing utility function
 void printTrustRegionInfo(double realWork, double modelObjective, size_t cgIters, double trSize, bool willAccept)
@@ -357,7 +265,7 @@ void printTrustRegionInfo(double realWork, double modelObjective, size_t cgIters
  * rely on an incremental work approximation: 0.5 (f^n + f^{n+1}) dot (u^{n+1} - u^n).  While less theoretically sound,
  * it appears to be very effective in practice.
  */
-class TrustRegion : public mfem::NewtonSolver {
+class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
  protected:
   /// predicted solution
   mutable mfem::Vector x_pred;
@@ -372,11 +280,14 @@ class TrustRegion : public mfem::NewtonSolver {
   /// previous accepted-iteration Hessian actions on the retained left most eigenvectors
   mutable std::vector<std::shared_ptr<mfem::Vector>> previous_H_left_mosts;
   /// accepted TrustRegion steps, newest first
-  mutable std::vector<std::shared_ptr<mfem::Vector>> accepted_step_history;
+  mutable std::deque<std::shared_ptr<mfem::Vector>> accepted_step_history;
   /// initial state for this nonlinear solve, used as an optional history direction
   mutable mfem::Vector solve_start_x;
   mutable mfem::Vector min_residual_x;
   mutable double min_residual_norm = -1.0;
+  
+  /// Workspace vector for exact subspace solver to avoid small allocations
+  mutable mfem::Vector exact_solver_workspace;
 
   /// nonlinear solution options
   NonlinearSolverOptions nonlinear_options;
@@ -400,87 +311,72 @@ class TrustRegion : public mfem::NewtonSolver {
   }
 #endif
 
-  /// Pair of dot products with one local vector pass and one MPI reduction when possible.
-  std::pair<double, double> dot2(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1,
-                                 const mfem::Vector& b1) const
+  template <typename... Args>
+  std::array<double, sizeof...(Args) / 2> dot_many(const Args&... args) const
   {
+    static_assert(sizeof...(Args) % 2 == 0, "dot_many requires an even number of arguments");
+    constexpr size_t num_pairs = sizeof...(Args) / 2;
+    std::array<double, num_pairs> products;
+    products.fill(0.0);
+
     if (dot_oper) {
-      return {Dot(a0, b0), Dot(a1, b1)};
+      auto tuple_args = std::tie(args...);
+      auto do_dots = [&]<std::size_t... I>(std::index_sequence<I...>) {
+        ((products[I] = Dot(std::get<2 * I>(tuple_args), std::get<2 * I + 1>(tuple_args))), ...);
+      };
+      do_dots(std::make_index_sequence<num_pairs>{});
+      return products;
     }
 
-    MFEM_ASSERT(a0.Size() == b0.Size(), "Incompatible vector sizes.");
-    MFEM_ASSERT(a1.Size() == b1.Size(), "Incompatible vector sizes.");
+    auto tuple_args = std::tie(args...);
+    std::array<int, num_pairs> sizes;
+    std::array<const double*, num_pairs> ptr_a;
+    std::array<const double*, num_pairs> ptr_b;
+    
+    auto populate_arrays = [&]<std::size_t... I>(std::index_sequence<I...>) {
+      ((
+        sizes[I] = std::get<2 * I>(tuple_args).Size(),
+        [&](){ MFEM_ASSERT(sizes[I] == std::get<2 * I + 1>(tuple_args).Size(), "Incompatible vector sizes."); }(),
+        ptr_a[I] = std::get<2 * I>(tuple_args).GetData(),
+        ptr_b[I] = std::get<2 * I + 1>(tuple_args).GetData()
+      ), ...);
+    };
+    populate_arrays(std::make_index_sequence<num_pairs>{});
+
+    bool all_same_size = true;
+    for (size_t i = 1; i < num_pairs; ++i) {
+      if (sizes[i] != sizes[0]) {
+        all_same_size = false;
+        break;
+      }
+    }
 
-    mfem::real_t products[2] = {0.0, 0.0};
-    if (a0.Size() == a1.Size()) {
-      for (int i = 0; i < a0.Size(); ++i) {
-        products[0] += a0[i] * b0[i];
-        products[1] += a1[i] * b1[i];
+    if (all_same_size && num_pairs > 0) {
+      for (int j = 0; j < sizes[0]; ++j) {
+        for (size_t i = 0; i < num_pairs; ++i) {
+          products[i] += ptr_a[i][j] * ptr_b[i][j];
+        }
       }
     } else {
-      for (int i = 0; i < a0.Size(); ++i) {
-        products[0] += a0[i] * b0[i];
-      }
-      for (int i = 0; i < a1.Size(); ++i) {
-        products[1] += a1[i] * b1[i];
+      for (size_t i = 0; i < num_pairs; ++i) {
+        for (int j = 0; j < sizes[i]; ++j) {
+          products[i] += ptr_a[i][j] * ptr_b[i][j];
+        }
       }
     }
 
 #ifdef MFEM_USE_MPI
     const MPI_Comm dot_comm = GetComm();
     if (dot_comm != MPI_COMM_NULL) {
-      mfem::real_t global_products[2] = {0.0, 0.0};
-      MPI_Allreduce(products, global_products, 2, MFEM_MPI_REAL_T, MPI_SUM, dot_comm);
-      products[0] = global_products[0];
-      products[1] = global_products[1];
-    }
-#endif
-
-    return {products[0], products[1]};
-  }
-
-  struct Dot4Result {
-    double v0 = 0.0;
-    double v1 = 0.0;
-    double v2 = 0.0;
-    double v3 = 0.0;
-  };
-
-  /// Four-dot batch with one local vector pass and one MPI reduction when possible.
-  Dot4Result dot4(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1, const mfem::Vector& b1,
-                  const mfem::Vector& a2, const mfem::Vector& b2, const mfem::Vector& a3, const mfem::Vector& b3) const
-  {
-    if (dot_oper) {
-      return {.v0 = Dot(a0, b0), .v1 = Dot(a1, b1), .v2 = Dot(a2, b2), .v3 = Dot(a3, b3)};
-    }
-
-    MFEM_ASSERT(a0.Size() == b0.Size(), "Incompatible vector sizes.");
-    MFEM_ASSERT(a1.Size() == b1.Size(), "Incompatible vector sizes.");
-    MFEM_ASSERT(a2.Size() == b2.Size(), "Incompatible vector sizes.");
-    MFEM_ASSERT(a3.Size() == b3.Size(), "Incompatible vector sizes.");
-    MFEM_ASSERT(a0.Size() == a1.Size() && a0.Size() == a2.Size() && a0.Size() == a3.Size(),
-                "timedDot4 currently requires equal vector sizes.");
-
-    mfem::real_t products[4] = {0.0, 0.0, 0.0, 0.0};
-    for (int i = 0; i < a0.Size(); ++i) {
-      products[0] += a0[i] * b0[i];
-      products[1] += a1[i] * b1[i];
-      products[2] += a2[i] * b2[i];
-      products[3] += a3[i] * b3[i];
-    }
-
-#ifdef MFEM_USE_MPI
-    const MPI_Comm dot_comm = GetComm();
-    if (dot_comm != MPI_COMM_NULL) {
-      mfem::real_t global_products[4] = {0.0, 0.0, 0.0, 0.0};
-      MPI_Allreduce(products, global_products, 4, MFEM_MPI_REAL_T, MPI_SUM, dot_comm);
-      for (int i = 0; i < 4; ++i) {
+      std::array<mfem::real_t, num_pairs> global_products;
+      MPI_Allreduce(products.data(), global_products.data(), num_pairs, MFEM_MPI_REAL_T, MPI_SUM, dot_comm);
+      for (size_t i = 0; i < num_pairs; ++i) {
         products[i] = global_products[i];
       }
     }
 #endif
 
-    return {.v0 = products[0], .v1 = products[1], .v2 = products[2], .v3 = products[3]};
+    return products;
   }
 
   template <typename HessVecFunc>
@@ -504,20 +400,32 @@ class TrustRegion : public mfem::NewtonSolver {
       return;
     }
 
-    accepted_step_history.insert(accepted_step_history.begin(), std::make_shared<mfem::Vector>(step));
+    accepted_step_history.push_front(std::make_shared<mfem::Vector>(step));
     const size_t max_size = static_cast<size_t>(nonlinear_options.trust_num_past_steps) + 1;
     while (accepted_step_history.size() > max_size) {
       accepted_step_history.pop_back();
     }
   }
 
-  /// finds tau s.t. (z + tau*d)^2 = trSize^2
+  std::array<double, 4> dot_many_4(const mfem::Vector& a0, const mfem::Vector& b0,
+                                   const mfem::Vector& a1, const mfem::Vector& b1,
+                                   const mfem::Vector& a2, const mfem::Vector& b2,
+                                   const mfem::Vector& a3, const mfem::Vector& b3) const override
+  {
+    return dot_many(a0, b0, a1, b1, a2, b2, a3, b3);
+  }
+
+  std::array<double, 2> dot_many_2(const mfem::Vector& a0, const mfem::Vector& b0,
+                                   const mfem::Vector& a1, const mfem::Vector& b1) const override
+  {
+    return dot_many(a0, b0, a1, b1);
+  }
+
   void projectToBoundaryWithCoefs(mfem::Vector& z, const mfem::Vector& d, double delta, double zz, double zd,
-                                  double dd) const
+                                  double dd) const override
   {
-    // find z + tau d
     double deltadelta_m_zz = delta * delta - zz;
-    if (deltadelta_m_zz == 0) return;  // already on boundary
+    if (deltadelta_m_zz == 0) return;
     double tau = (std::sqrt(deltadelta_m_zz * dd + zd * zd) - zd) / dd;
     z.Add(tau, d);
   }
@@ -557,8 +465,11 @@ class TrustRegion : public mfem::NewtonSolver {
     double energy_change;
 
     try {
+      if (exact_solver_workspace.Size() < 2000) {
+        exact_solver_workspace.SetSize(2000);
+      }
       std::tie(sol, leftvecs, leftvals, energy_change) =
-          solveSubspaceProblem(directions, H_directions, b, delta, num_leftmost);
+          solveSubspaceProblem(directions, H_directions, b, delta, num_leftmost, exact_solver_workspace);
     } catch (const std::exception& e) {
       if (print_level >= 1) {
         mfem::out << "subspace solve failed with " << e.what() << std::endl;
@@ -600,7 +511,7 @@ class TrustRegion : public mfem::NewtonSolver {
   void doglegStep(const mfem::Vector& cp, const mfem::Vector& newtonP, double trSize, mfem::Vector& s) const
   {
     SMITH_MARK_FUNCTION;
-    auto [cc, nn] = dot2(cp, cp, newtonP, newtonP);
+    auto [cc, nn] = dot_many(cp, cp, newtonP, newtonP);
     double tt = trSize * trSize;
 
     s = 0.0;
@@ -633,104 +544,11 @@ class TrustRegion : public mfem::NewtonSolver {
   }
 
   /// Minimize quadratic sub-problem given residual vector, the action of the stiffness and a preconditioner
-  template <typename HessVecFunc, typename PrecondFunc>
-  void solveTrustRegionModelProblem(const mfem::Vector& r0, mfem::Vector& rCurrent, HessVecFunc hess_vec_func,
-                                    PrecondFunc precond, const TrustRegionSettings& settings, double& trSize,
-                                    TrustRegionResults& results, double r0_norm_squared) const
+  void solveModelProblem(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem::Operator& H,
+                         const mfem::Solver* P, const TrustRegionSettings& settings, double& trSize,
+                         TrustRegionResults& results, double r0_norm_squared) const
   {
-    SMITH_MARK_FUNCTION;
-    // minimize r0@z + 0.5*z@J@z
-    results.interior_status = TrustRegionResults::Status::Interior;
-    results.cg_iterations_count = 0;
-
-    auto& z = results.z;
-    auto& cgIter = results.cg_iterations_count;
-    auto& d = results.d;
-    auto& Pr = results.Pr;
-    auto& Hd = results.H_d;
-
-    const double cg_tol_squared = settings.cg_tol * settings.cg_tol;
-
-    if (r0_norm_squared <= cg_tol_squared && settings.min_cg_iterations == 0) {
-      if (print_level >= 2) {
-        mfem::out << "Trust region solution state within tolerance on first iteration."
-                  << "\n";
-      }
-      return;
-    }
-
-    rCurrent = r0;
-    precond(rCurrent, Pr);
-
-    // d = -Pr
-    d = Pr;
-    d *= -1.0;
-
-    z = 0.0;
-    double zz = 0.;
-    double rPr = Dot(rCurrent, Pr);
-
-    // std::cout << "initial energy = " << computeEnergy(r0, hess_vec_func, z) << std::endl;
-
-    for (cgIter = 1; cgIter <= settings.max_cg_iterations; ++cgIter) {
-      hess_vec_func(d, Hd);
-      const auto dots = dot4(d, rCurrent, d, Hd, z, d, d, d);
-      double descent_check = dots.v0;
-      double curvature = dots.v1;
-      double zd = dots.v2;
-      double dd = dots.v3;
-      if (descent_check > 0) {
-        d *= -1;
-        Hd *= -1;
-        results.interior_status = TrustRegionResults::Status::NonDescentDirection;
-        descent_check *= -1.0;
-        curvature *= -1.0;
-        zd *= -1.0;
-      }
-
-      const double alphaCg = curvature != 0.0 ? rPr / curvature : 0.0;
-      const double zzNp1 = zz + 2.0 * alphaCg * zd + alphaCg * alphaCg * dd;
-
-      const bool go_to_boundary = curvature <= 0 || zzNp1 >= trSize * trSize;
-      if (go_to_boundary) {
-        projectToBoundaryWithCoefs(z, d, trSize, zz, zd, dd);
-        if (curvature <= 0) {
-          results.interior_status = TrustRegionResults::Status::NegativeCurvature;
-        } else {
-          results.interior_status = TrustRegionResults::Status::OnBoundary;
-        }
-        return;
-      }
-
-      auto& zPred = Pr;  // re-use Pr memory.
-                         // This predicted step will no longer be used by the time Pr is, so we can avoid an extra
-                         // vector floating around
-      add(z, alphaCg, d, zPred);
-
-      z = zPred;
-
-      if (results.interior_status == TrustRegionResults::Status::NonDescentDirection) {
-        if (print_level >= 2) {
-          mfem::out << "Found a non descent direction\n";
-        }
-        return;
-      }
-
-      add(rCurrent, alphaCg, Hd, rCurrent);
-
-      precond(rCurrent, Pr);
-      auto [rPrNp1, r_current_norm_squared] = dot2(rCurrent, Pr, rCurrent, rCurrent);
-      if (r_current_norm_squared <= cg_tol_squared && cgIter >= settings.min_cg_iterations) {
-        return;
-      }
-
-      double beta = rPrNp1 / rPr;
-      rPr = rPrNp1;
-      add(-1.0, Pr, beta, d, d);
-
-      zz = zzNp1;
-    }
-    cgIter--;  // if all cg iterations are taken, correct for output
+    steihaugTointCG(r0, rCurrent, H, P, settings, trSize, results, r0_norm_squared, *this);
   }
 
   std::unique_ptr<mfem::Operator> cloneAssembledOperator(const mfem::Operator& op) const
@@ -781,7 +599,7 @@ class TrustRegion : public mfem::NewtonSolver {
   };
 
   /// @overload
-  void Mult(const mfem::Vector&, mfem::Vector& X) const
+  void Mult(const mfem::Vector&, mfem::Vector& X) const override
   {
     MFEM_ASSERT(oper != NULL, "the Operator is not set (use SetOperator).");
     MFEM_ASSERT(prec != NULL, "the Solver is not set (use SetSolver).");
@@ -875,7 +693,6 @@ class TrustRegion : public mfem::NewtonSolver {
       }
 
       auto hess_vec_func = [&](const mfem::Vector& x_, mfem::Vector& v_) { hessVec(x_, v_); };
-      auto precond_func = [&](const mfem::Vector& x_, mfem::Vector& v_) { precond(x_, v_); };
 
       double cauchyPointNormSquared = tr_size * tr_size;
       trResults.reset();
@@ -910,7 +727,7 @@ class TrustRegion : public mfem::NewtonSolver {
         trResults.interior_status = TrustRegionResults::Status::OnBoundary;
       } else {
         settings.cg_tol = std::max(0.5 * norm_goal, 5e-5 * norm);
-        solveTrustRegionModelProblem(r, scratch, hess_vec_func, precond_func, settings, tr_size, trResults,
+        solveModelProblem(r, scratch, *this->oper, &this->tr_precond, settings, tr_size, trResults,
                                      norm * norm);
       }
       cumulative_cg_iters_from_last_precond_update += trResults.cg_iterations_count;
@@ -1027,7 +844,7 @@ class TrustRegion : public mfem::NewtonSolver {
         static constexpr double roundOffTol = 0.0;  // 1e-14;
 
         hess_vec_func(trResults.d, trResults.H_d);
-        const auto [dHd, rd] = dot2(trResults.d, trResults.H_d, r, trResults.d);
+        const auto [dHd, rd] = dot_many(trResults.d, trResults.H_d, r, trResults.d);
         double modelObjective = rd + 0.5 * dHd - roundOffTol;
 
         add(X, trResults.d, x_pred);
diff --git a/src/smith/numerics/mfem_trust_region_subspace.cpp b/src/smith/numerics/mfem_trust_region_subspace.cpp
index 2f4bf90713..65652e2d73 100644
--- a/src/smith/numerics/mfem_trust_region_subspace.cpp
+++ b/src/smith/numerics/mfem_trust_region_subspace.cpp
@@ -27,51 +27,13 @@ double innerProduct(const mfem::Vector& a, const mfem::Vector& b, const MPI_Comm
   return mfem::InnerProduct(comm, a, b);
 }
 
-std::pair<std::vector<const mfem::Vector*>, std::vector<const mfem::Vector*>> removeDependentDirections(
-    std::vector<const mfem::Vector*> directions, std::vector<const mfem::Vector*> A_directions)
-{
-  SMITH_MARK_FUNCTION;
-  std::vector<double> norms;
-  size_t num_dirs = directions.size();
-
-  for (size_t i = 0; i < num_dirs; ++i) {
-    norms.push_back(std::sqrt(mfem::InnerProduct(MPI_COMM_WORLD, *directions[i], *directions[i])));
-  }
-
-  std::vector<std::pair<const mfem::Vector*, size_t>> kepts;
-  for (size_t i = 0; i < num_dirs; ++i) {
-    bool keepi = true;
-    if (norms[i] == 0) keepi = false;
-    for (auto&& kept_and_j : kepts) {
-      size_t j = kept_and_j.second;
-      double dot_ij = mfem::InnerProduct(MPI_COMM_WORLD, *directions[i], *kept_and_j.first);
-      if (dot_ij > 0.999 * norms[i] * norms[j]) {
-        keepi = false;
-      }
-    }
-    if (keepi) {
-      kepts.emplace_back(std::make_pair(directions[i], i));
-    }
-  }
-
-  std::vector<const mfem::Vector*> directions_new;
-  std::vector<const mfem::Vector*> A_directions_new;
-
-  for (auto kept_and_j : kepts) {
-    directions_new.push_back(directions[kept_and_j.second]);
-    A_directions_new.push_back(A_directions[kept_and_j.second]);
-  }
-
-  return std::make_pair(directions_new, A_directions_new);
-}
-
 #ifdef MFEM_USE_LAPACK
 
 TrustRegionSubspaceResult solveSubspaceProblem(const std::vector<const mfem::Vector*>& directions,
                                                const std::vector<const mfem::Vector*>& A_directions,
-                                               const mfem::Vector& b, double delta, int num_leftmost)
+                                               const mfem::Vector& b, double delta, int num_leftmost, mfem::Vector& workspace)
 {
-  return solveSubspaceProblemMfem(directions, A_directions, b, delta, num_leftmost);
+  return solveSubspaceProblemMfem(directions, A_directions, b, delta, num_leftmost, workspace);
 }
 
 namespace {
@@ -86,32 +48,7 @@ double norm(const mfem::Vector& x)
   return x.Norml2();
 }
 
-mfem::Vector operator+(const mfem::Vector& x, double value)
-{
-  mfem::Vector out(x);
-  for (int i = 0; i < out.Size(); ++i) {
-    out[i] += value;
-  }
-  return out;
-}
 
-mfem::Vector pointwiseMultiply(const mfem::Vector& a, const mfem::Vector& b)
-{
-  mfem::Vector out(a.Size());
-  for (int i = 0; i < a.Size(); ++i) {
-    out[i] = a[i] * b[i];
-  }
-  return out;
-}
-
-mfem::Vector pointwiseDivide(const mfem::Vector& a, const mfem::Vector& b)
-{
-  mfem::Vector out(a.Size());
-  for (int i = 0; i < a.Size(); ++i) {
-    out[i] = a[i] / b[i];
-  }
-  return out;
-}
 
 double sumAbs(const mfem::Vector& x)
 {
@@ -122,15 +59,6 @@ double sumAbs(const mfem::Vector& x)
   return total;
 }
 
-double sum(const mfem::Vector& x)
-{
-  double total = 0.0;
-  for (int i = 0; i < x.Size(); ++i) {
-    total += x[i];
-  }
-  return total;
-}
-
 void symmetrize(mfem::DenseMatrix& A)
 {
   MFEM_VERIFY(A.Height() == A.Width(), "symmetrize requires square matrix");
@@ -234,14 +162,20 @@ double quadraticEnergy(const mfem::DenseMatrix& A, const mfem::Vector& b, const
 
 double pnormSquared(const mfem::Vector& bvv, const mfem::Vector& sig)
 {
-  return sum(pointwiseDivide(bvv, pointwiseMultiply(sig, sig)));
+  double total = 0.0;
+  for (int i = 0; i < bvv.Size(); ++i) {
+    total += bvv[i] / (sig[i] * sig[i]);
+  }
+  return total;
 }
 
 double qnormSquared(const mfem::Vector& bvv, const mfem::Vector& sig)
 {
-  mfem::Vector sig_sq = pointwiseMultiply(sig, sig);
-  mfem::Vector sig_cu = pointwiseMultiply(sig_sq, sig);
-  return sum(pointwiseDivide(bvv, sig_cu));
+  double total = 0.0;
+  for (int i = 0; i < bvv.Size(); ++i) {
+    total += bvv[i] / (sig[i] * sig[i] * sig[i]);
+  }
+  return total;
 }
 
 mfem::Vector matrixColumn(const mfem::DenseMatrix& A, int j)
@@ -264,8 +198,30 @@ mfem::DenseMatrix columnsToMatrix(const std::vector<mfem::Vector>& cols)
   return A;
 }
 
+/**
+ * @brief Solves the exact trust region subproblem:
+ *        min 1/2 x^T A x - b^T x, subject to ||x|| <= delta.
+ *
+ * Implements a variant of the More-Sorensen algorithm:
+ * 1. Computes the eigensystem of A.
+ * 2. Checks if the unconstrained minimum lies strictly inside the trust region.
+ * 3. Checks for the "hard case" where the minimum eigenvalue is near zero or negative and the minimally shifted
+ *    step lies inside the trust region, requiring an additional step along the leftmost eigenvector.
+ * 4. Otherwise, performs a Newton iteration on the secular equation (1/||p(\lambda)|| - 1/delta = 0)
+ *    to find the optimal Lagrange multiplier \lambda.
+ *
+ * @param A The reduced Hessian matrix (square).
+ * @param b The reduced gradient vector.
+ * @param delta The trust region radius.
+ * @param num_leftmost The number of leftmost eigenvectors/values to return.
+ * @return A tuple containing:
+ *         - The optimal solution vector.
+ *         - A list of the leftmost eigenvectors.
+ *         - A list of the corresponding leftmost eigenvalues.
+ *         - A boolean indicating success.
+ */
 std::tuple<mfem::Vector, std::vector<mfem::Vector>, std::vector<double>, bool> exactTrustRegionSolve(
-    mfem::DenseMatrix A, const mfem::Vector& b, double delta, int num_leftmost)
+    mfem::DenseMatrix A, const mfem::Vector& b, double delta, int num_leftmost, mfem::Vector& workspace)
 {
   if (A.Height() != A.Width()) {
     throw PetscException("Exact trust region solver requires square matrices");
@@ -274,8 +230,17 @@ std::tuple<mfem::Vector, std::vector<mfem::Vector>, std::vector<double>, bool> e
     throw PetscException("The right hand size for exact trust region solve must be consistent with the input matrix size");
   }
 
-  mfem::Vector sigs;
-  mfem::DenseMatrix V;
+  int offset = 0;
+  auto alloc_vector = [&](int size) {
+    mfem::Vector v(workspace.GetData() + offset, size);
+    offset += size;
+    return v;
+  };
+
+  mfem::Vector sigs = alloc_vector(b.Size());
+  mfem::DenseMatrix V(workspace.GetData() + offset, b.Size(), b.Size());
+  offset += b.Size() * b.Size();
+
   A.Eigensystem(sigs, V);
   std::vector<mfem::Vector> leftmosts;
   std::vector<double> minsigs;
@@ -288,13 +253,14 @@ std::tuple<mfem::Vector, std::vector<mfem::Vector>, std::vector<double>, bool> e
   const mfem::Vector leftMost = matrixColumn(V, 0);
   const double minSig = sigs[0];
 
-  mfem::Vector bv(sigs.Size());
+  mfem::Vector bv = alloc_vector(sigs.Size());
   for (int i = 0; i < sigs.Size(); ++i) {
     const mfem::Vector vi = matrixColumn(V, i);
     bv[i] = dot(vi, b);
   }
 
-  mfem::Vector bvOverSigs = pointwiseDivide(bv, sigs);
+  mfem::Vector bvOverSigs = alloc_vector(sigs.Size());
+  for (int i = 0; i < sigs.Size(); ++i) bvOverSigs[i] = bv[i] / sigs[i];
   const double sigScale = sumAbs(sigs) / sigs.Size();
   const double eps = 1e-12 * sigScale;
 
@@ -303,11 +269,12 @@ std::tuple<mfem::Vector, std::vector<mfem::Vector>, std::vector<double>, bool> e
   }
 
   double lam = minSig < eps ? -minSig + eps : 0.0;
-  mfem::Vector sigsPlusLam = sigs + lam;
-  bvOverSigs = pointwiseDivide(bv, sigsPlusLam);
+  mfem::Vector sigsPlusLam = alloc_vector(sigs.Size());
+  for (int i = 0; i < sigs.Size(); ++i) sigsPlusLam[i] = sigs[i] + lam;
+  for (int i = 0; i < sigs.Size(); ++i) bvOverSigs[i] = bv[i] / sigsPlusLam[i];
 
   if ((minSig < eps) && (norm(bvOverSigs) < delta)) {
-    mfem::Vector p(b.Size());
+    mfem::Vector p = alloc_vector(b.Size());
     p = 0.0;
     for (int i = 0; i < b.Size(); ++i) {
       const mfem::Vector vi = matrixColumn(V, i);
@@ -321,8 +288,8 @@ std::tuple<mfem::Vector, std::vector<mfem::Vector>, std::vector<double>, bool> e
     const double tau1 = -pz + std::sqrt(pz * pz + ddmpp);
     const double tau2 = -pz - std::sqrt(pz * pz + ddmpp);
 
-    mfem::Vector x1(p);
-    mfem::Vector x2(p);
+    mfem::Vector x1 = alloc_vector(p.Size()); x1 = p;
+    mfem::Vector x2 = alloc_vector(p.Size()); x2 = p;
     x1.Add(tau1, leftMost);
     x2.Add(tau2, leftMost);
 
@@ -332,8 +299,9 @@ std::tuple<mfem::Vector, std::vector<mfem::Vector>, std::vector<double>, bool> e
     return std::make_tuple(e1 < e2 ? x1 : x2, leftmosts, minsigs, true);
   }
 
-  const mfem::Vector bvbv = pointwiseMultiply(bv, bv);
-  sigsPlusLam = sigs + lam;
+  mfem::Vector bvbv = alloc_vector(bv.Size());
+  for(int i=0; i<bv.Size(); ++i) bvbv[i] = bv[i] * bv[i];
+  for (int i = 0; i < sigs.Size(); ++i) sigsPlusLam[i] = sigs[i] + lam;
 
   double pNormSq = pnormSquared(bvbv, sigsPlusLam);
   double pNorm = std::sqrt(pNormSq);
@@ -344,7 +312,7 @@ std::tuple<mfem::Vector, std::vector<mfem::Vector>, std::vector<double>, bool> e
   while ((std::abs(bError) > 1e-9) && (iters++ < maxIters)) {
     const double qNormSq = qnormSquared(bvbv, sigsPlusLam);
     lam += (pNormSq / qNormSq) * bError;
-    sigsPlusLam = sigs + lam;
+    for (int i = 0; i < sigs.Size(); ++i) sigsPlusLam[i] = sigs[i] + lam;
     pNormSq = pnormSquared(bvbv, sigsPlusLam);
     pNorm = std::sqrt(pNormSq);
     bError = (pNorm - delta) / delta;
@@ -352,9 +320,9 @@ std::tuple<mfem::Vector, std::vector<mfem::Vector>, std::vector<double>, bool> e
 
   const bool success = iters < maxIters;
 
-  bvOverSigs = pointwiseDivide(bv, sigsPlusLam);
+  for (int i = 0; i < sigs.Size(); ++i) bvOverSigs[i] = bv[i] / sigsPlusLam[i];
 
-  mfem::Vector x(b.Size());
+  mfem::Vector x = alloc_vector(b.Size());
   x = 0.0;
   for (int i = 0; i < b.Size(); ++i) {
     const mfem::Vector vi = matrixColumn(V, i);
@@ -362,7 +330,8 @@ std::tuple<mfem::Vector, std::vector<mfem::Vector>, std::vector<double>, bool> e
   }
 
   const double e1 = quadraticEnergy(A, b, x);
-  mfem::Vector neg_x(x);
+  mfem::Vector neg_x = alloc_vector(x.Size());
+  neg_x = x;
   neg_x *= -1.0;
   const double e2 = quadraticEnergy(A, b, neg_x);
 
@@ -425,7 +394,7 @@ mfem::Vector combineDirections(const std::vector<const mfem::Vector*>& states, c
 
 TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector<const mfem::Vector*>& states,
                                                    const std::vector<const mfem::Vector*>& Astates,
-                                                   const mfem::Vector& b, double delta, int num_leftmost)
+                                                   const mfem::Vector& b, double delta, int num_leftmost, mfem::Vector& workspace)
 {
   SMITH_MARK_FUNCTION;
   SubspaceProjections projections = projectSubspaceGlobally(states, Astates, b);
@@ -445,6 +414,11 @@ TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector<const mfem:
 
   double trace_mag = 0.0;
   mfem::DenseMatrix T = orthonormalBasisTransform(ss, trace_mag);
+  if (trace_mag == 0.0) {
+    mfem::Vector sol(*states[0]);
+    sol = 0.0;
+    return std::make_tuple(sol, std::vector<std::shared_ptr<mfem::Vector>>{}, std::vector<double>{}, 0.0);
+  }
   if (T.Width() == 0) {
     throw PetscException("No independent directions in MFEM subspace solve.");
   }
@@ -454,7 +428,7 @@ TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector<const mfem:
   const mfem::Vector& sb = projections.sb;
   const mfem::Vector pb = projectWithTranspose(T, sb);
 
-  auto [reduced_x, leftvecs, leftvals, success] = exactTrustRegionSolve(pAp, pb, delta, num_leftmost);
+  auto [reduced_x, leftvecs, leftvals, success] = exactTrustRegionSolve(pAp, pb, delta, num_leftmost, workspace);
   (void)success;
   const double energy = quadraticEnergy(pAp, pb, reduced_x);
 
diff --git a/src/smith/numerics/petsc_trust_region_subspace.cpp b/src/smith/numerics/petsc_trust_region_subspace.cpp
index 2368e06899..9669359388 100644
--- a/src/smith/numerics/petsc_trust_region_subspace.cpp
+++ b/src/smith/numerics/petsc_trust_region_subspace.cpp
@@ -290,7 +290,7 @@ std::vector<const mfem::Vector*> remove_at(const std::vector<const mfem::Vector*
 
 TrustRegionSubspaceResult solveSubspaceProblemPetsc(const std::vector<const mfem::Vector*>& states,
                                                     const std::vector<const mfem::Vector*>& Astates,
-                                                    const mfem::Vector& b, double delta, int num_leftmost)
+                                                    const mfem::Vector& b, double delta, int num_leftmost, mfem::Vector& workspace)
 {
   SMITH_MARK_FUNCTION;
   DenseMat sAs1 = dot(states, Astates);
@@ -318,7 +318,7 @@ TrustRegionSubspaceResult solveSubspaceProblemPetsc(const std::vector<const mfem
     if (R(i, i) < 1e-9 * trace_mag) {
       auto statesNew = remove_at(states, size_t(i));
       auto AstatesNew = remove_at(Astates, size_t(i));
-      return solveSubspaceProblemPetsc(statesNew, AstatesNew, b, delta, num_leftmost);
+      return solveSubspaceProblemPetsc(statesNew, AstatesNew, b, delta, num_leftmost, workspace);
     }
   }
 
diff --git a/src/smith/numerics/steihaug_toint_cg.cpp b/src/smith/numerics/steihaug_toint_cg.cpp
new file mode 100644
index 0000000000..9b4db432da
--- /dev/null
+++ b/src/smith/numerics/steihaug_toint_cg.cpp
@@ -0,0 +1,134 @@
+// Copyright (c) Lawrence Livermore National Security, LLC and
+// other Smith Project Developers. See the top-level LICENSE file for
+// details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+
+#include "smith/numerics/steihaug_toint_cg.hpp"
+
+namespace smith {
+
+namespace {
+
+void smith_add(const mfem::Vector& a, double b, const mfem::Vector& c, mfem::Vector& out)
+{
+  if (out.GetData() == c.GetData()) {
+    // We expect out and c are often the same vector memory (zPred = Pr, z += alpha * d)
+    // Wait, add(a, b, c, out) means out = a + b*c
+    out = a;
+    out.Add(b, c);
+  } else {
+    out = a;
+    out.Add(b, c);
+  }
+}
+
+} // namespace
+
+void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent,
+                     const mfem::Operator& H, const mfem::Solver* P,
+                     const TrustRegionSettings& settings, double& trSize,
+                     TrustRegionResults& results, double r0_norm_squared, 
+                     const SteihaugTointDelegate& delegate)
+{
+  // minimize r0@z + 0.5*z@J@z
+  results.interior_status = TrustRegionResults::Status::Interior;
+  results.cg_iterations_count = 0;
+
+  auto& z = results.z;
+  auto& cgIter = results.cg_iterations_count;
+  auto& d = results.d;
+  auto& Pr = results.Pr;
+  auto& Hd = results.H_d;
+
+  const double cg_tol_squared = settings.cg_tol * settings.cg_tol;
+
+  if (r0_norm_squared <= cg_tol_squared && settings.min_cg_iterations == 0) {
+    return;
+  }
+
+  rCurrent = r0;
+  if (P) {
+    P->Mult(rCurrent, Pr);
+  } else {
+    Pr = rCurrent;
+  }
+
+  // d = -Pr
+  d = Pr;
+  d *= -1.0;
+
+  z = 0.0;
+  double zz = 0.;
+  
+  // rPr = dot(rCurrent, Pr)
+  auto rPr_arr = delegate.dot_many_2(rCurrent, Pr, rCurrent, Pr); // We only need the first
+  double rPr = rPr_arr[0];
+
+  for (cgIter = 1; cgIter <= settings.max_cg_iterations; ++cgIter) {
+    H.Mult(d, Hd);
+    
+    auto dots = delegate.dot_many_4(d, rCurrent, d, Hd, z, d, d, d);
+    double descent_check = dots[0];
+    double curvature = dots[1];
+    double zd = dots[2];
+    double dd = dots[3];
+    
+    if (descent_check > 0) {
+      d *= -1;
+      Hd *= -1;
+      results.interior_status = TrustRegionResults::Status::NonDescentDirection;
+      descent_check *= -1.0;
+      curvature *= -1.0;
+      zd *= -1.0;
+    }
+
+    const double alphaCg = curvature != 0.0 ? rPr / curvature : 0.0;
+    const double zzNp1 = zz + 2.0 * alphaCg * zd + alphaCg * alphaCg * dd;
+
+    const bool go_to_boundary = curvature <= 0 || zzNp1 >= trSize * trSize;
+    if (go_to_boundary) {
+      delegate.projectToBoundaryWithCoefs(z, d, trSize, zz, zd, dd);
+      if (curvature <= 0) {
+        results.interior_status = TrustRegionResults::Status::NegativeCurvature;
+      } else {
+        results.interior_status = TrustRegionResults::Status::OnBoundary;
+      }
+      return;
+    }
+
+    auto& zPred = Pr;  
+    smith_add(z, alphaCg, d, zPred);
+    z = zPred;
+
+    if (results.interior_status == TrustRegionResults::Status::NonDescentDirection) {
+      return;
+    }
+
+    smith_add(rCurrent, alphaCg, Hd, rCurrent);
+
+    if (P) {
+      P->Mult(rCurrent, Pr);
+    } else {
+      Pr = rCurrent;
+    }
+    
+    auto dots2 = delegate.dot_many_2(rCurrent, Pr, rCurrent, rCurrent);
+    double rPrNp1 = dots2[0];
+    double r_current_norm_squared = dots2[1];
+    
+    if (r_current_norm_squared <= cg_tol_squared && cgIter >= settings.min_cg_iterations) {
+      return;
+    }
+
+    double beta = rPrNp1 / rPr;
+    rPr = rPrNp1;
+    d *= beta;
+    d.Add(-1.0, Pr);
+
+    zz = zzNp1;
+  }
+  cgIter--; 
+}
+
+}  // namespace smith
\ No newline at end of file
diff --git a/src/smith/numerics/steihaug_toint_cg.hpp b/src/smith/numerics/steihaug_toint_cg.hpp
new file mode 100644
index 0000000000..ad7a27e66d
--- /dev/null
+++ b/src/smith/numerics/steihaug_toint_cg.hpp
@@ -0,0 +1,140 @@
+// Copyright (c) Lawrence Livermore National Security, LLC and
+// other Smith Project Developers. See the top-level LICENSE file for
+// details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+
+#pragma once
+
+#include "mfem.hpp"
+#include <array>
+
+namespace smith {
+
+/// Internal structure for storing trust region settings
+struct TrustRegionSettings {
+  /// cg tol
+  double cg_tol = 1e-8;
+  /// min cg iters
+  size_t min_cg_iterations = 0;  //
+  /// max cg iters should be around # of system dofs
+  size_t max_cg_iterations = 10000;  //
+  /// max cumulative iterations
+  size_t max_cumulative_iteration = 1;
+  /// minimum trust region size
+  double min_tr_size = 1e-13;
+  /// trust region decrease factor
+  double t1 = 0.25;
+  /// trust region increase factor
+  double t2 = 1.75;
+  /// worse case energy drop ratio.  trust region accepted if energy drop is better than this.
+  double eta1 = 1e-9;
+  /// non-ideal energy drop ratio.  trust region decreases if energy drop is worse than this.
+  double eta2 = 0.1;
+  /// ideal energy drop ratio.  trust region increases if energy drop is better than this.
+  double eta3 = 0.6;
+  /// parameter limiting how fast the energy can drop relative to the prediction (in case the energy surrogate is poor)
+  double eta4 = 4.2;
+};
+
+/// Internal structure for storing trust region stateful data
+struct TrustRegionResults {
+  /// Constructor takes the size of the solution vector
+  TrustRegionResults(int size)
+  {
+    z.SetSize(size);
+    H_z.SetSize(size);
+    d_old.SetSize(size);
+    H_d_old.SetSize(size);
+    H_d_old_at_accept.SetSize(size);
+    d.SetSize(size);
+    H_d.SetSize(size);
+    Pr.SetSize(size);
+    cauchy_point.SetSize(size);
+    H_cauchy_point.SetSize(size);
+    z = 0.0;
+    H_z = 0.0;
+    d_old = 0.0;
+    H_d_old = 0.0;
+    H_d_old_at_accept = 0.0;
+    d = 0.0;
+    H_d = 0.0;
+    Pr = 0.0;
+    cauchy_point = 0.0;
+    H_cauchy_point = 0.0;
+  }
+
+  /// resets trust region results for a new outer iteration
+  void reset()
+  {
+    z = 0.0;
+    cauchy_point = 0.0;
+  }
+
+  /// enumerates the possible final status of the trust region steps
+  enum class Status
+  {
+    Interior,
+    NegativeCurvature,
+    OnBoundary,
+    NonDescentDirection
+  };
+
+  /// step direction
+  mfem::Vector z;
+  /// action of hessian on current step z
+  mfem::Vector H_z;
+  /// old step direction
+  mfem::Vector d_old;
+  /// action of hessian on previous step z_old
+  mfem::Vector H_d_old;
+  /// action of previous accepted hessian on previous step z_old
+  mfem::Vector H_d_old_at_accept;
+  /// true after at least one accepted line-search step has populated d_old
+  bool has_d_old = false;
+  /// incrementalCG direction
+  mfem::Vector d;
+  /// action of hessian on direction d
+  mfem::Vector H_d;
+  /// preconditioned residual
+  mfem::Vector Pr;
+  /// cauchy point
+  mfem::Vector cauchy_point;
+  /// action of hessian on direction of cauchy point
+  mfem::Vector H_cauchy_point;
+  /// specifies if step is interior, exterior, negative curvature, etc.
+  Status interior_status = Status::Interior;
+  /// iteration counter
+  size_t cg_iterations_count = 0;
+};
+
+class SteihaugTointDelegate {
+public:
+  virtual ~SteihaugTointDelegate() = default;
+
+  virtual std::array<double, 4> dot_many_4(const mfem::Vector& a0, const mfem::Vector& b0,
+                                           const mfem::Vector& a1, const mfem::Vector& b1,
+                                           const mfem::Vector& a2, const mfem::Vector& b2,
+                                           const mfem::Vector& a3, const mfem::Vector& b3) const = 0;
+
+  virtual std::array<double, 2> dot_many_2(const mfem::Vector& a0, const mfem::Vector& b0,
+                                           const mfem::Vector& a1, const mfem::Vector& b1) const = 0;
+
+  virtual void projectToBoundaryWithCoefs(mfem::Vector& z, const mfem::Vector& d, double delta, double zz, double zd,
+                                          double dd) const = 0;
+};
+
+/**
+ * @brief Minimize quadratic sub-problem given residual vector, the action of the stiffness and a preconditioner
+ * 
+ * This is a standard implementation of 'The Conjugate Gradient Method and Trust Regions in Large Scale Optimization' 
+ * by T. Steihaug. It is also called the Steihaug-Toint CG trust region algorithm (see also Trust Region Methods 
+ * by Conn, Gould, and Toint).
+ */
+void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent,
+                     const mfem::Operator& H, const mfem::Solver* P,
+                     const TrustRegionSettings& settings, double& trSize,
+                     TrustRegionResults& results, double r0_norm_squared, 
+                     const SteihaugTointDelegate& delegate);
+
+}  // namespace smith
\ No newline at end of file
diff --git a/src/smith/numerics/tests/CMakeLists.txt b/src/smith/numerics/tests/CMakeLists.txt
index a2577051e2..617a0a4f11 100644
--- a/src/smith/numerics/tests/CMakeLists.txt
+++ b/src/smith/numerics/tests/CMakeLists.txt
@@ -10,6 +10,7 @@ set(numerics_serial_test_sources
     test_equationsolver.cpp
     test_operator.cpp
     test_odes.cpp
+    test_steihaug_toint_cg.cpp
     test_block_preconditioner.cpp
     test_block_preconditioner_backend.cpp
     test_block_preconditioner_custom_operators.cpp
diff --git a/src/smith/numerics/tests/test_steihaug_toint_cg.cpp b/src/smith/numerics/tests/test_steihaug_toint_cg.cpp
new file mode 100644
index 0000000000..755419667a
--- /dev/null
+++ b/src/smith/numerics/tests/test_steihaug_toint_cg.cpp
@@ -0,0 +1,133 @@
+// Copyright (c) Lawrence Livermore National Security, LLC and
+// other Smith Project Developers. See the top-level LICENSE file for
+// details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+
+#include <gtest/gtest.h>
+#include "smith/numerics/steihaug_toint_cg.hpp"
+
+namespace {
+
+class TestDelegate : public smith::SteihaugTointDelegate {
+public:
+  std::array<double, 4> dot_many_4(const mfem::Vector& a0, const mfem::Vector& b0,
+                                   const mfem::Vector& a1, const mfem::Vector& b1,
+                                   const mfem::Vector& a2, const mfem::Vector& b2,
+                                   const mfem::Vector& a3, const mfem::Vector& b3) const override
+  {
+    return {a0 * b0, a1 * b1, a2 * b2, a3 * b3};
+  }
+
+  std::array<double, 2> dot_many_2(const mfem::Vector& a0, const mfem::Vector& b0,
+                                   const mfem::Vector& a1, const mfem::Vector& b1) const override
+  {
+    return {a0 * b0, a1 * b1};
+  }
+
+  void projectToBoundaryWithCoefs(mfem::Vector& z, const mfem::Vector& d, double delta, double zz, double zd,
+                                  double dd) const override
+  {
+    double deltadelta_m_zz = delta * delta - zz;
+    if (deltadelta_m_zz <= 0) return;
+    double tau = (std::sqrt(deltadelta_m_zz * dd + zd * zd) - zd) / dd;
+    z.Add(tau, d);
+  }
+};
+
+class DiagonalOperator : public mfem::Operator {
+public:
+  DiagonalOperator(const mfem::Vector& diag) : mfem::Operator(diag.Size()), diag_(diag) {}
+  void Mult(const mfem::Vector& x, mfem::Vector& y) const override
+  {
+    for (int i = 0; i < height; ++i) {
+      y[i] = diag_[i] * x[i];
+    }
+  }
+private:
+  const mfem::Vector& diag_;
+};
+
+} // namespace
+
+TEST(SteihaugTointCG, SolvesSPDInsideBoundary)
+{
+  int size = 2;
+  mfem::Vector diag(size);
+  diag[0] = 2.0;
+  diag[1] = 4.0;
+  DiagonalOperator H(diag);
+
+  mfem::Vector r0(size);
+  r0[0] = 1.0;
+  r0[1] = 1.0;
+
+  smith::TrustRegionSettings settings;
+  settings.cg_tol = 1e-10;
+  settings.max_cg_iterations = 10;
+  
+  double trSize = 100.0; // Huge trust region
+  smith::TrustRegionResults results(size);
+  
+  mfem::Vector rCurrent(size);
+  TestDelegate delegate;
+  
+  smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), delegate);
+  
+  // Solution should be H^{-1} (-r0)
+  // x = -0.5, y = -0.25
+  EXPECT_NEAR(results.z[0], -0.5, 1e-9);
+  EXPECT_NEAR(results.z[1], -0.25, 1e-9);
+  EXPECT_EQ(results.interior_status, smith::TrustRegionResults::Status::Interior);
+}
+
+TEST(SteihaugTointCG, HitsBoundary)
+{
+  int size = 1;
+  mfem::Vector diag(size);
+  diag[0] = 1.0;
+  DiagonalOperator H(diag);
+
+  mfem::Vector r0(size);
+  r0[0] = 1.0;
+
+  smith::TrustRegionSettings settings;
+  settings.max_cg_iterations = 10;
+  
+  double trSize = 0.5; // Small trust region, solution would be -1.0
+  smith::TrustRegionResults results(size);
+  
+  mfem::Vector rCurrent(size);
+  TestDelegate delegate;
+  
+  smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), delegate);
+  
+  EXPECT_NEAR(results.z.Norml2(), 0.5, 1e-9);
+  EXPECT_EQ(results.interior_status, smith::TrustRegionResults::Status::OnBoundary);
+}
+
+TEST(SteihaugTointCG, DetectsNegativeCurvature)
+{
+  int size = 1;
+  mfem::Vector diag(size);
+  diag[0] = -1.0; // Negative curvature
+  DiagonalOperator H(diag);
+
+  mfem::Vector r0(size);
+  r0[0] = 1.0;
+
+  smith::TrustRegionSettings settings;
+  settings.max_cg_iterations = 10;
+  
+  double trSize = 2.0;
+  smith::TrustRegionResults results(size);
+  
+  mfem::Vector rCurrent(size);
+  TestDelegate delegate;
+  
+  smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), delegate);
+  
+  // For negative curvature, it should go to boundary
+  EXPECT_NEAR(results.z.Norml2(), 2.0, 1e-9);
+  EXPECT_EQ(results.interior_status, smith::TrustRegionResults::Status::NegativeCurvature);
+}
diff --git a/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp b/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp
index 62c7730205..a476c02ee5 100644
--- a/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp
+++ b/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp
@@ -75,43 +75,6 @@ struct DiagonalSubspaceFixture {
 
 }  // namespace
 
-TEST(TrustRegionSubspaceMfem, RemoveDependentDirectionsDropsDuplicatesAndZero)
-{
-  mfem::Vector d1(4);
-  mfem::Vector d2(4);
-  mfem::Vector d3(4);
-  mfem::Vector hd1(4);
-  mfem::Vector hd2(4);
-  mfem::Vector hd3(4);
-
-  d1 = 0.0;
-  d2 = 0.0;
-  d3 = 0.0;
-  hd1 = 0.0;
-  hd2 = 0.0;
-  hd3 = 0.0;
-
-  d1[0] = 1.0;
-  d1[1] = 2.0;
-  d2 = d1;
-  d2 *= 3.0;
-
-  hd1[0] = 2.0;
-  hd1[1] = 5.0;
-  hd2 = hd1;
-  hd2 *= 3.0;
-
-  std::vector<const mfem::Vector*> dirs = {&d1, &d2, &d3};
-  std::vector<const mfem::Vector*> hdirs = {&hd1, &hd2, &hd3};
-
-  auto [dirs_new, hdirs_new] = smith::removeDependentDirections(dirs, hdirs);
-
-  ASSERT_EQ(dirs_new.size(), 1);
-  ASSERT_EQ(hdirs_new.size(), 1);
-  expectNearVector(*dirs_new[0], d1, 0.0);
-  expectNearVector(*hdirs_new[0], hd1, 0.0);
-}
-
 
 
 TEST(TrustRegionSubspaceMfem, SolveHitsTrustRegionBoundary)
@@ -122,8 +85,9 @@ TEST(TrustRegionSubspaceMfem, SolveHitsTrustRegionBoundary)
   const auto astates = applyDiagonalOperator(fixture.diag, states);
   const auto astate_ptrs = toPointers(astates);
 
+  mfem::Vector workspace(2000);
   auto [sol, leftvecs, leftvals, energy] =
-      smith::solveSubspaceProblemMfem(states, astate_ptrs, fixture.b, test_delta, 1);
+      smith::solveSubspaceProblemMfem(states, astate_ptrs, fixture.b, test_delta, 1, workspace);
 
   EXPECT_NEAR(sol.Norml2(), test_delta, 1.0e-12);
   EXPECT_FALSE(leftvecs.empty());
@@ -139,10 +103,11 @@ TEST(TrustRegionSubspaceMfem, GenericSolveUsesMfemBackend)
   const auto astates = applyDiagonalOperator(fixture.diag, states);
   const auto astate_ptrs = toPointers(astates);
 
+  mfem::Vector workspace(2000);
   auto [generic_sol, generic_leftvecs, generic_leftvals, generic_energy] =
-      smith::solveSubspaceProblem(states, astate_ptrs, fixture.b, test_delta, 2);
+      smith::solveSubspaceProblem(states, astate_ptrs, fixture.b, test_delta, 2, workspace);
   auto [mfem_sol, mfem_leftvecs, mfem_leftvals, mfem_energy] =
-      smith::solveSubspaceProblemMfem(states, astate_ptrs, fixture.b, test_delta, 2);
+      smith::solveSubspaceProblemMfem(states, astate_ptrs, fixture.b, test_delta, 2, workspace);
 
   expectNearVector(generic_sol, mfem_sol, 1.0e-12);
   ASSERT_EQ(generic_leftvecs.size(), mfem_leftvecs.size());
@@ -182,7 +147,8 @@ TEST(TrustRegionSubspaceMfem, SolveHandlesZeroDirection)
   const auto astates = applyDiagonalOperator(diag, states);
   const auto astate_ptrs = toPointers(astates);
 
-  auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblemMfem(states, astate_ptrs, b, 0.25, 1);
+  mfem::Vector workspace(2000);
+  auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblemMfem(states, astate_ptrs, b, 0.25, 1, workspace);
 
   EXPECT_LE(sol.Norml2(), 0.25 + 1.0e-12);
   EXPECT_FALSE(leftvecs.empty());
diff --git a/src/smith/numerics/tests/test_trust_region_solver_petsc.cpp b/src/smith/numerics/tests/test_trust_region_solver_petsc.cpp
index 55c7a16f77..d0746e83b9 100644
--- a/src/smith/numerics/tests/test_trust_region_solver_petsc.cpp
+++ b/src/smith/numerics/tests/test_trust_region_solver_petsc.cpp
@@ -153,7 +153,8 @@ TEST_F(MeshFixture, PetscSubspaceSolveHitsTrustRegionBoundary)
   }
 
   double delta = 0.001;
-  auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblemPetsc(states, AstatePtrs, b, delta, 1);
+  mfem::Vector workspace(2000);
+  auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblemPetsc(states, AstatePtrs, b, delta, 1, workspace);
 
   EXPECT_NEAR(sol.Norml2(), delta, 1e-12);
   EXPECT_FALSE(leftvecs.empty());
@@ -190,10 +191,11 @@ TEST_F(MeshFixture, MfemSubspaceSolveMatchesPetsc)
     AstatePtrs.push_back(&Astates[i]);
   }
 
+  mfem::Vector workspace(2000);
   auto [petsc_sol, petsc_leftvecs, petsc_leftvals, petsc_energy] =
-      smith::solveSubspaceProblemPetsc(states, AstatePtrs, b, 0.001, 2);
+      smith::solveSubspaceProblemPetsc(states, AstatePtrs, b, 0.001, 2, workspace);
   auto [mfem_sol, mfem_leftvecs, mfem_leftvals, mfem_energy] =
-      smith::solveSubspaceProblemMfem(states, AstatePtrs, b, 0.001, 2);
+      smith::solveSubspaceProblemMfem(states, AstatePtrs, b, 0.001, 2, workspace);
 
   expectNearVector(mfem_sol, petsc_sol, 1e-10);
   ASSERT_EQ(mfem_leftvecs.size(), petsc_leftvecs.size());
diff --git a/src/smith/numerics/trust_region_solver.hpp b/src/smith/numerics/trust_region_solver.hpp
index 2f5290b93f..9d26fc3c36 100644
--- a/src/smith/numerics/trust_region_solver.hpp
+++ b/src/smith/numerics/trust_region_solver.hpp
@@ -55,19 +55,16 @@ double innerProduct(const mfem::Vector& a, const mfem::Vector& b, const MPI_Comm
 /// and their eigenvalues, and the predicted model energy change
 TrustRegionSubspaceResult solveSubspaceProblem(
     const std::vector<const mfem::Vector*>& directions, const std::vector<const mfem::Vector*>& A_directions,
-    const mfem::Vector& b, double delta, int num_leftmost);
+    const mfem::Vector& b, double delta, int num_leftmost, mfem::Vector& workspace);
 
 #if defined(SMITH_USE_SLEPC) && defined(SMITH_TRUST_REGION_USE_PETSC_SUBSPACE)
 TrustRegionSubspaceResult solveSubspaceProblemPetsc(
     const std::vector<const mfem::Vector*>& directions, const std::vector<const mfem::Vector*>& A_directions,
-    const mfem::Vector& b, double delta, int num_leftmost);
+    const mfem::Vector& b, double delta, int num_leftmost, mfem::Vector& workspace);
 #endif
 
 TrustRegionSubspaceResult solveSubspaceProblemMfem(
     const std::vector<const mfem::Vector*>& directions, const std::vector<const mfem::Vector*>& A_directions,
-    const mfem::Vector& b, double delta, int num_leftmost);
-
-std::pair<std::vector<const mfem::Vector*>, std::vector<const mfem::Vector*>> removeDependentDirections(
-    std::vector<const mfem::Vector*> directions, std::vector<const mfem::Vector*> A_directions);
+    const mfem::Vector& b, double delta, int num_leftmost, mfem::Vector& workspace);
 
 }  // namespace smith
diff --git a/src/smith/physics/tests/shallow_arch_buckling.cpp b/src/smith/physics/tests/shallow_arch_buckling.cpp
index 6b1d324712..00ca99d7fa 100644
--- a/src/smith/physics/tests/shallow_arch_buckling.cpp
+++ b/src/smith/physics/tests/shallow_arch_buckling.cpp
@@ -47,8 +47,7 @@ NonlinearSolver selectedNonlinearSolver()
     return NonlinearSolver::TrustRegion;
   }
 
-  throw std::runtime_error("Unknown --solver value '" + solver_name +
-                           "'. Use NewtonLineSearch or TrustRegion.");
+  throw std::runtime_error("Unknown --solver value '" + solver_name + "'. Use NewtonLineSearch or TrustRegion.");
 }
 
 void parseCommandLine(int& argc, char** argv)
@@ -121,7 +120,7 @@ TEST(ShallowArchBuckling, CompressedThinBeamSnapThrough)
                                             .preconditioner = Preconditioner::HypreJacobi,
                                             .relative_tol = 1.0e-8,
                                             .absolute_tol = 1.0e-14,
-                                            .max_iterations = 10000,
+                                            .max_iterations = 100000,
                                             .print_level = 0};
 
   smith::NonlinearSolverOptions nonlinear_options{
@@ -165,8 +164,8 @@ TEST(ShallowArchBuckling, CompressedThinBeamSnapThrough)
   if (rank == 0) {
     mfem::out << "Compressed thin beam snap-through run: solver = " << solver_name
               << ", trust_subspace_option = " << trust_subspace_option
-              << ", trust_num_leftmost = " << trust_num_leftmost
-              << ", trust_num_past_steps = " << trust_num_past_steps << '\n';
+              << ", trust_num_leftmost = " << trust_num_leftmost << ", trust_num_past_steps = " << trust_num_past_steps
+              << '\n';
   }
 
   constexpr int num_steps = 5;
@@ -177,7 +176,6 @@ TEST(ShallowArchBuckling, CompressedThinBeamSnapThrough)
     }
     solid.outputStateToDisk("shallow_arch_buckling");
   }
-
 }
 
 }  // namespace smith

From d426fc5b8e15f873b93771c0fd883e73108a1204 Mon Sep 17 00:00:00 2001
From: Michael Tupek <mrtupek@gmail.com>
Date: Sat, 9 May 2026 17:17:06 -0600
Subject: [PATCH 16/27] style.

---
 src/smith/numerics/equation_solver.cpp        | 33 ++++++-------
 .../numerics/mfem_trust_region_subspace.cpp   | 40 +++++++---------
 .../numerics/petsc_trust_region_subspace.cpp  |  3 +-
 src/smith/numerics/solver_config.hpp          |  3 +-
 src/smith/numerics/steihaug_toint_cg.cpp      | 26 +++++-----
 src/smith/numerics/steihaug_toint_cg.hpp      | 25 +++++-----
 .../numerics/tests/test_steihaug_toint_cg.cpp | 48 +++++++++----------
 .../tests/test_trust_region_solver_mfem.cpp   | 13 ++---
 src/smith/numerics/trust_region_solver.hpp    | 24 ++++++----
 9 files changed, 99 insertions(+), 116 deletions(-)

diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp
index cb82935b5d..0780abc23b 100644
--- a/src/smith/numerics/equation_solver.cpp
+++ b/src/smith/numerics/equation_solver.cpp
@@ -246,8 +246,6 @@ class NewtonSolver : public mfem::NewtonSolver {
   }
 };
 
-
-
 /// trust region printing utility function
 void printTrustRegionInfo(double realWork, double modelObjective, size_t cgIters, double trSize, bool willAccept)
 {
@@ -285,7 +283,7 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
   mutable mfem::Vector solve_start_x;
   mutable mfem::Vector min_residual_x;
   mutable double min_residual_norm = -1.0;
-  
+
   /// Workspace vector for exact subspace solver to avoid small allocations
   mutable mfem::Vector exact_solver_workspace;
 
@@ -332,14 +330,13 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
     std::array<int, num_pairs> sizes;
     std::array<const double*, num_pairs> ptr_a;
     std::array<const double*, num_pairs> ptr_b;
-    
+
     auto populate_arrays = [&]<std::size_t... I>(std::index_sequence<I...>) {
       ((
-        sizes[I] = std::get<2 * I>(tuple_args).Size(),
-        [&](){ MFEM_ASSERT(sizes[I] == std::get<2 * I + 1>(tuple_args).Size(), "Incompatible vector sizes."); }(),
-        ptr_a[I] = std::get<2 * I>(tuple_args).GetData(),
-        ptr_b[I] = std::get<2 * I + 1>(tuple_args).GetData()
-      ), ...);
+           sizes[I] = std::get<2 * I>(tuple_args).Size(),
+           [&]() { MFEM_ASSERT(sizes[I] == std::get<2 * I + 1>(tuple_args).Size(), "Incompatible vector sizes."); }(),
+           ptr_a[I] = std::get<2 * I>(tuple_args).GetData(), ptr_b[I] = std::get<2 * I + 1>(tuple_args).GetData()),
+       ...);
     };
     populate_arrays(std::make_index_sequence<num_pairs>{});
 
@@ -407,16 +404,15 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
     }
   }
 
-  std::array<double, 4> dot_many_4(const mfem::Vector& a0, const mfem::Vector& b0,
-                                   const mfem::Vector& a1, const mfem::Vector& b1,
-                                   const mfem::Vector& a2, const mfem::Vector& b2,
+  std::array<double, 4> dot_many_4(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1,
+                                   const mfem::Vector& b1, const mfem::Vector& a2, const mfem::Vector& b2,
                                    const mfem::Vector& a3, const mfem::Vector& b3) const override
   {
     return dot_many(a0, b0, a1, b1, a2, b2, a3, b3);
   }
 
-  std::array<double, 2> dot_many_2(const mfem::Vector& a0, const mfem::Vector& b0,
-                                   const mfem::Vector& a1, const mfem::Vector& b1) const override
+  std::array<double, 2> dot_many_2(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1,
+                                   const mfem::Vector& b1) const override
   {
     return dot_many(a0, b0, a1, b1);
   }
@@ -544,9 +540,9 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
   }
 
   /// Minimize quadratic sub-problem given residual vector, the action of the stiffness and a preconditioner
-  void solveModelProblem(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem::Operator& H,
-                         const mfem::Solver* P, const TrustRegionSettings& settings, double& trSize,
-                         TrustRegionResults& results, double r0_norm_squared) const
+  void solveModelProblem(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem::Operator& H, const mfem::Solver* P,
+                         const TrustRegionSettings& settings, double& trSize, TrustRegionResults& results,
+                         double r0_norm_squared) const
   {
     steihaugTointCG(r0, rCurrent, H, P, settings, trSize, results, r0_norm_squared, *this);
   }
@@ -727,8 +723,7 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
         trResults.interior_status = TrustRegionResults::Status::OnBoundary;
       } else {
         settings.cg_tol = std::max(0.5 * norm_goal, 5e-5 * norm);
-        solveModelProblem(r, scratch, *this->oper, &this->tr_precond, settings, tr_size, trResults,
-                                     norm * norm);
+        solveModelProblem(r, scratch, *this->oper, &this->tr_precond, settings, tr_size, trResults, norm * norm);
       }
       cumulative_cg_iters_from_last_precond_update += trResults.cg_iterations_count;
 
diff --git a/src/smith/numerics/mfem_trust_region_subspace.cpp b/src/smith/numerics/mfem_trust_region_subspace.cpp
index 65652e2d73..716a2700ab 100644
--- a/src/smith/numerics/mfem_trust_region_subspace.cpp
+++ b/src/smith/numerics/mfem_trust_region_subspace.cpp
@@ -31,24 +31,17 @@ double innerProduct(const mfem::Vector& a, const mfem::Vector& b, const MPI_Comm
 
 TrustRegionSubspaceResult solveSubspaceProblem(const std::vector<const mfem::Vector*>& directions,
                                                const std::vector<const mfem::Vector*>& A_directions,
-                                               const mfem::Vector& b, double delta, int num_leftmost, mfem::Vector& workspace)
+                                               const mfem::Vector& b, double delta, int num_leftmost,
+                                               mfem::Vector& workspace)
 {
   return solveSubspaceProblemMfem(directions, A_directions, b, delta, num_leftmost, workspace);
 }
 
 namespace {
 
-double dot(const mfem::Vector& a, const mfem::Vector& b)
-{
-  return a * b;
-}
-
-double norm(const mfem::Vector& x)
-{
-  return x.Norml2();
-}
-
+double dot(const mfem::Vector& a, const mfem::Vector& b) { return a * b; }
 
+double norm(const mfem::Vector& x) { return x.Norml2(); }
 
 double sumAbs(const mfem::Vector& x)
 {
@@ -99,9 +92,7 @@ SubspaceProjections globalSubspaceProjectionFromLocalInnerProducts(const std::ve
 {
   const int n = static_cast<int>(states.size());
   const int triangular_size = n * (n + 1) / 2;
-  const auto triangular_index = [n](int i, int j) {
-    return i * n - (i * (i - 1)) / 2 + (j - i);
-  };
+  const auto triangular_index = [n](int i, int j) { return i * n - (i * (i - 1)) / 2 + (j - i); };
   const int sAs_offset = 0;
   const int ss_offset = triangular_size;
   const int sb_offset = 2 * triangular_size;
@@ -113,8 +104,7 @@ SubspaceProjections globalSubspaceProjectionFromLocalInnerProducts(const std::ve
     local_projection_entries[size_t(sb_offset + i)] = mfem::InnerProduct(*states[size_t(i)], b);
     for (int j = i; j < n; ++j) {
       const size_t ij = size_t(triangular_index(i, j));
-      local_projection_entries[size_t(sAs_offset) + ij] =
-          mfem::InnerProduct(*states[size_t(i)], *Astates[size_t(j)]);
+      local_projection_entries[size_t(sAs_offset) + ij] = mfem::InnerProduct(*states[size_t(i)], *Astates[size_t(j)]);
       local_projection_entries[size_t(ss_offset) + ij] = mfem::InnerProduct(*states[size_t(i)], *states[size_t(j)]);
     }
   }
@@ -227,7 +217,8 @@ std::tuple<mfem::Vector, std::vector<mfem::Vector>, std::vector<double>, bool> e
     throw PetscException("Exact trust region solver requires square matrices");
   }
   if (A.Height() != b.Size()) {
-    throw PetscException("The right hand size for exact trust region solve must be consistent with the input matrix size");
+    throw PetscException(
+        "The right hand size for exact trust region solve must be consistent with the input matrix size");
   }
 
   int offset = 0;
@@ -288,8 +279,10 @@ std::tuple<mfem::Vector, std::vector<mfem::Vector>, std::vector<double>, bool> e
     const double tau1 = -pz + std::sqrt(pz * pz + ddmpp);
     const double tau2 = -pz - std::sqrt(pz * pz + ddmpp);
 
-    mfem::Vector x1 = alloc_vector(p.Size()); x1 = p;
-    mfem::Vector x2 = alloc_vector(p.Size()); x2 = p;
+    mfem::Vector x1 = alloc_vector(p.Size());
+    x1 = p;
+    mfem::Vector x2 = alloc_vector(p.Size());
+    x2 = p;
     x1.Add(tau1, leftMost);
     x2.Add(tau2, leftMost);
 
@@ -300,7 +293,7 @@ std::tuple<mfem::Vector, std::vector<mfem::Vector>, std::vector<double>, bool> e
   }
 
   mfem::Vector bvbv = alloc_vector(bv.Size());
-  for(int i=0; i<bv.Size(); ++i) bvbv[i] = bv[i] * bv[i];
+  for (int i = 0; i < bv.Size(); ++i) bvbv[i] = bv[i] * bv[i];
   for (int i = 0; i < sigs.Size(); ++i) sigsPlusLam[i] = sigs[i] + lam;
 
   double pNormSq = pnormSquared(bvbv, sigsPlusLam);
@@ -394,7 +387,8 @@ mfem::Vector combineDirections(const std::vector<const mfem::Vector*>& states, c
 
 TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector<const mfem::Vector*>& states,
                                                    const std::vector<const mfem::Vector*>& Astates,
-                                                   const mfem::Vector& b, double delta, int num_leftmost, mfem::Vector& workspace)
+                                                   const mfem::Vector& b, double delta, int num_leftmost,
+                                                   mfem::Vector& workspace)
 {
   SMITH_MARK_FUNCTION;
   SubspaceProjections projections = projectSubspaceGlobally(states, Astates, b);
@@ -454,7 +448,7 @@ TrustRegionSubspaceResult solveSubspaceProblem(const std::vector<const mfem::Vec
   return solveSubspaceProblemPetsc(directions, A_directions, b, delta, num_leftmost);
 #else
   throw PetscException("MFEM trust-region subspace solve requires MFEM LAPACK support.");
-  return std::make_tuple(b, std::vector<std::shared_ptr<mfem::Vector>> {}, std::vector<double> {}, 0.0);
+  return std::make_tuple(b, std::vector<std::shared_ptr<mfem::Vector>>{}, std::vector<double>{}, 0.0);
 #endif
 }
 
@@ -463,7 +457,7 @@ TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector<const mfem:
                                                    double, int)
 {
   throw PetscException("MFEM trust-region subspace solve requires MFEM LAPACK support.");
-  return std::make_tuple(b, std::vector<std::shared_ptr<mfem::Vector>> {}, std::vector<double> {}, 0.0);
+  return std::make_tuple(b, std::vector<std::shared_ptr<mfem::Vector>>{}, std::vector<double>{}, 0.0);
 }
 
 #endif  // MFEM_USE_LAPACK
diff --git a/src/smith/numerics/petsc_trust_region_subspace.cpp b/src/smith/numerics/petsc_trust_region_subspace.cpp
index 9669359388..bb82215528 100644
--- a/src/smith/numerics/petsc_trust_region_subspace.cpp
+++ b/src/smith/numerics/petsc_trust_region_subspace.cpp
@@ -290,7 +290,8 @@ std::vector<const mfem::Vector*> remove_at(const std::vector<const mfem::Vector*
 
 TrustRegionSubspaceResult solveSubspaceProblemPetsc(const std::vector<const mfem::Vector*>& states,
                                                     const std::vector<const mfem::Vector*>& Astates,
-                                                    const mfem::Vector& b, double delta, int num_leftmost, mfem::Vector& workspace)
+                                                    const mfem::Vector& b, double delta, int num_leftmost,
+                                                    mfem::Vector& workspace)
 {
   SMITH_MARK_FUNCTION;
   DenseMat sAs1 = dot(states, Astates);
diff --git a/src/smith/numerics/solver_config.hpp b/src/smith/numerics/solver_config.hpp
index 6cfdc53014..dc031c4d85 100644
--- a/src/smith/numerics/solver_config.hpp
+++ b/src/smith/numerics/solver_config.hpp
@@ -473,7 +473,8 @@ struct NonlinearSolverOptions {
   /// Include the displacement from current nonlinear-solve state back to the nonlinear-solve initial state.
   bool trust_use_solve_start_direction = false;
 
-  /// Include the displacement from current nonlinear-solve state to the state with the minimum residual seen so far in this nonlinear solve.
+  /// Include the displacement from current nonlinear-solve state to the state with the minimum residual seen so far in
+  /// this nonlinear solve.
   bool trust_use_min_residual_direction = false;
 
   /// Should the gradient be converted to a monolithic matrix
diff --git a/src/smith/numerics/steihaug_toint_cg.cpp b/src/smith/numerics/steihaug_toint_cg.cpp
index 9b4db432da..6c20abea17 100644
--- a/src/smith/numerics/steihaug_toint_cg.cpp
+++ b/src/smith/numerics/steihaug_toint_cg.cpp
@@ -23,13 +23,11 @@ void smith_add(const mfem::Vector& a, double b, const mfem::Vector& c, mfem::Vec
   }
 }
 
-} // namespace
+}  // namespace
 
-void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent,
-                     const mfem::Operator& H, const mfem::Solver* P,
-                     const TrustRegionSettings& settings, double& trSize,
-                     TrustRegionResults& results, double r0_norm_squared, 
-                     const SteihaugTointDelegate& delegate)
+void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem::Operator& H, const mfem::Solver* P,
+                     const TrustRegionSettings& settings, double& trSize, TrustRegionResults& results,
+                     double r0_norm_squared, const SteihaugTointDelegate& delegate)
 {
   // minimize r0@z + 0.5*z@J@z
   results.interior_status = TrustRegionResults::Status::Interior;
@@ -60,20 +58,20 @@ void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent,
 
   z = 0.0;
   double zz = 0.;
-  
+
   // rPr = dot(rCurrent, Pr)
-  auto rPr_arr = delegate.dot_many_2(rCurrent, Pr, rCurrent, Pr); // We only need the first
+  auto rPr_arr = delegate.dot_many_2(rCurrent, Pr, rCurrent, Pr);  // We only need the first
   double rPr = rPr_arr[0];
 
   for (cgIter = 1; cgIter <= settings.max_cg_iterations; ++cgIter) {
     H.Mult(d, Hd);
-    
+
     auto dots = delegate.dot_many_4(d, rCurrent, d, Hd, z, d, d, d);
     double descent_check = dots[0];
     double curvature = dots[1];
     double zd = dots[2];
     double dd = dots[3];
-    
+
     if (descent_check > 0) {
       d *= -1;
       Hd *= -1;
@@ -97,7 +95,7 @@ void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent,
       return;
     }
 
-    auto& zPred = Pr;  
+    auto& zPred = Pr;
     smith_add(z, alphaCg, d, zPred);
     z = zPred;
 
@@ -112,11 +110,11 @@ void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent,
     } else {
       Pr = rCurrent;
     }
-    
+
     auto dots2 = delegate.dot_many_2(rCurrent, Pr, rCurrent, rCurrent);
     double rPrNp1 = dots2[0];
     double r_current_norm_squared = dots2[1];
-    
+
     if (r_current_norm_squared <= cg_tol_squared && cgIter >= settings.min_cg_iterations) {
       return;
     }
@@ -128,7 +126,7 @@ void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent,
 
     zz = zzNp1;
   }
-  cgIter--; 
+  cgIter--;
 }
 
 }  // namespace smith
\ No newline at end of file
diff --git a/src/smith/numerics/steihaug_toint_cg.hpp b/src/smith/numerics/steihaug_toint_cg.hpp
index ad7a27e66d..a2a6087073 100644
--- a/src/smith/numerics/steihaug_toint_cg.hpp
+++ b/src/smith/numerics/steihaug_toint_cg.hpp
@@ -109,16 +109,15 @@ struct TrustRegionResults {
 };
 
 class SteihaugTointDelegate {
-public:
+ public:
   virtual ~SteihaugTointDelegate() = default;
 
-  virtual std::array<double, 4> dot_many_4(const mfem::Vector& a0, const mfem::Vector& b0,
-                                           const mfem::Vector& a1, const mfem::Vector& b1,
-                                           const mfem::Vector& a2, const mfem::Vector& b2,
+  virtual std::array<double, 4> dot_many_4(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1,
+                                           const mfem::Vector& b1, const mfem::Vector& a2, const mfem::Vector& b2,
                                            const mfem::Vector& a3, const mfem::Vector& b3) const = 0;
 
-  virtual std::array<double, 2> dot_many_2(const mfem::Vector& a0, const mfem::Vector& b0,
-                                           const mfem::Vector& a1, const mfem::Vector& b1) const = 0;
+  virtual std::array<double, 2> dot_many_2(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1,
+                                           const mfem::Vector& b1) const = 0;
 
   virtual void projectToBoundaryWithCoefs(mfem::Vector& z, const mfem::Vector& d, double delta, double zz, double zd,
                                           double dd) const = 0;
@@ -126,15 +125,13 @@ class SteihaugTointDelegate {
 
 /**
  * @brief Minimize quadratic sub-problem given residual vector, the action of the stiffness and a preconditioner
- * 
- * This is a standard implementation of 'The Conjugate Gradient Method and Trust Regions in Large Scale Optimization' 
- * by T. Steihaug. It is also called the Steihaug-Toint CG trust region algorithm (see also Trust Region Methods 
+ *
+ * This is a standard implementation of 'The Conjugate Gradient Method and Trust Regions in Large Scale Optimization'
+ * by T. Steihaug. It is also called the Steihaug-Toint CG trust region algorithm (see also Trust Region Methods
  * by Conn, Gould, and Toint).
  */
-void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent,
-                     const mfem::Operator& H, const mfem::Solver* P,
-                     const TrustRegionSettings& settings, double& trSize,
-                     TrustRegionResults& results, double r0_norm_squared, 
-                     const SteihaugTointDelegate& delegate);
+void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem::Operator& H, const mfem::Solver* P,
+                     const TrustRegionSettings& settings, double& trSize, TrustRegionResults& results,
+                     double r0_norm_squared, const SteihaugTointDelegate& delegate);
 
 }  // namespace smith
\ No newline at end of file
diff --git a/src/smith/numerics/tests/test_steihaug_toint_cg.cpp b/src/smith/numerics/tests/test_steihaug_toint_cg.cpp
index 755419667a..bd48fcbba4 100644
--- a/src/smith/numerics/tests/test_steihaug_toint_cg.cpp
+++ b/src/smith/numerics/tests/test_steihaug_toint_cg.cpp
@@ -10,17 +10,16 @@
 namespace {
 
 class TestDelegate : public smith::SteihaugTointDelegate {
-public:
-  std::array<double, 4> dot_many_4(const mfem::Vector& a0, const mfem::Vector& b0,
-                                   const mfem::Vector& a1, const mfem::Vector& b1,
-                                   const mfem::Vector& a2, const mfem::Vector& b2,
+ public:
+  std::array<double, 4> dot_many_4(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1,
+                                   const mfem::Vector& b1, const mfem::Vector& a2, const mfem::Vector& b2,
                                    const mfem::Vector& a3, const mfem::Vector& b3) const override
   {
     return {a0 * b0, a1 * b1, a2 * b2, a3 * b3};
   }
 
-  std::array<double, 2> dot_many_2(const mfem::Vector& a0, const mfem::Vector& b0,
-                                   const mfem::Vector& a1, const mfem::Vector& b1) const override
+  std::array<double, 2> dot_many_2(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1,
+                                   const mfem::Vector& b1) const override
   {
     return {a0 * b0, a1 * b1};
   }
@@ -36,7 +35,7 @@ class TestDelegate : public smith::SteihaugTointDelegate {
 };
 
 class DiagonalOperator : public mfem::Operator {
-public:
+ public:
   DiagonalOperator(const mfem::Vector& diag) : mfem::Operator(diag.Size()), diag_(diag) {}
   void Mult(const mfem::Vector& x, mfem::Vector& y) const override
   {
@@ -44,11 +43,12 @@ class DiagonalOperator : public mfem::Operator {
       y[i] = diag_[i] * x[i];
     }
   }
-private:
+
+ private:
   const mfem::Vector& diag_;
 };
 
-} // namespace
+}  // namespace
 
 TEST(SteihaugTointCG, SolvesSPDInsideBoundary)
 {
@@ -65,15 +65,15 @@ TEST(SteihaugTointCG, SolvesSPDInsideBoundary)
   smith::TrustRegionSettings settings;
   settings.cg_tol = 1e-10;
   settings.max_cg_iterations = 10;
-  
-  double trSize = 100.0; // Huge trust region
+
+  double trSize = 100.0;  // Huge trust region
   smith::TrustRegionResults results(size);
-  
+
   mfem::Vector rCurrent(size);
   TestDelegate delegate;
-  
+
   smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), delegate);
-  
+
   // Solution should be H^{-1} (-r0)
   // x = -0.5, y = -0.25
   EXPECT_NEAR(results.z[0], -0.5, 1e-9);
@@ -93,15 +93,15 @@ TEST(SteihaugTointCG, HitsBoundary)
 
   smith::TrustRegionSettings settings;
   settings.max_cg_iterations = 10;
-  
-  double trSize = 0.5; // Small trust region, solution would be -1.0
+
+  double trSize = 0.5;  // Small trust region, solution would be -1.0
   smith::TrustRegionResults results(size);
-  
+
   mfem::Vector rCurrent(size);
   TestDelegate delegate;
-  
+
   smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), delegate);
-  
+
   EXPECT_NEAR(results.z.Norml2(), 0.5, 1e-9);
   EXPECT_EQ(results.interior_status, smith::TrustRegionResults::Status::OnBoundary);
 }
@@ -110,7 +110,7 @@ TEST(SteihaugTointCG, DetectsNegativeCurvature)
 {
   int size = 1;
   mfem::Vector diag(size);
-  diag[0] = -1.0; // Negative curvature
+  diag[0] = -1.0;  // Negative curvature
   DiagonalOperator H(diag);
 
   mfem::Vector r0(size);
@@ -118,15 +118,15 @@ TEST(SteihaugTointCG, DetectsNegativeCurvature)
 
   smith::TrustRegionSettings settings;
   settings.max_cg_iterations = 10;
-  
+
   double trSize = 2.0;
   smith::TrustRegionResults results(size);
-  
+
   mfem::Vector rCurrent(size);
   TestDelegate delegate;
-  
+
   smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), delegate);
-  
+
   // For negative curvature, it should go to boundary
   EXPECT_NEAR(results.z.Norml2(), 2.0, 1e-9);
   EXPECT_EQ(results.interior_status, smith::TrustRegionResults::Status::NegativeCurvature);
diff --git a/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp b/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp
index a476c02ee5..9a1657fd81 100644
--- a/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp
+++ b/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp
@@ -18,7 +18,8 @@ namespace {
 constexpr int test_size = 5;
 constexpr double test_delta = 1.0e-3;
 
-std::vector<mfem::Vector> applyDiagonalOperator(const mfem::Vector& diag, const std::vector<const mfem::Vector*>& states)
+std::vector<mfem::Vector> applyDiagonalOperator(const mfem::Vector& diag,
+                                                const std::vector<const mfem::Vector*>& states)
 {
   std::vector<mfem::Vector> out;
   out.reserve(states.size());
@@ -50,12 +51,7 @@ std::vector<const mfem::Vector*> toPointers(const std::vector<mfem::Vector>& vec
 }
 
 struct DiagonalSubspaceFixture {
-  DiagonalSubspaceFixture(int size)
-      : u1(size),
-        u2(size),
-        u3(size),
-        diag(size),
-        b(size)
+  DiagonalSubspaceFixture(int size) : u1(size), u2(size), u3(size), diag(size), b(size)
   {
     u1 = 1.0;
     for (int i = 0; i < size; ++i) {
@@ -75,8 +71,6 @@ struct DiagonalSubspaceFixture {
 
 }  // namespace
 
-
-
 TEST(TrustRegionSubspaceMfem, SolveHitsTrustRegionBoundary)
 {
   DiagonalSubspaceFixture fixture(test_size);
@@ -156,7 +150,6 @@ TEST(TrustRegionSubspaceMfem, SolveHandlesZeroDirection)
   EXPECT_LT(energy, 0.0);
 }
 
-
 int main(int argc, char* argv[])
 {
   ::testing::InitGoogleTest(&argc, argv);
diff --git a/src/smith/numerics/trust_region_solver.hpp b/src/smith/numerics/trust_region_solver.hpp
index 9d26fc3c36..a02824e44a 100644
--- a/src/smith/numerics/trust_region_solver.hpp
+++ b/src/smith/numerics/trust_region_solver.hpp
@@ -37,7 +37,8 @@ class PetscException : public std::exception {
   std::string msg;
 };
 
-enum class TrustRegionSubspaceBackend {
+enum class TrustRegionSubspaceBackend
+{
   Petsc,
   Mfem
 };
@@ -53,18 +54,21 @@ double innerProduct(const mfem::Vector& a, const mfem::Vector& b, const MPI_Comm
 
 /// @brief returns the solution, as well as a list of the N leftmost eigenvectors
 /// and their eigenvalues, and the predicted model energy change
-TrustRegionSubspaceResult solveSubspaceProblem(
-    const std::vector<const mfem::Vector*>& directions, const std::vector<const mfem::Vector*>& A_directions,
-    const mfem::Vector& b, double delta, int num_leftmost, mfem::Vector& workspace);
+TrustRegionSubspaceResult solveSubspaceProblem(const std::vector<const mfem::Vector*>& directions,
+                                               const std::vector<const mfem::Vector*>& A_directions,
+                                               const mfem::Vector& b, double delta, int num_leftmost,
+                                               mfem::Vector& workspace);
 
 #if defined(SMITH_USE_SLEPC) && defined(SMITH_TRUST_REGION_USE_PETSC_SUBSPACE)
-TrustRegionSubspaceResult solveSubspaceProblemPetsc(
-    const std::vector<const mfem::Vector*>& directions, const std::vector<const mfem::Vector*>& A_directions,
-    const mfem::Vector& b, double delta, int num_leftmost, mfem::Vector& workspace);
+TrustRegionSubspaceResult solveSubspaceProblemPetsc(const std::vector<const mfem::Vector*>& directions,
+                                                    const std::vector<const mfem::Vector*>& A_directions,
+                                                    const mfem::Vector& b, double delta, int num_leftmost,
+                                                    mfem::Vector& workspace);
 #endif
 
-TrustRegionSubspaceResult solveSubspaceProblemMfem(
-    const std::vector<const mfem::Vector*>& directions, const std::vector<const mfem::Vector*>& A_directions,
-    const mfem::Vector& b, double delta, int num_leftmost, mfem::Vector& workspace);
+TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector<const mfem::Vector*>& directions,
+                                                   const std::vector<const mfem::Vector*>& A_directions,
+                                                   const mfem::Vector& b, double delta, int num_leftmost,
+                                                   mfem::Vector& workspace);
 
 }  // namespace smith

From 8106675616df2240b2d8d1682c36cc9e9a83a16a Mon Sep 17 00:00:00 2001
From: Michael Tupek <mrtupek@gmail.com>
Date: Sat, 9 May 2026 17:48:38 -0600
Subject: [PATCH 17/27] Simplify solver changes, use SLIC in test.

---
 src/smith/numerics/equation_solver.cpp        | 31 ++-----------------
 .../numerics/mfem_trust_region_subspace.cpp   | 25 +++++++--------
 .../numerics/petsc_trust_region_subspace.cpp  | 11 +++----
 src/smith/numerics/steihaug_toint_cg.cpp      |  4 +--
 src/smith/numerics/steihaug_toint_cg.hpp      |  6 +---
 .../tests/test_trust_region_solver_mfem.cpp   | 11 +++----
 .../tests/test_trust_region_solver_petsc.cpp  |  8 ++---
 src/smith/numerics/trust_region_solver.hpp    | 20 ++++--------
 .../physics/tests/shallow_arch_buckling.cpp   | 22 ++++++-------
 9 files changed, 43 insertions(+), 95 deletions(-)

diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp
index 0780abc23b..6f34db7fc2 100644
--- a/src/smith/numerics/equation_solver.cpp
+++ b/src/smith/numerics/equation_solver.cpp
@@ -275,8 +275,6 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
   mutable std::vector<std::shared_ptr<mfem::Vector>> left_mosts;
   /// the action of the stiffness/hessian (H) on the left most eigenvectors
   mutable std::vector<std::shared_ptr<mfem::Vector>> H_left_mosts;
-  /// previous accepted-iteration Hessian actions on the retained left most eigenvectors
-  mutable std::vector<std::shared_ptr<mfem::Vector>> previous_H_left_mosts;
   /// accepted TrustRegion steps, newest first
   mutable std::deque<std::shared_ptr<mfem::Vector>> accepted_step_history;
   /// initial state for this nonlinear solve, used as an optional history direction
@@ -284,9 +282,6 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
   mutable mfem::Vector min_residual_x;
   mutable double min_residual_norm = -1.0;
 
-  /// Workspace vector for exact subspace solver to avoid small allocations
-  mutable mfem::Vector exact_solver_workspace;
-
   /// nonlinear solution options
   NonlinearSolverOptions nonlinear_options;
   /// linear solution options
@@ -461,11 +456,8 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
     double energy_change;
 
     try {
-      if (exact_solver_workspace.Size() < 2000) {
-        exact_solver_workspace.SetSize(2000);
-      }
       std::tie(sol, leftvecs, leftvals, energy_change) =
-          solveSubspaceProblem(directions, H_directions, b, delta, num_leftmost, exact_solver_workspace);
+          solveSubspaceProblem(directions, H_directions, b, delta, num_leftmost);
     } catch (const std::exception& e) {
       if (print_level >= 1) {
         mfem::out << "subspace solve failed with " << e.what() << std::endl;
@@ -547,20 +539,6 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
     steihaugTointCG(r0, rCurrent, H, P, settings, trSize, results, r0_norm_squared, *this);
   }
 
-  std::unique_ptr<mfem::Operator> cloneAssembledOperator(const mfem::Operator& op) const
-  {
-    if (const auto* hypre_matrix = dynamic_cast<const mfem::HypreParMatrix*>(&op)) {
-      return std::make_unique<mfem::HypreParMatrix>(*hypre_matrix);
-    }
-    if (const auto* sparse_matrix = dynamic_cast<const mfem::SparseMatrix*>(&op)) {
-      return std::make_unique<mfem::SparseMatrix>(*sparse_matrix);
-    }
-    if (const auto* block_operator = dynamic_cast<const mfem::BlockOperator*>(&op)) {
-      return buildMonolithicMatrix(*block_operator);
-    }
-    return nullptr;
-  }
-
   /// assemble the jacobian
   void assembleJacobian(const mfem::Vector& x) const
   {
@@ -615,7 +593,6 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
     solve_start_x = X;
     min_residual_x.SetSize(X.Size());
     min_residual_x = X;
-    previous_H_left_mosts.clear();
 
     real_t norm, norm_goal = 0.0;
     norm = initial_norm = computeResidual(X, r);
@@ -723,7 +700,7 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
         trResults.interior_status = TrustRegionResults::Status::OnBoundary;
       } else {
         settings.cg_tol = std::max(0.5 * norm_goal, 5e-5 * norm);
-        solveModelProblem(r, scratch, *this->oper, &this->tr_precond, settings, tr_size, trResults, norm * norm);
+        solveModelProblem(r, scratch, *grad, &this->tr_precond, settings, tr_size, trResults, norm * norm);
       }
       cumulative_cg_iters_from_last_precond_update += trResults.cg_iterations_count;
 
@@ -761,7 +738,6 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
 
           if (!have_computed_H_left_mosts) {
             have_computed_H_left_mosts = true;
-            previous_H_left_mosts = H_left_mosts;
             H_left_mosts.clear();
             std::vector<const mfem::Vector*> leftmost_inputs;
             std::vector<mfem::Vector*> leftmost_outputs;
@@ -827,7 +803,6 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
               H_min_residual_direction.SetSize(X.Size());
               std::vector<const mfem::Vector*> min_res_inputs{&min_residual_direction};
               std::vector<mfem::Vector*> min_res_outputs{&H_min_residual_direction};
-              // Reusing solve_start counters for now
               batchedSubspaceHessVec(hess_vec_func, min_res_inputs, min_res_outputs);
               ds.push_back(&min_residual_direction);
               H_ds.push_back(&H_min_residual_direction);
@@ -861,7 +836,6 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
 
         if (normPred <= norm_goal) {
           trResults.d_old = trResults.d;
-          trResults.H_d_old_at_accept = trResults.H_d;
           trResults.has_d_old = true;
           pushAcceptedStepHistory(trResults.d);
           if (!candidate_left_mosts.empty()) {
@@ -918,7 +892,6 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
 
         if (willAccept) {
           trResults.d_old = trResults.d;
-          trResults.H_d_old_at_accept = trResults.H_d;
           trResults.has_d_old = true;
           pushAcceptedStepHistory(trResults.d);
           if (!candidate_left_mosts.empty()) {
diff --git a/src/smith/numerics/mfem_trust_region_subspace.cpp b/src/smith/numerics/mfem_trust_region_subspace.cpp
index 716a2700ab..ac66e814c3 100644
--- a/src/smith/numerics/mfem_trust_region_subspace.cpp
+++ b/src/smith/numerics/mfem_trust_region_subspace.cpp
@@ -31,10 +31,9 @@ double innerProduct(const mfem::Vector& a, const mfem::Vector& b, const MPI_Comm
 
 TrustRegionSubspaceResult solveSubspaceProblem(const std::vector<const mfem::Vector*>& directions,
                                                const std::vector<const mfem::Vector*>& A_directions,
-                                               const mfem::Vector& b, double delta, int num_leftmost,
-                                               mfem::Vector& workspace)
+                                               const mfem::Vector& b, double delta, int num_leftmost)
 {
-  return solveSubspaceProblemMfem(directions, A_directions, b, delta, num_leftmost, workspace);
+  return solveSubspaceProblemMfem(directions, A_directions, b, delta, num_leftmost);
 }
 
 namespace {
@@ -211,16 +210,17 @@ mfem::DenseMatrix columnsToMatrix(const std::vector<mfem::Vector>& cols)
  *         - A boolean indicating success.
  */
 std::tuple<mfem::Vector, std::vector<mfem::Vector>, std::vector<double>, bool> exactTrustRegionSolve(
-    mfem::DenseMatrix A, const mfem::Vector& b, double delta, int num_leftmost, mfem::Vector& workspace)
+    mfem::DenseMatrix A, const mfem::Vector& b, double delta, int num_leftmost)
 {
   if (A.Height() != A.Width()) {
-    throw PetscException("Exact trust region solver requires square matrices");
+    throw TrustRegionException("Exact trust region solver requires square matrices");
   }
   if (A.Height() != b.Size()) {
-    throw PetscException(
+    throw TrustRegionException(
         "The right hand size for exact trust region solve must be consistent with the input matrix size");
   }
 
+  mfem::Vector workspace(b.Size() * b.Size() + 8 * b.Size());
   int offset = 0;
   auto alloc_vector = [&](int size) {
     mfem::Vector v(workspace.GetData() + offset, size);
@@ -387,8 +387,7 @@ mfem::Vector combineDirections(const std::vector<const mfem::Vector*>& states, c
 
 TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector<const mfem::Vector*>& states,
                                                    const std::vector<const mfem::Vector*>& Astates,
-                                                   const mfem::Vector& b, double delta, int num_leftmost,
-                                                   mfem::Vector& workspace)
+                                                   const mfem::Vector& b, double delta, int num_leftmost)
 {
   SMITH_MARK_FUNCTION;
   SubspaceProjections projections = projectSubspaceGlobally(states, Astates, b);
@@ -398,7 +397,7 @@ TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector<const mfem:
   for (int i = 0; i < sAs.Height(); ++i) {
     for (int j = 0; j < sAs.Width(); ++j) {
       if (std::isnan(sAs(i, j))) {
-        throw PetscException("States in subspace solve contain NaNs.");
+        throw TrustRegionException("States in subspace solve contain NaNs.");
       }
     }
   }
@@ -414,7 +413,7 @@ TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector<const mfem:
     return std::make_tuple(sol, std::vector<std::shared_ptr<mfem::Vector>>{}, std::vector<double>{}, 0.0);
   }
   if (T.Width() == 0) {
-    throw PetscException("No independent directions in MFEM subspace solve.");
+    throw TrustRegionException("No independent directions in MFEM subspace solve.");
   }
   mfem::DenseMatrix pAp = tripleProduct(T, sAs, T);
   symmetrize(pAp);
@@ -422,7 +421,7 @@ TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector<const mfem:
   const mfem::Vector& sb = projections.sb;
   const mfem::Vector pb = projectWithTranspose(T, sb);
 
-  auto [reduced_x, leftvecs, leftvals, success] = exactTrustRegionSolve(pAp, pb, delta, num_leftmost, workspace);
+  auto [reduced_x, leftvecs, leftvals, success] = exactTrustRegionSolve(pAp, pb, delta, num_leftmost);
   (void)success;
   const double energy = quadraticEnergy(pAp, pb, reduced_x);
 
@@ -447,7 +446,7 @@ TrustRegionSubspaceResult solveSubspaceProblem(const std::vector<const mfem::Vec
 #if defined(SMITH_USE_SLEPC) && defined(SMITH_TRUST_REGION_USE_PETSC_SUBSPACE)
   return solveSubspaceProblemPetsc(directions, A_directions, b, delta, num_leftmost);
 #else
-  throw PetscException("MFEM trust-region subspace solve requires MFEM LAPACK support.");
+  throw TrustRegionException("Trust-region subspace solve requires MFEM LAPACK support.");
   return std::make_tuple(b, std::vector<std::shared_ptr<mfem::Vector>>{}, std::vector<double>{}, 0.0);
 #endif
 }
@@ -456,7 +455,7 @@ TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector<const mfem:
                                                    const std::vector<const mfem::Vector*>&, const mfem::Vector& b,
                                                    double, int)
 {
-  throw PetscException("MFEM trust-region subspace solve requires MFEM LAPACK support.");
+  throw TrustRegionException("MFEM trust-region subspace solve requires MFEM LAPACK support.");
   return std::make_tuple(b, std::vector<std::shared_ptr<mfem::Vector>>{}, std::vector<double>{}, 0.0);
 }
 
diff --git a/src/smith/numerics/petsc_trust_region_subspace.cpp b/src/smith/numerics/petsc_trust_region_subspace.cpp
index bb82215528..6883f2571c 100644
--- a/src/smith/numerics/petsc_trust_region_subspace.cpp
+++ b/src/smith/numerics/petsc_trust_region_subspace.cpp
@@ -141,7 +141,7 @@ auto qr(const std::vector<const mfem::Vector*>& states)
   MatCreateSeqDense(PETSC_COMM_SELF, num_cols, num_cols, NULL, &R);
   auto error = BVOrthogonalize(Q, R);
 
-  if (error) throw PetscException("BVOrthogonalize failed.");
+  if (error) throw TrustRegionException("BVOrthogonalize failed.");
 
   return std::make_pair(Q, DenseMat(R));
 }
@@ -290,21 +290,20 @@ std::vector<const mfem::Vector*> remove_at(const std::vector<const mfem::Vector*
 
 TrustRegionSubspaceResult solveSubspaceProblemPetsc(const std::vector<const mfem::Vector*>& states,
                                                     const std::vector<const mfem::Vector*>& Astates,
-                                                    const mfem::Vector& b, double delta, int num_leftmost,
-                                                    mfem::Vector& workspace)
+                                                    const mfem::Vector& b, double delta, int num_leftmost)
 {
   SMITH_MARK_FUNCTION;
   DenseMat sAs1 = dot(states, Astates);
   DenseMat sAs = sym(sAs1);
 
   if (sAs.hasNan()) {
-    throw PetscException("States in subspace solve contain NaNs.");
+    throw TrustRegionException("States in subspace solve contain NaNs.");
   }
 
   auto [Q_parallel, R] = qr(states);
 
   if (R.hasNan()) {
-    throw PetscException("R from qr returning with a NaN.");
+    throw TrustRegionException("R from qr returning with a NaN.");
   }
 
   auto [rows, cols] = R.size();
@@ -319,7 +318,7 @@ TrustRegionSubspaceResult solveSubspaceProblemPetsc(const std::vector<const mfem
     if (R(i, i) < 1e-9 * trace_mag) {
       auto statesNew = remove_at(states, size_t(i));
       auto AstatesNew = remove_at(Astates, size_t(i));
-      return solveSubspaceProblemPetsc(statesNew, AstatesNew, b, delta, num_leftmost, workspace);
+      return solveSubspaceProblemPetsc(statesNew, AstatesNew, b, delta, num_leftmost);
     }
   }
 
diff --git a/src/smith/numerics/steihaug_toint_cg.cpp b/src/smith/numerics/steihaug_toint_cg.cpp
index 6c20abea17..546fcd81e5 100644
--- a/src/smith/numerics/steihaug_toint_cg.cpp
+++ b/src/smith/numerics/steihaug_toint_cg.cpp
@@ -13,8 +13,6 @@ namespace {
 void smith_add(const mfem::Vector& a, double b, const mfem::Vector& c, mfem::Vector& out)
 {
   if (out.GetData() == c.GetData()) {
-    // We expect out and c are often the same vector memory (zPred = Pr, z += alpha * d)
-    // Wait, add(a, b, c, out) means out = a + b*c
     out = a;
     out.Add(b, c);
   } else {
@@ -129,4 +127,4 @@ void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem:
   cgIter--;
 }
 
-}  // namespace smith
\ No newline at end of file
+}  // namespace smith
diff --git a/src/smith/numerics/steihaug_toint_cg.hpp b/src/smith/numerics/steihaug_toint_cg.hpp
index a2a6087073..8ace278a61 100644
--- a/src/smith/numerics/steihaug_toint_cg.hpp
+++ b/src/smith/numerics/steihaug_toint_cg.hpp
@@ -46,7 +46,6 @@ struct TrustRegionResults {
     H_z.SetSize(size);
     d_old.SetSize(size);
     H_d_old.SetSize(size);
-    H_d_old_at_accept.SetSize(size);
     d.SetSize(size);
     H_d.SetSize(size);
     Pr.SetSize(size);
@@ -56,7 +55,6 @@ struct TrustRegionResults {
     H_z = 0.0;
     d_old = 0.0;
     H_d_old = 0.0;
-    H_d_old_at_accept = 0.0;
     d = 0.0;
     H_d = 0.0;
     Pr = 0.0;
@@ -88,8 +86,6 @@ struct TrustRegionResults {
   mfem::Vector d_old;
   /// action of hessian on previous step z_old
   mfem::Vector H_d_old;
-  /// action of previous accepted hessian on previous step z_old
-  mfem::Vector H_d_old_at_accept;
   /// true after at least one accepted line-search step has populated d_old
   bool has_d_old = false;
   /// incrementalCG direction
@@ -134,4 +130,4 @@ void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem:
                      const TrustRegionSettings& settings, double& trSize, TrustRegionResults& results,
                      double r0_norm_squared, const SteihaugTointDelegate& delegate);
 
-}  // namespace smith
\ No newline at end of file
+}  // namespace smith
diff --git a/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp b/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp
index 9a1657fd81..cb5fc328af 100644
--- a/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp
+++ b/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp
@@ -79,9 +79,8 @@ TEST(TrustRegionSubspaceMfem, SolveHitsTrustRegionBoundary)
   const auto astates = applyDiagonalOperator(fixture.diag, states);
   const auto astate_ptrs = toPointers(astates);
 
-  mfem::Vector workspace(2000);
   auto [sol, leftvecs, leftvals, energy] =
-      smith::solveSubspaceProblemMfem(states, astate_ptrs, fixture.b, test_delta, 1, workspace);
+      smith::solveSubspaceProblemMfem(states, astate_ptrs, fixture.b, test_delta, 1);
 
   EXPECT_NEAR(sol.Norml2(), test_delta, 1.0e-12);
   EXPECT_FALSE(leftvecs.empty());
@@ -97,11 +96,10 @@ TEST(TrustRegionSubspaceMfem, GenericSolveUsesMfemBackend)
   const auto astates = applyDiagonalOperator(fixture.diag, states);
   const auto astate_ptrs = toPointers(astates);
 
-  mfem::Vector workspace(2000);
   auto [generic_sol, generic_leftvecs, generic_leftvals, generic_energy] =
-      smith::solveSubspaceProblem(states, astate_ptrs, fixture.b, test_delta, 2, workspace);
+      smith::solveSubspaceProblem(states, astate_ptrs, fixture.b, test_delta, 2);
   auto [mfem_sol, mfem_leftvecs, mfem_leftvals, mfem_energy] =
-      smith::solveSubspaceProblemMfem(states, astate_ptrs, fixture.b, test_delta, 2, workspace);
+      smith::solveSubspaceProblemMfem(states, astate_ptrs, fixture.b, test_delta, 2);
 
   expectNearVector(generic_sol, mfem_sol, 1.0e-12);
   ASSERT_EQ(generic_leftvecs.size(), mfem_leftvecs.size());
@@ -141,8 +139,7 @@ TEST(TrustRegionSubspaceMfem, SolveHandlesZeroDirection)
   const auto astates = applyDiagonalOperator(diag, states);
   const auto astate_ptrs = toPointers(astates);
 
-  mfem::Vector workspace(2000);
-  auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblemMfem(states, astate_ptrs, b, 0.25, 1, workspace);
+  auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblemMfem(states, astate_ptrs, b, 0.25, 1);
 
   EXPECT_LE(sol.Norml2(), 0.25 + 1.0e-12);
   EXPECT_FALSE(leftvecs.empty());
diff --git a/src/smith/numerics/tests/test_trust_region_solver_petsc.cpp b/src/smith/numerics/tests/test_trust_region_solver_petsc.cpp
index d0746e83b9..55c7a16f77 100644
--- a/src/smith/numerics/tests/test_trust_region_solver_petsc.cpp
+++ b/src/smith/numerics/tests/test_trust_region_solver_petsc.cpp
@@ -153,8 +153,7 @@ TEST_F(MeshFixture, PetscSubspaceSolveHitsTrustRegionBoundary)
   }
 
   double delta = 0.001;
-  mfem::Vector workspace(2000);
-  auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblemPetsc(states, AstatePtrs, b, delta, 1, workspace);
+  auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblemPetsc(states, AstatePtrs, b, delta, 1);
 
   EXPECT_NEAR(sol.Norml2(), delta, 1e-12);
   EXPECT_FALSE(leftvecs.empty());
@@ -191,11 +190,10 @@ TEST_F(MeshFixture, MfemSubspaceSolveMatchesPetsc)
     AstatePtrs.push_back(&Astates[i]);
   }
 
-  mfem::Vector workspace(2000);
   auto [petsc_sol, petsc_leftvecs, petsc_leftvals, petsc_energy] =
-      smith::solveSubspaceProblemPetsc(states, AstatePtrs, b, 0.001, 2, workspace);
+      smith::solveSubspaceProblemPetsc(states, AstatePtrs, b, 0.001, 2);
   auto [mfem_sol, mfem_leftvecs, mfem_leftvals, mfem_energy] =
-      smith::solveSubspaceProblemMfem(states, AstatePtrs, b, 0.001, 2, workspace);
+      smith::solveSubspaceProblemMfem(states, AstatePtrs, b, 0.001, 2);
 
   expectNearVector(mfem_sol, petsc_sol, 1e-10);
   ASSERT_EQ(mfem_leftvecs.size(), petsc_leftvecs.size());
diff --git a/src/smith/numerics/trust_region_solver.hpp b/src/smith/numerics/trust_region_solver.hpp
index a02824e44a..7cbd1463aa 100644
--- a/src/smith/numerics/trust_region_solver.hpp
+++ b/src/smith/numerics/trust_region_solver.hpp
@@ -14,6 +14,7 @@
 
 #include "smith/smith_config.hpp"
 
+#include <exception>
 #include <memory>
 #include <string>
 #include <tuple>
@@ -24,10 +25,10 @@
 
 namespace smith {
 
-class PetscException : public std::exception {
+class TrustRegionException : public std::exception {
  public:
   /// constructor
-  PetscException(const std::string& message) : msg(message) {}
+  TrustRegionException(const std::string& message) : msg(message) {}
 
   /// what is message
   const char* what() const noexcept override { return msg.c_str(); }
@@ -37,12 +38,6 @@ class PetscException : public std::exception {
   std::string msg;
 };
 
-enum class TrustRegionSubspaceBackend
-{
-  Petsc,
-  Mfem
-};
-
 using TrustRegionSubspaceResult =
     std::tuple<mfem::Vector, std::vector<std::shared_ptr<mfem::Vector>>, std::vector<double>, double>;
 
@@ -56,19 +51,16 @@ double innerProduct(const mfem::Vector& a, const mfem::Vector& b, const MPI_Comm
 /// and their eigenvalues, and the predicted model energy change
 TrustRegionSubspaceResult solveSubspaceProblem(const std::vector<const mfem::Vector*>& directions,
                                                const std::vector<const mfem::Vector*>& A_directions,
-                                               const mfem::Vector& b, double delta, int num_leftmost,
-                                               mfem::Vector& workspace);
+                                               const mfem::Vector& b, double delta, int num_leftmost);
 
 #if defined(SMITH_USE_SLEPC) && defined(SMITH_TRUST_REGION_USE_PETSC_SUBSPACE)
 TrustRegionSubspaceResult solveSubspaceProblemPetsc(const std::vector<const mfem::Vector*>& directions,
                                                     const std::vector<const mfem::Vector*>& A_directions,
-                                                    const mfem::Vector& b, double delta, int num_leftmost,
-                                                    mfem::Vector& workspace);
+                                                    const mfem::Vector& b, double delta, int num_leftmost);
 #endif
 
 TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector<const mfem::Vector*>& directions,
                                                    const std::vector<const mfem::Vector*>& A_directions,
-                                                   const mfem::Vector& b, double delta, int num_leftmost,
-                                                   mfem::Vector& workspace);
+                                                   const mfem::Vector& b, double delta, int num_leftmost);
 
 }  // namespace smith
diff --git a/src/smith/physics/tests/shallow_arch_buckling.cpp b/src/smith/physics/tests/shallow_arch_buckling.cpp
index 00ca99d7fa..b554a11efd 100644
--- a/src/smith/physics/tests/shallow_arch_buckling.cpp
+++ b/src/smith/physics/tests/shallow_arch_buckling.cpp
@@ -4,6 +4,7 @@
 //
 // SPDX-License-Identifier: (BSD-3-Clause)
 
+#include <format>
 #include <memory>
 #include <stdexcept>
 #include <string>
@@ -14,6 +15,7 @@
 #include "mfem.hpp"
 
 #include "smith/infrastructure/application_manager.hpp"
+#include "smith/infrastructure/logger.hpp"
 #include "smith/numerics/functional/domain.hpp"
 #include "smith/numerics/functional/tensor.hpp"
 #include "smith/numerics/solver_config.hpp"
@@ -86,13 +88,11 @@ void parseCommandLine(int& argc, char** argv)
 TEST(ShallowArchBuckling, CompressedThinBeamSnapThrough)
 {
   MPI_Barrier(MPI_COMM_WORLD);
-  int rank = 0;
-  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 
   constexpr int p = 1;
   constexpr int dim = 2;
-  constexpr int nx = 150;
-  constexpr int ny = 6;
+  constexpr int nx = 120;
+  constexpr int ny = 5;
 
   axom::sidre::DataStore datastore;
   smith::StateManager::initialize(datastore, "shallow_arch_buckling");
@@ -161,19 +161,15 @@ TEST(ShallowArchBuckling, CompressedThinBeamSnapThrough)
   solid.completeSetup();
   solid.outputStateToDisk("shallow_arch_buckling");
 
-  if (rank == 0) {
-    mfem::out << "Compressed thin beam snap-through run: solver = " << solver_name
-              << ", trust_subspace_option = " << trust_subspace_option
-              << ", trust_num_leftmost = " << trust_num_leftmost << ", trust_num_past_steps = " << trust_num_past_steps
-              << '\n';
-  }
+  SLIC_INFO_ROOT(
+      std::format("Compressed thin beam snap-through run: solver = {}, trust_subspace_option = {}, "
+                  "trust_num_leftmost = {}, trust_num_past_steps = {}",
+                  solver_name, trust_subspace_option, trust_num_leftmost, trust_num_past_steps));
 
   constexpr int num_steps = 5;
   for (int step = 0; step < num_steps; ++step) {
     solid.advanceTimestep(1.0 / num_steps);
-    if (rank == 0) {
-      mfem::out << "Load step " << step + 1 << "/" << num_steps << '\n';
-    }
+    SLIC_INFO_ROOT(std::format("Load step {}/{}", step + 1, num_steps));
     solid.outputStateToDisk("shallow_arch_buckling");
   }
 }

From dae08a6cda222f9f47229fca46b31cf55b39fb76 Mon Sep 17 00:00:00 2001
From: Michael Tupek <mrtupek@gmail.com>
Date: Sat, 9 May 2026 18:16:42 -0600
Subject: [PATCH 18/27] Docs and style.

---
 src/smith/numerics/equation_solver.cpp              | 8 ++++++++
 src/smith/numerics/functional/differentiate_wrt.hpp | 8 ++++----
 src/smith/numerics/mfem_trust_region_subspace.cpp   | 1 +
 src/smith/numerics/steihaug_toint_cg.hpp            | 5 +++++
 src/smith/numerics/trust_region_solver.hpp          | 2 ++
 5 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp
index 6f34db7fc2..f5f205c9fe 100644
--- a/src/smith/numerics/equation_solver.cpp
+++ b/src/smith/numerics/equation_solver.cpp
@@ -279,7 +279,9 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
   mutable std::deque<std::shared_ptr<mfem::Vector>> accepted_step_history;
   /// initial state for this nonlinear solve, used as an optional history direction
   mutable mfem::Vector solve_start_x;
+  /// state with the lowest residual norm seen in this nonlinear solve
   mutable mfem::Vector min_residual_x;
+  /// lowest residual norm seen in this nonlinear solve
   mutable double min_residual_norm = -1.0;
 
   /// nonlinear solution options
@@ -304,6 +306,7 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
   }
 #endif
 
+  /// compute several vector inner products with a single MPI reduction when possible
   template <typename... Args>
   std::array<double, sizeof...(Args) / 2> dot_many(const Args&... args) const
   {
@@ -371,6 +374,7 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
     return products;
   }
 
+  /// apply Hessian-vector products for all supplied subspace directions
   template <typename HessVecFunc>
   void batchedSubspaceHessVec(HessVecFunc hess_vec_func, const std::vector<const mfem::Vector*>& inputs,
                               const std::vector<mfem::Vector*>& outputs) const
@@ -385,6 +389,7 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
     }
   }
 
+  /// store accepted steps for optional later subspace enrichment
   void pushAcceptedStepHistory(const mfem::Vector& step) const
   {
     if (nonlinear_options.trust_num_past_steps <= 0) {
@@ -399,6 +404,7 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
     }
   }
 
+  /// SteihaugTointDelegate implementation for four inner products.
   std::array<double, 4> dot_many_4(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1,
                                    const mfem::Vector& b1, const mfem::Vector& a2, const mfem::Vector& b2,
                                    const mfem::Vector& a3, const mfem::Vector& b3) const override
@@ -406,12 +412,14 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
     return dot_many(a0, b0, a1, b1, a2, b2, a3, b3);
   }
 
+  /// SteihaugTointDelegate implementation for two inner products.
   std::array<double, 2> dot_many_2(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1,
                                    const mfem::Vector& b1) const override
   {
     return dot_many(a0, b0, a1, b1);
   }
 
+  /// SteihaugTointDelegate implementation for projecting to the trust-region boundary.
   void projectToBoundaryWithCoefs(mfem::Vector& z, const mfem::Vector& d, double delta, double zz, double zd,
                                   double dd) const override
   {
diff --git a/src/smith/numerics/functional/differentiate_wrt.hpp b/src/smith/numerics/functional/differentiate_wrt.hpp
index d72a0bfdd0..a3c71102bc 100644
--- a/src/smith/numerics/functional/differentiate_wrt.hpp
+++ b/src/smith/numerics/functional/differentiate_wrt.hpp
@@ -15,19 +15,19 @@ template <uint32_t i>
 struct DifferentiateWRT {};
 
 /**
- * @brief this type exists solely as a way to signal to `smith::Functional` that the function
- * smith::Functional::operator()` should differentiate w.r.t. a specific argument
+ * @brief this type exists solely as a way to signal to smith::Functional that the function
+ * smith::Functional::operator() should differentiate w.r.t. a specific argument
  */
 struct differentiate_wrt_this {
   const mfem::Vector& ref;  ///< the actual data wrapped by this type
 
-  /// @brief implicitly convert back to `mfem::Vector` to extract the actual data
+  /// @brief implicitly convert back to mfem::Vector to extract the actual data
   operator const mfem::Vector&() const { return ref; }
 };
 
 /**
  * @brief this function is intended to only be used in combination with
- *   `smith::Functional::operator()`, as a way for the user to express that
+ *   smith::Functional::operator(), as a way for the user to express that
  *   it should both evaluate and differentiate w.r.t. a specific argument (only 1 argument at a time)
  *
  * For example:
diff --git a/src/smith/numerics/mfem_trust_region_subspace.cpp b/src/smith/numerics/mfem_trust_region_subspace.cpp
index ac66e814c3..0f2eddb32a 100644
--- a/src/smith/numerics/mfem_trust_region_subspace.cpp
+++ b/src/smith/numerics/mfem_trust_region_subspace.cpp
@@ -451,6 +451,7 @@ TrustRegionSubspaceResult solveSubspaceProblem(const std::vector<const mfem::Vec
 #endif
 }
 
+/// @brief report unavailable MFEM subspace solve when MFEM was built without LAPACK.
 TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector<const mfem::Vector*>&,
                                                    const std::vector<const mfem::Vector*>&, const mfem::Vector& b,
                                                    double, int)
diff --git a/src/smith/numerics/steihaug_toint_cg.hpp b/src/smith/numerics/steihaug_toint_cg.hpp
index 8ace278a61..c44316b637 100644
--- a/src/smith/numerics/steihaug_toint_cg.hpp
+++ b/src/smith/numerics/steihaug_toint_cg.hpp
@@ -104,17 +104,22 @@ struct TrustRegionResults {
   size_t cg_iterations_count = 0;
 };
 
+/// Delegate for operations Steihaug-Toint CG needs from its caller.
 class SteihaugTointDelegate {
  public:
+  /// destructor
   virtual ~SteihaugTointDelegate() = default;
 
+  /// compute four inner products with one global reduction.
   virtual std::array<double, 4> dot_many_4(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1,
                                            const mfem::Vector& b1, const mfem::Vector& a2, const mfem::Vector& b2,
                                            const mfem::Vector& a3, const mfem::Vector& b3) const = 0;
 
+  /// compute two inner products with one global reduction.
   virtual std::array<double, 2> dot_many_2(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1,
                                            const mfem::Vector& b1) const = 0;
 
+  /// project z along d to the trust-region boundary using precomputed dot products.
   virtual void projectToBoundaryWithCoefs(mfem::Vector& z, const mfem::Vector& d, double delta, double zz, double zd,
                                           double dd) const = 0;
 };
diff --git a/src/smith/numerics/trust_region_solver.hpp b/src/smith/numerics/trust_region_solver.hpp
index 7cbd1463aa..98dc9486b8 100644
--- a/src/smith/numerics/trust_region_solver.hpp
+++ b/src/smith/numerics/trust_region_solver.hpp
@@ -25,6 +25,7 @@
 
 namespace smith {
 
+/// Exception type for trust-region subspace solve failures.
 class TrustRegionException : public std::exception {
  public:
   /// constructor
@@ -38,6 +39,7 @@ class TrustRegionException : public std::exception {
   std::string msg;
 };
 
+/// Subspace solution, leftmost eigenvectors, leftmost eigenvalues, and predicted model energy change.
 using TrustRegionSubspaceResult =
     std::tuple<mfem::Vector, std::vector<std::shared_ptr<mfem::Vector>>, std::vector<double>, double>;
 

From 03dbbfddf14eddd039770e301a9436432dfe9475 Mon Sep 17 00:00:00 2001
From: Michael Tupek <tupek2@llnl.gov>
Date: Tue, 12 May 2026 10:45:21 -0700
Subject: [PATCH 19/27] Completely remove petsc subspace options, use pure mfem
 one only going forward.  Try to simplify some of the testing and changes.

---
 cmake/thirdparty/FindMFEM.cmake               |  17 -
 src/smith/numerics/CMakeLists.txt             |   2 -
 src/smith/numerics/dense_petsc.hpp            | 380 ------------------
 src/smith/numerics/equation_solver.cpp        | 208 ++--------
 .../numerics/functional/differentiate_wrt.hpp |   8 +-
 .../tests/functional_comparisons.cpp          |  32 --
 .../numerics/mfem_trust_region_subspace.cpp   |   4 -
 .../numerics/petsc_trust_region_subspace.cpp  | 362 -----------------
 src/smith/numerics/solver_config.hpp          |  10 -
 src/smith/numerics/steihaug_toint_cg.cpp      |  29 +-
 src/smith/numerics/steihaug_toint_cg.hpp      |  29 +-
 src/smith/numerics/tests/CMakeLists.txt       |   1 -
 .../numerics/tests/test_steihaug_toint_cg.cpp |  60 +--
 .../tests/test_trust_region_solver_petsc.cpp  | 225 -----------
 src/smith/numerics/trust_region_solver.hpp    |   6 -
 src/smith/physics/functional_weak_form.hpp    |   1 -
 src/smith/physics/solid_mechanics.hpp         |   2 -
 .../physics/tests/shallow_arch_buckling.cpp   |  20 +-
 18 files changed, 83 insertions(+), 1313 deletions(-)
 delete mode 100644 src/smith/numerics/dense_petsc.hpp
 delete mode 100644 src/smith/numerics/petsc_trust_region_subspace.cpp
 delete mode 100644 src/smith/numerics/tests/test_trust_region_solver_petsc.cpp

diff --git a/cmake/thirdparty/FindMFEM.cmake b/cmake/thirdparty/FindMFEM.cmake
index bb7d37037d..cd7d45c8af 100644
--- a/cmake/thirdparty/FindMFEM.cmake
+++ b/cmake/thirdparty/FindMFEM.cmake
@@ -107,17 +107,6 @@ else()
         # filter out items containing "Xlinker"
         set(_mfem_tpl_list ${mfem_tpl_lnk_flags})
         separate_arguments(_mfem_tpl_list)
-        foreach(_link_flag ${_mfem_tpl_list})
-            if(_link_flag MATCHES "^-L(.+)")
-                set(_link_dir "${CMAKE_MATCH_1}")
-                if(EXISTS "${_link_dir}/liblapack.dylib" OR EXISTS "${_link_dir}/libblas.dylib")
-                    list(APPEND _mfem_tpl_link_dirs "${_link_dir}")
-                endif()
-            endif()
-        endforeach()
-        if(_mfem_tpl_link_dirs)
-            list(REMOVE_DUPLICATES _mfem_tpl_link_dirs)
-        endif()
         list(FILTER _mfem_tpl_list EXCLUDE REGEX Xlinker)
         # On Apple, -Wl,-rpath,... entries duplicate CMake's own rpath management
         # (CMAKE_INSTALL_RPATH_USE_LINK_PATH) and cause ld "duplicate -rpath" warnings
@@ -149,12 +138,6 @@ else()
         TREAT_INCLUDES_AS_SYSTEM ON
         EXPORTABLE    ON)
 
-    if(APPLE AND _mfem_tpl_link_dirs)
-        foreach(_link_dir ${_mfem_tpl_link_dirs})
-            target_link_options(mfem INTERFACE "LINKER:-rpath,${_link_dir}")
-        endforeach()
-    endif()
-
     install(TARGETS          mfem
         EXPORT               smith-targets
         DESTINATION          lib
diff --git a/src/smith/numerics/CMakeLists.txt b/src/smith/numerics/CMakeLists.txt
index f3031866ef..500453a808 100644
--- a/src/smith/numerics/CMakeLists.txt
+++ b/src/smith/numerics/CMakeLists.txt
@@ -14,14 +14,12 @@ set(numerics_headers
     stdfunction_operator.hpp
     petsc_solvers.hpp
     trust_region_solver.hpp
-    dense_petsc.hpp
     block_preconditioner.hpp
     )
 
 set(numerics_sources
     equation_solver.cpp
     steihaug_toint_cg.cpp
-    petsc_trust_region_subspace.cpp
     mfem_trust_region_subspace.cpp
     odes.cpp
     petsc_solvers.cpp
diff --git a/src/smith/numerics/dense_petsc.hpp b/src/smith/numerics/dense_petsc.hpp
deleted file mode 100644
index 516b3ee01d..0000000000
--- a/src/smith/numerics/dense_petsc.hpp
+++ /dev/null
@@ -1,380 +0,0 @@
-// Copyright (c) Lawrence Livermore National Security, LLC and
-// other Smith Project Developers. See the top-level LICENSE file for
-// details.
-//
-// SPDX-License-Identifier: (BSD-3-Clause)
-
-#pragma once
-
-#ifdef SMITH_USE_SLEPC
-
-#include <slepcsvd.h>
-#include <slepcbv.h>
-#include <vector>
-
-struct DenseVec;
-
-/// Dense Matrix class which wraps petsc matrix for the case of a SeqDense matrix (on 1 processor)
-struct DenseMat {
-  /// @brief copy constructor
-  /// @param a matrix
-  DenseMat(const Mat& a) : A(a) {}
-
-  /// @brief constructor
-  /// @param a matrix
-  DenseMat(const DenseMat& a)
-  {
-    MatDuplicate(a.A, MAT_COPY_VALUES, &A);
-    MatCopy(a.A, A, SAME_NONZERO_PATTERN);
-    MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
-    MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
-  }
-
-  /// @brief destructor
-  ~DenseMat() { MatDestroy(&A); }
-
-  /// @brief size
-  auto size() const
-  {
-    int isize;
-    int jsize;
-    MatGetSize(A, &isize, &jsize);
-    return std::make_pair(isize, jsize);
-  }
-
-  /// @brief index into
-  double operator()(int i, int j) const
-  {
-    double val;
-    MatGetValue(A, i, j, &val);
-    return val;
-  }
-
-  /// @brief set value
-  void setValue(int i, int j, double val) { MatSetValues(A, 1, &i, 1, &j, &val, INSERT_VALUES); }
-
-  /// @brief matrix-vector multiply
-  DenseVec operator*(const DenseVec& v) const;
-
-  /// @brief solve
-  DenseVec solve(const DenseVec& v) const;
-
-  /// @brief multiply this by P transpose on left and P on the right
-  DenseMat PtAP(const DenseMat& P) const;
-
-  /// @brief print utility
-  void print(std::string first = "") const
-  {
-    if (first.size()) {
-      std::cout << first << ": ";
-    }
-    MatView(A, PETSC_VIEWER_STDOUT_SELF);
-  }
-
-  /// @brief check for nans
-  bool hasNan() const
-  {
-    auto [rows, cols] = size();
-    for (int i = 0; i < rows; ++i) {
-      for (int j = 0; j < cols; ++j) {
-        double val = (*this)(i, j);
-        if (val != val) return true;
-      }
-    }
-    return false;
-  }
-
-  /// @brief  reassemble petsc dense matrix after values have been modified
-  void reassemble()
-  {
-    MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
-    MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
-  }
-
-  /// petsc matrix
-  Mat A;
-};
-
-/// matrix inverse
-/// @param a matrix
-DenseMat inverse(const DenseMat& a)
-{
-  Mat inv;
-  MatDuplicate(a.A, MAT_COPY_VALUES, &inv);
-  MatSeqDenseInvert(inv);
-  return inv;
-}
-
-/// compute the symmetric part
-/// @param a matrix
-DenseMat sym(const DenseMat& a)
-{
-  DenseMat b = a;
-  auto [rows, cols] = b.size();
-  SLIC_ERROR_IF(rows != cols, "Calling sym on a non-square DenseMat");
-
-  for (int i = 0; i < rows; ++i) {
-    for (int j = 0; j < i; ++j) {
-      auto val = 0.5 * a(i, j) + 0.5 * a(j, i);
-      b.setValue(i, j, val);
-      b.setValue(j, i, val);
-    }
-  }
-
-  b.reassemble();
-
-  return b;
-}
-
-/// Dense Vector class which wraps petsc vector for the case of a SeqDense vector (on 1 processor)
-struct DenseVec {
-  /// @brief constructor
-  DenseVec(const Vec& vin) : v(vin) {}
-
-  /// @brief constructor
-  DenseVec(const DenseVec& vin)
-  {
-    VecDuplicate(vin.v, &v);
-    VecCopy(vin.v, v);
-  }
-
-  /// @brief constructor from size
-  DenseVec(size_t size) { VecCreateSeq(PETSC_COMM_SELF, static_cast<int>(size), &v); }
-
-  /// @brief constructor from size
-  DenseVec(int size) { VecCreateSeq(PETSC_COMM_SELF, size, &v); }
-
-  /// @brief constructor standard vector
-  DenseVec(const std::vector<double> vin)
-  {
-    const auto sz = vin.size();
-    std::vector<int> allints(sz);
-    for (size_t i = 0; i < sz; ++i) {
-      allints[i] = static_cast<int>(i);
-    }
-    int sz_int = static_cast<int>(sz);
-    VecCreateSeq(PETSC_COMM_SELF, sz_int, &v);
-    VecSetValues(v, sz_int, &allints[0], &vin[0], INSERT_VALUES);
-  }
-
-  /// @brief assignment
-  DenseVec& operator=(const DenseVec& vin)
-  {
-    VecCopy(vin.v, v);
-    return *this;
-  }
-
-  /// @brief assignment from scalar
-  DenseVec& operator=(const double val)
-  {
-    VecSet(v, val);
-    return *this;
-  }
-
-  /// @brief destructor
-  ~DenseVec()
-  {
-    if (v) VecDestroy(&v);
-  }
-
-  /// @brief negate
-  DenseVec operator-() const
-  {
-    Vec minus;
-    VecDuplicate(v, &minus);
-    VecCopy(v, minus);
-    VecScale(minus, -1.0);
-    return minus;
-  }
-
-  /// @brief scale
-  DenseVec& operator*=(double scale)
-  {
-    VecScale(v, scale);
-    return *this;
-  }
-
-  /// @brief size
-  int size() const
-  {
-    int isize;
-    VecGetSize(v, &isize);
-    return isize;
-  }
-
-  /// @brief index into
-  double operator[](int i) const
-  {
-    double val;
-    VecGetValues(v, 1, &i, &val);
-    return val;
-  }
-
-  /// @brief index into
-  double operator[](size_t i) const { return (*this)[int(i)]; }
-
-  /// @brief set value
-  void setValue(int i, double val) { VecSetValues(v, 1, &i, &val, INSERT_VALUES); }
-
-  /// @brief set value
-  void setValue(size_t i, double val) { setValue(int(i), val); }
-
-  /// @brief add scaled vector
-  void add(double val, const DenseVec& w) { VecAXPY(v, val, w.v); }
-
-  /// @brief convert to standard vector
-  std::vector<double> getValues() const
-  {
-    size_t sz = static_cast<size_t>(size());
-    std::vector<double> vout(sz);
-    std::vector<int> allints(sz);
-    for (size_t i = 0; i < sz; ++i) {
-      allints[i] = static_cast<int>(i);
-    }
-    int sz_int = static_cast<int>(sz);
-    VecGetValues(v, sz_int, &allints[0], &vout[0]);
-    return vout;
-  }
-
-  /// @brief print utility
-  void print(std::string first = "") const
-  {
-    if (first.size()) {
-      std::cout << first << ": ";
-    }
-    VecView(v, PETSC_VIEWER_STDOUT_SELF);
-  }
-
-  /// petsc vector
-  Vec v;
-};
-
-/// @brief matrix vector multiply
-DenseVec DenseMat::operator*(const DenseVec& v) const
-{
-  Vec out;
-  auto [rows, cols] = size();
-  SLIC_ERROR_IF(cols != v.size(), "Column size of dense matrix and length of multiplied vector do not match");
-  VecCreateSeq(PETSC_COMM_SELF, rows, &out);
-  MatMult(A, v.v, out);
-  return out;
-}
-
-/// @brief matrix linear solve
-DenseVec DenseMat::solve(const DenseVec& v) const
-{
-  Vec out;
-  VecDuplicate(v.v, &out);
-  MatLUFactor(A, NULL, NULL, NULL);  // not efficient if done a lot
-  MatSolve(A, v.v, out);
-  return out;
-}
-
-/// @brief multiply matrix by P-transpose on left, P on right
-DenseMat DenseMat::PtAP(const DenseMat& P) const
-{
-  Mat pAp;
-  MatPtAP(A, P.A, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pAp);
-  return pAp;
-}
-
-/// @brief vector dot product
-double dot(const DenseVec& a, const DenseVec& b)
-{
-  double d;
-  VecDot(a.v, b.v, &d);
-  return d;
-}
-
-/// @brief add a scalar to a vector
-DenseVec operator+(const DenseVec& a, double b)
-{
-  Vec c;
-  VecDuplicate(a.v, &c);
-  VecSet(c, b);
-  VecAXPY(c, 1.0, a.v);
-  return c;
-}
-
-DenseVec operator+(double b, const DenseVec& a) { return a + b; }
-
-/// @brief component-wise multiplication of vectors
-DenseVec operator*(const DenseVec& a, const DenseVec& b)
-{
-  Vec c;
-  VecDuplicate(a.v, &c);
-  VecPointwiseMult(c, a.v, b.v);
-  return c;
-}
-
-/// @brief component-wise vector divide
-DenseVec operator/(const DenseVec& a, const DenseVec& b)
-{
-  Vec c;
-  VecDuplicate(a.v, &c);
-  VecPointwiseDivide(c, a.v, b.v);
-  return c;
-}
-
-/// @brief component-wise vector absolute value
-DenseVec abs(const DenseVec& a)
-{
-  Vec absa;
-  VecDuplicate(a.v, &absa);
-  VecCopy(a.v, absa);
-  VecAbs(absa);
-  return absa;
-}
-
-/// @brief sum values in a vector
-double sum(const DenseVec& a)
-{
-  double s;
-  VecSum(a.v, &s);
-  return s;
-}
-
-/// @brief l2-norm of vector
-double norm(const DenseVec& a)
-{
-  double n;
-  VecNorm(a.v, NORM_2, &n);
-  return n;
-}
-
-/// @brief computes the eigenvectors and eigenvalues of a dense symmetric matrix
-auto eigh(const DenseMat& Adense)
-{
-  auto [isize, jsize] = Adense.size();
-  SLIC_ERROR_IF(isize != jsize, "Eig must be called for symmetric matrices");
-
-  const Mat& A = Adense.A;
-
-  EPS eps;
-  EPSCreate(PETSC_COMM_SELF, &eps);
-  EPSSetOperators(eps, A, NULL);
-  EPSSetProblemType(eps, EPS_HEP);
-  EPSSetWhichEigenpairs(eps, EPS_SMALLEST_REAL);
-  EPSSetDimensions(eps, isize, PETSC_DETERMINE, PETSC_DETERMINE);
-  EPSSetFromOptions(eps);
-
-  EPSSolve(eps);
-
-  EPSType type;
-  EPSGetType(eps, &type);
-  EPSGetDimensions(eps, &jsize, NULL, NULL);
-
-  DenseVec eigenvalues(isize);
-  std::vector<DenseVec> eigenvectors;
-  for (int i = 0; i < isize; ++i) {
-    eigenvectors.emplace_back(isize);
-    double eigenvalue;
-    EPSGetEigenpair(eps, i, &eigenvalue, PETSC_NULLPTR, eigenvectors[static_cast<size_t>(i)].v, PETSC_NULLPTR);
-    eigenvalues.setValue(i, eigenvalue);
-  }
-
-  EPSDestroy(&eps);
-  return std::make_pair(std::move(eigenvalues), std::move(eigenvectors));
-}
-
-#endif  // SMITH_USE_SLEPC
diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp
index f5f205c9fe..9d83657269 100644
--- a/src/smith/numerics/equation_solver.cpp
+++ b/src/smith/numerics/equation_solver.cpp
@@ -9,7 +9,6 @@
 
 #include <array>
 #include <cstdlib>
-#include <deque>
 #include <functional>
 #include <iomanip>
 #include <iostream>
@@ -263,7 +262,7 @@ void printTrustRegionInfo(double realWork, double modelObjective, size_t cgIters
  * rely on an incremental work approximation: 0.5 (f^n + f^{n+1}) dot (u^{n+1} - u^n).  While less theoretically sound,
  * it appears to be very effective in practice.
  */
-class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
+class TrustRegion : public mfem::NewtonSolver {
  protected:
   /// predicted solution
   mutable mfem::Vector x_pred;
@@ -275,14 +274,6 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
   mutable std::vector<std::shared_ptr<mfem::Vector>> left_mosts;
   /// the action of the stiffness/hessian (H) on the left most eigenvectors
   mutable std::vector<std::shared_ptr<mfem::Vector>> H_left_mosts;
-  /// accepted TrustRegion steps, newest first
-  mutable std::deque<std::shared_ptr<mfem::Vector>> accepted_step_history;
-  /// initial state for this nonlinear solve, used as an optional history direction
-  mutable mfem::Vector solve_start_x;
-  /// state with the lowest residual norm seen in this nonlinear solve
-  mutable mfem::Vector min_residual_x;
-  /// lowest residual norm seen in this nonlinear solve
-  mutable double min_residual_norm = -1.0;
 
   /// nonlinear solution options
   NonlinearSolverOptions nonlinear_options;
@@ -307,53 +298,43 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
 #endif
 
   /// compute several vector inner products with a single MPI reduction when possible
-  template <typename... Args>
-  std::array<double, sizeof...(Args) / 2> dot_many(const Args&... args) const
+  std::vector<double> dot_many(const std::vector<DotPair>& pairs) const
   {
-    static_assert(sizeof...(Args) % 2 == 0, "dot_many requires an even number of arguments");
-    constexpr size_t num_pairs = sizeof...(Args) / 2;
-    std::array<double, num_pairs> products;
-    products.fill(0.0);
+    std::vector<double> products(pairs.size(), 0.0);
 
     if (dot_oper) {
-      auto tuple_args = std::tie(args...);
-      auto do_dots = [&]<std::size_t... I>(std::index_sequence<I...>) {
-        ((products[I] = Dot(std::get<2 * I>(tuple_args), std::get<2 * I + 1>(tuple_args))), ...);
-      };
-      do_dots(std::make_index_sequence<num_pairs>{});
+      for (size_t i = 0; i < pairs.size(); ++i) {
+        products[i] = Dot(*pairs[i].first, *pairs[i].second);
+      }
       return products;
     }
 
-    auto tuple_args = std::tie(args...);
-    std::array<int, num_pairs> sizes;
-    std::array<const double*, num_pairs> ptr_a;
-    std::array<const double*, num_pairs> ptr_b;
-
-    auto populate_arrays = [&]<std::size_t... I>(std::index_sequence<I...>) {
-      ((
-           sizes[I] = std::get<2 * I>(tuple_args).Size(),
-           [&]() { MFEM_ASSERT(sizes[I] == std::get<2 * I + 1>(tuple_args).Size(), "Incompatible vector sizes."); }(),
-           ptr_a[I] = std::get<2 * I>(tuple_args).GetData(), ptr_b[I] = std::get<2 * I + 1>(tuple_args).GetData()),
-       ...);
-    };
-    populate_arrays(std::make_index_sequence<num_pairs>{});
+    std::vector<int> sizes(pairs.size());
+    std::vector<const double*> ptr_a(pairs.size());
+    std::vector<const double*> ptr_b(pairs.size());
+    for (size_t i = 0; i < pairs.size(); ++i) {
+      sizes[i] = pairs[i].first->Size();
+      MFEM_ASSERT(sizes[i] == pairs[i].second->Size(), "Incompatible vector sizes.");
+      ptr_a[i] = pairs[i].first->GetData();
+      ptr_b[i] = pairs[i].second->GetData();
+    }
 
     bool all_same_size = true;
-    for (size_t i = 1; i < num_pairs; ++i) {
+    for (size_t i = 1; i < pairs.size(); ++i) {
       if (sizes[i] != sizes[0]) {
         all_same_size = false;
         break;
       }
     }
 
-    if (all_same_size && num_pairs > 0) {
+    if (all_same_size && !pairs.empty()) {
       for (int j = 0; j < sizes[0]; ++j) {
-        for (size_t i = 0; i < num_pairs; ++i) {
+        for (size_t i = 0; i < pairs.size(); ++i) {
           products[i] += ptr_a[i][j] * ptr_b[i][j];
         }
       }
     } else {
-      for (size_t i = 0; i < num_pairs; ++i) {
+      for (size_t i = 0; i < pairs.size(); ++i) {
         for (int j = 0; j < sizes[i]; ++j) {
           products[i] += ptr_a[i][j] * ptr_b[i][j];
         }
@@ -363,72 +344,16 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
 #ifdef MFEM_USE_MPI
     const MPI_Comm dot_comm = GetComm();
     if (dot_comm != MPI_COMM_NULL) {
-      std::array<mfem::real_t, num_pairs> global_products;
-      MPI_Allreduce(products.data(), global_products.data(), num_pairs, MFEM_MPI_REAL_T, MPI_SUM, dot_comm);
-      for (size_t i = 0; i < num_pairs; ++i) {
-        products[i] = global_products[i];
-      }
+      std::vector<mfem::real_t> global_products(pairs.size());
+      MPI_Allreduce(products.data(), global_products.data(), static_cast<int>(pairs.size()), MFEM_MPI_REAL_T, MPI_SUM,
+                    dot_comm);
+      products.assign(global_products.begin(), global_products.end());
     }
 #endif
 
     return products;
   }
 
-  /// apply Hessian-vector products for all supplied subspace directions
-  template <typename HessVecFunc>
-  void batchedSubspaceHessVec(HessVecFunc hess_vec_func, const std::vector<const mfem::Vector*>& inputs,
-                              const std::vector<mfem::Vector*>& outputs) const
-  {
-    MFEM_VERIFY(inputs.size() == outputs.size(), "Subspace Hessian-vector batch input/output size mismatch");
-    if (inputs.empty()) {
-      return;
-    }
-
-    for (size_t i = 0; i < inputs.size(); ++i) {
-      hess_vec_func(*inputs[i], *outputs[i]);
-    }
-  }
-
-  /// store accepted steps for optional later subspace enrichment
-  void pushAcceptedStepHistory(const mfem::Vector& step) const
-  {
-    if (nonlinear_options.trust_num_past_steps <= 0) {
-      accepted_step_history.clear();
-      return;
-    }
-
-    accepted_step_history.push_front(std::make_shared<mfem::Vector>(step));
-    const size_t max_size = static_cast<size_t>(nonlinear_options.trust_num_past_steps) + 1;
-    while (accepted_step_history.size() > max_size) {
-      accepted_step_history.pop_back();
-    }
-  }
-
-  /// SteihaugTointDelegate implementation for four inner products.
-  std::array<double, 4> dot_many_4(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1,
-                                   const mfem::Vector& b1, const mfem::Vector& a2, const mfem::Vector& b2,
-                                   const mfem::Vector& a3, const mfem::Vector& b3) const override
-  {
-    return dot_many(a0, b0, a1, b1, a2, b2, a3, b3);
-  }
-
-  /// SteihaugTointDelegate implementation for two inner products.
-  std::array<double, 2> dot_many_2(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1,
-                                   const mfem::Vector& b1) const override
-  {
-    return dot_many(a0, b0, a1, b1);
-  }
-
-  /// SteihaugTointDelegate implementation for projecting to the trust-region boundary.
-  void projectToBoundaryWithCoefs(mfem::Vector& z, const mfem::Vector& d, double delta, double zz, double zd,
-                                  double dd) const override
-  {
-    double deltadelta_m_zz = delta * delta - zz;
-    if (deltadelta_m_zz == 0) return;
-    double tau = (std::sqrt(deltadelta_m_zz * dd + zd * zd) - zd) / dd;
-    z.Add(tau, d);
-  }
-
   /// solve the exact trust-region subspace problem with directions ds, and the leftmosts
   template <typename HessVecFunc>
   void solveTheSubspaceProblem([[maybe_unused]] mfem::Vector& z, [[maybe_unused]] const HessVecFunc& hess_vec_func,
@@ -507,7 +432,9 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
   void doglegStep(const mfem::Vector& cp, const mfem::Vector& newtonP, double trSize, mfem::Vector& s) const
   {
     SMITH_MARK_FUNCTION;
-    auto [cc, nn] = dot_many(cp, cp, newtonP, newtonP);
+    const auto dots = dot_many({{&cp, &cp}, {&newtonP, &newtonP}});
+    const double cc = dots[0];
+    const double nn = dots[1];
     double tt = trSize * trSize;
 
     s = 0.0;
@@ -544,7 +471,8 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
                          const TrustRegionSettings& settings, double& trSize, TrustRegionResults& results,
                          double r0_norm_squared) const
   {
-    steihaugTointCG(r0, rCurrent, H, P, settings, trSize, results, r0_norm_squared, *this);
+    auto dot_many_lambda = [this](const std::vector<DotPair>& pairs) { return dot_many(pairs); };
+    steihaugTointCG(r0, rCurrent, H, P, settings, trSize, results, r0_norm_squared, dot_many_lambda);
   }
 
   /// assemble the jacobian
@@ -597,14 +525,8 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
 
     using real_t = mfem::real_t;
 
-    solve_start_x.SetSize(X.Size());
-    solve_start_x = X;
-    min_residual_x.SetSize(X.Size());
-    min_residual_x = X;
-
     real_t norm, norm_goal = 0.0;
     norm = initial_norm = computeResidual(X, r);
-    min_residual_norm = initial_norm;
     if (norm == 0.0) return;
 
     norm_goal = std::max(rel_tol * initial_norm, abs_tol);
@@ -741,7 +663,11 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
               subspace_hess_outputs.push_back(&trResults.H_d_old);
             }
 
-            batchedSubspaceHessVec(hess_vec_func, subspace_hess_inputs, subspace_hess_outputs);
+            MFEM_VERIFY(subspace_hess_inputs.size() == subspace_hess_outputs.size(),
+                        "Subspace Hessian-vector batch input/output size mismatch");
+            for (size_t i = 0; i < subspace_hess_inputs.size(); ++i) {
+              hess_vec_func(*subspace_hess_inputs[i], *subspace_hess_outputs[i]);
+            }
           }
 
           if (!have_computed_H_left_mosts) {
@@ -754,7 +680,11 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
               leftmost_inputs.push_back(left.get());
               leftmost_outputs.push_back(H_left_mosts.back().get());
             }
-            batchedSubspaceHessVec(hess_vec_func, leftmost_inputs, leftmost_outputs);
+            MFEM_VERIFY(leftmost_inputs.size() == leftmost_outputs.size(),
+                        "Subspace Hessian-vector batch input/output size mismatch");
+            for (size_t i = 0; i < leftmost_inputs.size(); ++i) {
+              hess_vec_func(*leftmost_inputs[i], *leftmost_outputs[i]);
+            }
           }
 
           std::vector<const mfem::Vector*> ds{&trResults.z, &trResults.cauchy_point};
@@ -764,65 +694,15 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
             H_ds.push_back(&trResults.H_d_old);
           }
 
-          std::vector<mfem::Vector> H_past_steps;
-          std::vector<const mfem::Vector*> past_step_inputs;
-          std::vector<mfem::Vector*> past_step_outputs;
-          const size_t max_past_steps = static_cast<size_t>(std::max(nonlinear_options.trust_num_past_steps, 0));
-          const size_t num_past_steps =
-              accepted_step_history.size() > 1 ? std::min(max_past_steps, accepted_step_history.size() - 1) : 0;
-          H_past_steps.reserve(num_past_steps);
-          past_step_inputs.reserve(num_past_steps);
-          past_step_outputs.reserve(num_past_steps);
-          for (size_t i = 0; i < num_past_steps; ++i) {
-            const auto& past_step = accepted_step_history[i + 1];
-            H_past_steps.emplace_back(past_step->Size());
-            past_step_inputs.push_back(past_step.get());
-            past_step_outputs.push_back(&H_past_steps.back());
-          }
-          if (!past_step_inputs.empty()) {
-            batchedSubspaceHessVec(hess_vec_func, past_step_inputs, past_step_outputs);
-            for (size_t i = 0; i < past_step_inputs.size(); ++i) {
-              ds.push_back(past_step_inputs[i]);
-              H_ds.push_back(past_step_outputs[i]);
-            }
-          }
-
-          mfem::Vector solve_start_direction;
-          mfem::Vector H_solve_start_direction;
-          if (nonlinear_options.trust_use_solve_start_direction && solve_start_x.Size() == X.Size()) {
-            solve_start_direction.SetSize(X.Size());
-            subtract(solve_start_x, X, solve_start_direction);
-            if (solve_start_direction.Norml2() > 0.0) {
-              H_solve_start_direction.SetSize(X.Size());
-              std::vector<const mfem::Vector*> solve_start_inputs{&solve_start_direction};
-              std::vector<mfem::Vector*> solve_start_outputs{&H_solve_start_direction};
-              batchedSubspaceHessVec(hess_vec_func, solve_start_inputs, solve_start_outputs);
-              ds.push_back(&solve_start_direction);
-              H_ds.push_back(&H_solve_start_direction);
-            }
-          }
-
-          mfem::Vector min_residual_direction;
-          mfem::Vector H_min_residual_direction;
-          if (nonlinear_options.trust_use_min_residual_direction && min_residual_x.Size() == X.Size()) {
-            min_residual_direction.SetSize(X.Size());
-            subtract(min_residual_x, X, min_residual_direction);
-            if (min_residual_direction.Norml2() > 0.0) {
-              H_min_residual_direction.SetSize(X.Size());
-              std::vector<const mfem::Vector*> min_res_inputs{&min_residual_direction};
-              std::vector<mfem::Vector*> min_res_outputs{&H_min_residual_direction};
-              batchedSubspaceHessVec(hess_vec_func, min_res_inputs, min_res_outputs);
-              ds.push_back(&min_residual_direction);
-              H_ds.push_back(&H_min_residual_direction);
-            }
-          }
           solveTheSubspaceProblem(trResults.d, hess_vec_func, ds, H_ds, r, tr_size, num_leftmost, candidate_left_mosts);
         }
 
         static constexpr double roundOffTol = 0.0;  // 1e-14;
 
         hess_vec_func(trResults.d, trResults.H_d);
-        const auto [dHd, rd] = dot_many(trResults.d, trResults.H_d, r, trResults.d);
+        const auto dots = dot_many({{&trResults.d, &trResults.H_d}, {&r, &trResults.d}});
+        const double dHd = dots[0];
+        const double rd = dots[1];
         double modelObjective = rd + 0.5 * dHd - roundOffTol;
 
         add(X, trResults.d, x_pred);
@@ -831,10 +711,6 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
         double normPred = std::numeric_limits<double>::max();
         try {
           normPred = computeResidual(x_pred, r_pred);
-          if (normPred < min_residual_norm) {
-            min_residual_norm = normPred;
-            min_residual_x = x_pred;
-          }
           double obj1 = 0.5 * (rd + Dot(r_pred, trResults.d)) - roundOffTol;
           realObjective = obj1;
         } catch (const std::exception&) {
@@ -845,7 +721,6 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
         if (normPred <= norm_goal) {
           trResults.d_old = trResults.d;
           trResults.has_d_old = true;
-          pushAcceptedStepHistory(trResults.d);
           if (!candidate_left_mosts.empty()) {
             left_mosts = std::move(candidate_left_mosts);
           }
@@ -901,7 +776,6 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate {
         if (willAccept) {
           trResults.d_old = trResults.d;
           trResults.has_d_old = true;
-          pushAcceptedStepHistory(trResults.d);
           if (!candidate_left_mosts.empty()) {
             left_mosts = std::move(candidate_left_mosts);
           }
diff --git a/src/smith/numerics/functional/differentiate_wrt.hpp b/src/smith/numerics/functional/differentiate_wrt.hpp
index a3c71102bc..d72a0bfdd0 100644
--- a/src/smith/numerics/functional/differentiate_wrt.hpp
+++ b/src/smith/numerics/functional/differentiate_wrt.hpp
@@ -15,19 +15,19 @@ template <uint32_t i>
 struct DifferentiateWRT {};
 
 /**
- * @brief this type exists solely as a way to signal to smith::Functional that the function
- * smith::Functional::operator() should differentiate w.r.t. a specific argument
+ * @brief this type exists solely as a way to signal to `smith::Functional` that the function
+ * `smith::Functional::operator()` should differentiate w.r.t. a specific argument
  */
 struct differentiate_wrt_this {
   const mfem::Vector& ref;  ///< the actual data wrapped by this type
 
-  /// @brief implicitly convert back to mfem::Vector to extract the actual data
+  /// @brief implicitly convert back to `mfem::Vector` to extract the actual data
   operator const mfem::Vector&() const { return ref; }
 };
 
 /**
  * @brief this function is intended to only be used in combination with
- *   smith::Functional::operator(), as a way for the user to express that
+ *   `smith::Functional::operator()`, as a way for the user to express that
  *   it should both evaluate and differentiate w.r.t. a specific argument (only 1 argument at a time)
  *
  * For example:
diff --git a/src/smith/numerics/functional/tests/functional_comparisons.cpp b/src/smith/numerics/functional/tests/functional_comparisons.cpp
index eab6ca2bb4..a272498955 100644
--- a/src/smith/numerics/functional/tests/functional_comparisons.cpp
+++ b/src/smith/numerics/functional/tests/functional_comparisons.cpp
@@ -5,7 +5,6 @@
 // SPDX-License-Identifier: (BSD-3-Clause)
 
 #include <algorithm>
-#include <chrono>
 #include <cstdlib>
 #include <iostream>
 #include <map>
@@ -33,8 +32,6 @@
 using namespace smith;
 
 int nsamples = 1;  // because mfem doesn't take in unsigned int
-bool run_diagonal_benchmark = false;
-int diagonal_benchmark_samples = 5;
 
 constexpr bool verbose = false;
 std::unique_ptr<mfem::ParMesh> mesh2D;
@@ -462,31 +459,6 @@ TEST(Elasticity, 3DLinear) { functional_test(*mesh3D, H1<1, 3>{}, H1<1, 3>{}, Di
 TEST(Elasticity, 3DQuadratic) { functional_test(*mesh3D, H1<2, 3>{}, H1<2, 3>{}, Dimension<3>{}); }
 TEST(Elasticity, 3DCubic) { functional_test(*mesh3D, H1<3, 3>{}, H1<3, 3>{}, Dimension<3>{}); }
 
-namespace {
-
-template <typename Function>
-double time_on_slowest_rank(Function&& function)
-{
-  auto [num_ranks, rank] = smith::getMPIInfo();
-  (void)rank;
-  if (num_ranks > 1) {
-    MPI_Barrier(MPI_COMM_WORLD);
-  }
-
-  auto start = std::chrono::steady_clock::now();
-  function();
-  auto stop = std::chrono::steady_clock::now();
-
-  double elapsed = std::chrono::duration_cast<std::chrono::duration<double>>(stop - start).count();
-  double max_elapsed = elapsed;
-  if (num_ranks > 1) {
-    MPI_Allreduce(&elapsed, &max_elapsed, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
-  }
-  return max_elapsed;
-}
-
-}  // namespace
-
 // TODO: reenable these once hcurl implements of simplex elements is finished
 // TEST(Hcurl, 2DLinear) { functional_test(*mesh2D, Hcurl<1>{}, Hcurl<1>{}, Dimension<2>{}); }
 // TEST(Hcurl, 2DQuadratic) { functional_test(*mesh2D, Hcurl<2>{}, Hcurl<2>{}, Dimension<2>{}); }
@@ -509,10 +481,6 @@ int main(int argc, char* argv[])
   args.AddOption(&serial_refinement, "-r", "--ref", "");
   args.AddOption(&parallel_refinement, "-pr", "--pref", "");
   args.AddOption(&nsamples, "-n", "--n-samples", "Samples per test");
-  args.AddOption(&run_diagonal_benchmark, "-rdb", "--run-diagonal-benchmark", "-sdb", "--skip-diagonal-benchmark",
-                 "Run direct diagonal vs sparse assemble+GetDiag timing benchmark.");
-  args.AddOption(&diagonal_benchmark_samples, "-dbs", "--diagonal-benchmark-samples",
-                 "Samples for the diagonal assembly benchmark.");
 
   args.Parse();
   if (!args.Good()) {
diff --git a/src/smith/numerics/mfem_trust_region_subspace.cpp b/src/smith/numerics/mfem_trust_region_subspace.cpp
index 0f2eddb32a..9edb69756e 100644
--- a/src/smith/numerics/mfem_trust_region_subspace.cpp
+++ b/src/smith/numerics/mfem_trust_region_subspace.cpp
@@ -443,12 +443,8 @@ TrustRegionSubspaceResult solveSubspaceProblem(const std::vector<const mfem::Vec
                                                const std::vector<const mfem::Vector*>& A_directions,
                                                const mfem::Vector& b, double delta, int num_leftmost)
 {
-#if defined(SMITH_USE_SLEPC) && defined(SMITH_TRUST_REGION_USE_PETSC_SUBSPACE)
-  return solveSubspaceProblemPetsc(directions, A_directions, b, delta, num_leftmost);
-#else
   throw TrustRegionException("Trust-region subspace solve requires MFEM LAPACK support.");
   return std::make_tuple(b, std::vector<std::shared_ptr<mfem::Vector>>{}, std::vector<double>{}, 0.0);
-#endif
 }
 
 /// @brief report unavailable MFEM subspace solve when MFEM was built without LAPACK.
diff --git a/src/smith/numerics/petsc_trust_region_subspace.cpp b/src/smith/numerics/petsc_trust_region_subspace.cpp
deleted file mode 100644
index 6883f2571c..0000000000
--- a/src/smith/numerics/petsc_trust_region_subspace.cpp
+++ /dev/null
@@ -1,362 +0,0 @@
-// Copyright (c) Lawrence Livermore National Security, LLC and
-// other Smith Project Developers. See the top-level LICENSE file for
-// details.
-//
-// SPDX-License-Identifier: (BSD-3-Clause)
-
-#include "smith/numerics/trust_region_solver.hpp"
-
-#if defined(SMITH_USE_SLEPC) && defined(SMITH_TRUST_REGION_USE_PETSC_SUBSPACE)
-
-#include <iostream>
-
-#include "smith/infrastructure/profiling.hpp"
-#include "smith/numerics/dense_petsc.hpp"
-
-namespace smith {
-namespace {
-
-/// @brief struct which aids in moving between mfem::Vector and petsc BV
-struct BasisVectors {
-  BasisVectors(const mfem::Vector& state) : local_rows(state.Size()), global_rows(globalSize(state, PETSC_COMM_WORLD))
-  {
-    VecCreateMPI(PETSC_COMM_WORLD, local_rows, global_rows, &v);
-
-    PetscInt iStart, iEnd;
-    VecGetOwnershipRange(v, &iStart, &iEnd);
-
-    col_indices.reserve(size_t(local_rows));
-    for (int i = iStart; i < iEnd; ++i) {
-      col_indices.push_back(i);
-    }
-  }
-
-  ~BasisVectors() { VecDestroy(&v); }
-
-  BV constructBases(const std::vector<const mfem::Vector*>& states) const
-  {
-    size_t num_cols = states.size();
-    BV Q;
-    BVCreate(PETSC_COMM_SELF, &Q);
-    BVSetType(Q, BVVECS);
-    BVSetSizesFromVec(Q, v, static_cast<int>(num_cols));
-    for (size_t c = 0; c < num_cols; ++c) {
-      VecSetValues(v, local_rows, &col_indices[0], &(*states[c])[0], INSERT_VALUES);
-      VecAssemblyBegin(v);
-      VecAssemblyEnd(v);
-      int c_int = static_cast<int>(c);
-      BVInsertVec(Q, c_int, v);
-    }
-    return Q;
-  }
-
- private:
-  const int local_rows;
-  const int global_rows;
-
-  std::vector<int> col_indices;
-  Vec v;
-};
-
-Vec petscVec(const mfem::Vector& state)
-{
-  const int local_rows = state.Size();
-  const int global_rows = globalSize(state, PETSC_COMM_WORLD);
-
-  Vec v;
-  VecCreateMPI(PETSC_COMM_WORLD, local_rows, global_rows, &v);
-
-  PetscInt iStart, iEnd;
-  VecGetOwnershipRange(v, &iStart, &iEnd);
-
-  std::vector<int> col_indices;
-  col_indices.reserve(static_cast<size_t>(local_rows));
-  for (int i = iStart; i < iEnd; ++i) {
-    col_indices.push_back(i);
-  }
-
-  VecSetValues(v, local_rows, &col_indices[0], &state[0], INSERT_VALUES);
-
-  VecAssemblyBegin(v);
-  VecAssemblyEnd(v);
-
-  return v;
-}
-
-void copy(const Vec& v, mfem::Vector& s)
-{
-  const int local_rows = s.Size();
-  PetscInt iStart, iEnd;
-  VecGetOwnershipRange(v, &iStart, &iEnd);
-
-  SLIC_ERROR_IF(local_rows != iEnd - iStart,
-                "Inconsistency between local t-dof vector size and petsc start and end indices");
-
-  std::vector<int> col_indices;
-  col_indices.reserve(static_cast<size_t>(local_rows));
-  for (int i = iStart; i < iEnd; ++i) {
-    col_indices.push_back(i);
-  }
-
-  VecGetValues(v, local_rows, &col_indices[0], &s[0]);
-}
-
-Mat dot(const std::vector<const mfem::Vector*>& s, const std::vector<const mfem::Vector*>& As)
-{
-  SLIC_ERROR_IF(s.size() != As.size(),
-                "Search directions and their linear operator result must have same number of columns");
-  size_t num_cols = s.size();
-  int num_cols_int = static_cast<int>(num_cols);
-  Mat sAs;
-  MatCreateSeqDense(PETSC_COMM_SELF, num_cols_int, num_cols_int, NULL, &sAs);
-  for (size_t i = 0; i < num_cols; ++i) {
-    for (size_t j = 0; j < num_cols; ++j) {
-      MatSetValue(sAs, static_cast<int>(i), static_cast<int>(j), mfem::InnerProduct(PETSC_COMM_WORLD, *s[i], *As[j]),
-                  INSERT_VALUES);
-    }
-  }
-  MatAssemblyBegin(sAs, MAT_FINAL_ASSEMBLY);
-  MatAssemblyEnd(sAs, MAT_FINAL_ASSEMBLY);
-  return sAs;
-}
-
-Vec dot(const std::vector<const mfem::Vector*>& s, const mfem::Vector& b)
-{
-  size_t num_cols = s.size();
-  Vec sb;
-  VecCreateSeq(PETSC_COMM_SELF, static_cast<int>(num_cols), &sb);
-  for (size_t i = 0; i < num_cols; ++i) {
-    VecSetValue(sb, static_cast<int>(i), mfem::InnerProduct(PETSC_COMM_WORLD, *s[i], b), INSERT_VALUES);
-  }
-  return sb;
-}
-
-auto qr(const std::vector<const mfem::Vector*>& states)
-{
-  BasisVectors bvs(*states[0]);
-  BV Q = bvs.constructBases(states);
-
-  Mat R;
-  int num_cols = static_cast<int>(states.size());
-  MatCreateSeqDense(PETSC_COMM_SELF, num_cols, num_cols, NULL, &R);
-  auto error = BVOrthogonalize(Q, R);
-
-  if (error) throw TrustRegionException("BVOrthogonalize failed.");
-
-  return std::make_pair(Q, DenseMat(R));
-}
-
-double quadraticEnergy(const DenseMat& A, const DenseVec& b, const DenseVec& x)
-{
-  DenseVec Ax = A * x;
-  double xAx = dot(x, Ax);
-  double xb = dot(x, b);
-  return 0.5 * xAx - xb;
-}
-
-double pnorm_squared(const DenseVec& bvv, const DenseVec& sig)
-{
-  auto bvv_div_sig_squared = bvv / (sig * sig);
-  return sum(bvv_div_sig_squared);
-}
-
-double qnorm_squared(const DenseVec& bvv, const DenseVec& sig)
-{
-  auto bvv_div_sig_cubed = bvv / (sig * sig * sig);
-  return sum(bvv_div_sig_cubed);
-}
-
-auto exactTrustRegionSolve(DenseMat A, const DenseVec& b, double delta, int num_leftmost)
-{
-  auto [isize, jsize] = A.size();
-  auto isize2 = b.size();
-  SLIC_ERROR_IF(isize != jsize, "Exact trust region solver requires square matrices");
-  SLIC_ERROR_IF(isize != isize2,
-                "The right hand size for exact trust region solve must be consistent with the input matrix size");
-
-  auto [sigs, V] = eigh(A);
-  std::vector<DenseVec> leftmosts;
-  std::vector<double> minsigs;
-  size_t num_leftmost_possible(size_t(std::min(num_leftmost, isize)));
-  for (size_t i = 0; i < num_leftmost_possible; ++i) {
-    leftmosts.emplace_back(V[i]);
-    minsigs.emplace_back(sigs[i]);
-  }
-
-  const auto& leftMost = V[0];
-  double minSig = sigs[0];
-
-  DenseVec bv(isize);
-  for (size_t i = 0; i < size_t(isize); ++i) {
-    bv.setValue(i, dot(V[i], b));
-  }
-
-  DenseVec bvOverSigs = bv / sigs;
-  double sigScale = sum(abs(sigs)) / isize;
-  double eps = 1e-12 * sigScale;
-
-  if ((minSig >= eps) && (norm(bvOverSigs) <= delta)) {
-    return std::make_tuple(A.solve(b), leftmosts, minsigs, true);
-  }
-
-  double lam = minSig < eps ? -minSig + eps : 0.0;
-
-  DenseVec sigsPlusLam = sigs + lam;
-
-  bvOverSigs = bv / sigsPlusLam;
-
-  if ((minSig < eps) && (norm(bvOverSigs) < delta)) {
-    DenseVec p(isize);
-    p = 0.0;
-    for (int i = 0; i < isize; ++i) {
-      p.add(bv[i], V[size_t(i)]);
-    }
-
-    const auto& z = leftMost;
-    double pz = dot(p, z);
-    double pp = dot(p, p);
-    double ddmpp = std::max(delta * delta - pp, 0.0);
-
-    double tau1 = -pz + std::sqrt(pz * pz + ddmpp);
-    double tau2 = -pz - std::sqrt(pz * pz + ddmpp);
-
-    DenseVec x1(p);
-    DenseVec x2(p);
-    x1.add(tau1, z);
-    x2.add(tau2, z);
-
-    double e1 = quadraticEnergy(A, b, x1);
-    double e2 = quadraticEnergy(A, b, x2);
-
-    DenseVec x = e1 < e2 ? x1 : x2;
-
-    return std::make_tuple(x, leftmosts, minsigs, true);
-  }
-  DenseVec bvbv = bv * bv;
-  sigsPlusLam = sigs + lam;
-
-  double pNormSq = pnorm_squared(bvbv, sigsPlusLam);
-  double pNorm = std::sqrt(pNormSq);
-  double bError = (pNorm - delta) / delta;
-
-  size_t iters = 0;
-  size_t maxIters = 30;
-  while ((std::abs(bError) > 1e-9) && (iters++ < maxIters)) {
-    double qNormSq = qnorm_squared(bvbv, sigsPlusLam);
-    lam += (pNormSq / qNormSq) * bError;
-    sigsPlusLam = sigs + lam;
-    pNormSq = pnorm_squared(bvbv, sigsPlusLam);
-    pNorm = std::sqrt(pNormSq);
-    bError = (pNorm - delta) / delta;
-  }
-
-  bool success = true;
-  if (iters >= maxIters) {
-    success = false;
-  }
-
-  bvOverSigs = bv / sigsPlusLam;
-
-  DenseVec x(isize);
-  x = 0.0;
-  for (int i = 0; i < isize; ++i) {
-    x.add(bvOverSigs[i], V[size_t(i)]);
-  }
-
-  double e1 = quadraticEnergy(A, b, x);
-  double e2 = quadraticEnergy(A, b, -x);
-
-  if (e2 < e1) {
-    x *= -delta / norm(x);
-  } else {
-    x *= delta / norm(x);
-  }
-
-  return std::make_tuple(x, leftmosts, minsigs, success);
-}
-
-std::vector<const mfem::Vector*> remove_at(const std::vector<const mfem::Vector*>& a, size_t j)
-{
-  std::vector<const mfem::Vector*> b;
-  for (size_t i = 0; i < a.size(); ++i) {
-    if (i != j) {
-      b.emplace_back(a[i]);
-    }
-  }
-  return b;
-}
-
-}  // namespace
-
-TrustRegionSubspaceResult solveSubspaceProblemPetsc(const std::vector<const mfem::Vector*>& states,
-                                                    const std::vector<const mfem::Vector*>& Astates,
-                                                    const mfem::Vector& b, double delta, int num_leftmost)
-{
-  SMITH_MARK_FUNCTION;
-  DenseMat sAs1 = dot(states, Astates);
-  DenseMat sAs = sym(sAs1);
-
-  if (sAs.hasNan()) {
-    throw TrustRegionException("States in subspace solve contain NaNs.");
-  }
-
-  auto [Q_parallel, R] = qr(states);
-
-  if (R.hasNan()) {
-    throw TrustRegionException("R from qr returning with a NaN.");
-  }
-
-  auto [rows, cols] = R.size();
-  SLIC_ERROR_IF(rows != cols, "R matrix is not square in subspace problem solve\n");
-
-  double trace_mag = 0.0;
-  for (int i = 0; i < rows; ++i) {
-    trace_mag += std::abs(R(i, i));
-  }
-
-  for (int i = 0; i < rows; ++i) {
-    if (R(i, i) < 1e-9 * trace_mag) {
-      auto statesNew = remove_at(states, size_t(i));
-      auto AstatesNew = remove_at(Astates, size_t(i));
-      return solveSubspaceProblemPetsc(statesNew, AstatesNew, b, delta, num_leftmost);
-    }
-  }
-
-  auto Rinv = inverse(R);
-  DenseMat pAp = sAs.PtAP(Rinv);
-
-  Vec b_parallel = petscVec(b);
-  std::vector<double> pb_vec(states.size());
-  BVDotVec(Q_parallel, b_parallel, &pb_vec[0]);
-  DenseVec pb(pb_vec);
-
-  auto [reduced_x, leftvecs, leftvals, success] = exactTrustRegionSolve(pAp, pb, delta, num_leftmost);
-  (void)success;
-
-  double energy = quadraticEnergy(pAp, pb, reduced_x);
-
-  Vec x_parallel;
-  VecDuplicate(b_parallel, &x_parallel);
-
-  std::vector<double> reduced_x_vec = reduced_x.getValues();
-  BVMultVec(Q_parallel, 1.0, 0.0, x_parallel, &reduced_x_vec[0]);
-  mfem::Vector sol(b);
-  copy(x_parallel, sol);
-
-  std::vector<std::shared_ptr<mfem::Vector>> leftmosts;
-  for (size_t i = 0; i < leftvecs.size(); ++i) {
-    auto reduced_leftvec = leftvecs[i].getValues();
-    BVMultVec(Q_parallel, 1.0, 0.0, x_parallel, &reduced_leftvec[0]);
-    leftmosts.emplace_back(std::make_shared<mfem::Vector>(b));
-    copy(x_parallel, *leftmosts[i]);
-  }
-
-  BVDestroy(&Q_parallel);
-  VecDestroy(&b_parallel);
-  VecDestroy(&x_parallel);
-  return std::make_tuple(sol, leftmosts, leftvals, energy);
-}
-
-}  // namespace smith
-
-#endif  // SMITH_USE_SLEPC
diff --git a/src/smith/numerics/solver_config.hpp b/src/smith/numerics/solver_config.hpp
index dc031c4d85..aebf795305 100644
--- a/src/smith/numerics/solver_config.hpp
+++ b/src/smith/numerics/solver_config.hpp
@@ -467,16 +467,6 @@ struct NonlinearSolverOptions {
   /// Number of extra leftmost eigenvector to be stored between solves
   int num_leftmost = 1;
 
-  /// Number of additional older accepted TrustRegion steps to include in subspace solves.
-  int trust_num_past_steps = 0;
-
-  /// Include the displacement from current nonlinear-solve state back to the nonlinear-solve initial state.
-  bool trust_use_solve_start_direction = false;
-
-  /// Include the displacement from current nonlinear-solve state to the state with the minimum residual seen so far in
-  /// this nonlinear solve.
-  bool trust_use_min_residual_direction = false;
-
   /// Should the gradient be converted to a monolithic matrix
   bool force_monolithic = false;
 };
diff --git a/src/smith/numerics/steihaug_toint_cg.cpp b/src/smith/numerics/steihaug_toint_cg.cpp
index 546fcd81e5..4964d3a641 100644
--- a/src/smith/numerics/steihaug_toint_cg.cpp
+++ b/src/smith/numerics/steihaug_toint_cg.cpp
@@ -10,22 +10,19 @@ namespace smith {
 
 namespace {
 
-void smith_add(const mfem::Vector& a, double b, const mfem::Vector& c, mfem::Vector& out)
+void projectToBoundaryWithCoefs(mfem::Vector& z, const mfem::Vector& d, double delta, double zz, double zd, double dd)
 {
-  if (out.GetData() == c.GetData()) {
-    out = a;
-    out.Add(b, c);
-  } else {
-    out = a;
-    out.Add(b, c);
-  }
+  const double deltadelta_m_zz = delta * delta - zz;
+  if (deltadelta_m_zz <= 0.0) return;
+  const double tau = (std::sqrt(deltadelta_m_zz * dd + zd * zd) - zd) / dd;
+  z.Add(tau, d);
 }
 
 }  // namespace
 
 void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem::Operator& H, const mfem::Solver* P,
                      const TrustRegionSettings& settings, double& trSize, TrustRegionResults& results,
-                     double r0_norm_squared, const SteihaugTointDelegate& delegate)
+                     double r0_norm_squared, const DotManyFunction& dot_many)
 {
   // minimize r0@z + 0.5*z@J@z
   results.interior_status = TrustRegionResults::Status::Interior;
@@ -58,13 +55,12 @@ void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem:
   double zz = 0.;
 
   // rPr = dot(rCurrent, Pr)
-  auto rPr_arr = delegate.dot_many_2(rCurrent, Pr, rCurrent, Pr);  // We only need the first
-  double rPr = rPr_arr[0];
+  double rPr = dot_many({{&rCurrent, &Pr}, {&rCurrent, &Pr}})[0];
 
   for (cgIter = 1; cgIter <= settings.max_cg_iterations; ++cgIter) {
     H.Mult(d, Hd);
 
-    auto dots = delegate.dot_many_4(d, rCurrent, d, Hd, z, d, d, d);
+    auto dots = dot_many({{&d, &rCurrent}, {&d, &Hd}, {&z, &d}, {&d, &d}});
     double descent_check = dots[0];
     double curvature = dots[1];
     double zd = dots[2];
@@ -84,7 +80,7 @@ void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem:
 
     const bool go_to_boundary = curvature <= 0 || zzNp1 >= trSize * trSize;
     if (go_to_boundary) {
-      delegate.projectToBoundaryWithCoefs(z, d, trSize, zz, zd, dd);
+      projectToBoundaryWithCoefs(z, d, trSize, zz, zd, dd);
       if (curvature <= 0) {
         results.interior_status = TrustRegionResults::Status::NegativeCurvature;
       } else {
@@ -94,14 +90,15 @@ void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem:
     }
 
     auto& zPred = Pr;
-    smith_add(z, alphaCg, d, zPred);
+    zPred = z;
+    zPred.Add(alphaCg, d);
     z = zPred;
 
     if (results.interior_status == TrustRegionResults::Status::NonDescentDirection) {
       return;
     }
 
-    smith_add(rCurrent, alphaCg, Hd, rCurrent);
+    rCurrent.Add(alphaCg, Hd);
 
     if (P) {
       P->Mult(rCurrent, Pr);
@@ -109,7 +106,7 @@ void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem:
       Pr = rCurrent;
     }
 
-    auto dots2 = delegate.dot_many_2(rCurrent, Pr, rCurrent, rCurrent);
+    auto dots2 = dot_many({{&rCurrent, &Pr}, {&rCurrent, &rCurrent}});
     double rPrNp1 = dots2[0];
     double r_current_norm_squared = dots2[1];
 
diff --git a/src/smith/numerics/steihaug_toint_cg.hpp b/src/smith/numerics/steihaug_toint_cg.hpp
index c44316b637..d519590920 100644
--- a/src/smith/numerics/steihaug_toint_cg.hpp
+++ b/src/smith/numerics/steihaug_toint_cg.hpp
@@ -6,8 +6,12 @@
 
 #pragma once
 
-#include "mfem.hpp"
 #include <array>
+#include <functional>
+#include <utility>
+#include <vector>
+
+#include "mfem.hpp"
 
 namespace smith {
 
@@ -104,25 +108,8 @@ struct TrustRegionResults {
   size_t cg_iterations_count = 0;
 };
 
-/// Delegate for operations Steihaug-Toint CG needs from its caller.
-class SteihaugTointDelegate {
- public:
-  /// destructor
-  virtual ~SteihaugTointDelegate() = default;
-
-  /// compute four inner products with one global reduction.
-  virtual std::array<double, 4> dot_many_4(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1,
-                                           const mfem::Vector& b1, const mfem::Vector& a2, const mfem::Vector& b2,
-                                           const mfem::Vector& a3, const mfem::Vector& b3) const = 0;
-
-  /// compute two inner products with one global reduction.
-  virtual std::array<double, 2> dot_many_2(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1,
-                                           const mfem::Vector& b1) const = 0;
-
-  /// project z along d to the trust-region boundary using precomputed dot products.
-  virtual void projectToBoundaryWithCoefs(mfem::Vector& z, const mfem::Vector& d, double delta, double zz, double zd,
-                                          double dd) const = 0;
-};
+using DotPair = std::pair<const mfem::Vector*, const mfem::Vector*>;
+using DotManyFunction = std::function<std::vector<double>(const std::vector<DotPair>&)>;
 
 /**
  * @brief Minimize quadratic sub-problem given residual vector, the action of the stiffness and a preconditioner
@@ -133,6 +120,6 @@ class SteihaugTointDelegate {
  */
 void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem::Operator& H, const mfem::Solver* P,
                      const TrustRegionSettings& settings, double& trSize, TrustRegionResults& results,
-                     double r0_norm_squared, const SteihaugTointDelegate& delegate);
+                     double r0_norm_squared, const DotManyFunction& dot_many);
 
 }  // namespace smith
diff --git a/src/smith/numerics/tests/CMakeLists.txt b/src/smith/numerics/tests/CMakeLists.txt
index 617a0a4f11..aa2ea5e011 100644
--- a/src/smith/numerics/tests/CMakeLists.txt
+++ b/src/smith/numerics/tests/CMakeLists.txt
@@ -32,7 +32,6 @@ if(PETSC_FOUND)
     if(SLEPC_FOUND)
         set(slepc_solver_tests
             test_eigensolver.cpp
-            test_trust_region_solver_petsc.cpp
             )
         smith_add_tests(SOURCES       ${slepc_solver_tests}
                         DEPENDS_ON    ${numerics_test_dependencies}
diff --git a/src/smith/numerics/tests/test_steihaug_toint_cg.cpp b/src/smith/numerics/tests/test_steihaug_toint_cg.cpp
index bd48fcbba4..03c212aa0d 100644
--- a/src/smith/numerics/tests/test_steihaug_toint_cg.cpp
+++ b/src/smith/numerics/tests/test_steihaug_toint_cg.cpp
@@ -9,44 +9,15 @@
 
 namespace {
 
-class TestDelegate : public smith::SteihaugTointDelegate {
- public:
-  std::array<double, 4> dot_many_4(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1,
-                                   const mfem::Vector& b1, const mfem::Vector& a2, const mfem::Vector& b2,
-                                   const mfem::Vector& a3, const mfem::Vector& b3) const override
-  {
-    return {a0 * b0, a1 * b1, a2 * b2, a3 * b3};
-  }
-
-  std::array<double, 2> dot_many_2(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1,
-                                   const mfem::Vector& b1) const override
-  {
-    return {a0 * b0, a1 * b1};
-  }
-
-  void projectToBoundaryWithCoefs(mfem::Vector& z, const mfem::Vector& d, double delta, double zz, double zd,
-                                  double dd) const override
-  {
-    double deltadelta_m_zz = delta * delta - zz;
-    if (deltadelta_m_zz <= 0) return;
-    double tau = (std::sqrt(deltadelta_m_zz * dd + zd * zd) - zd) / dd;
-    z.Add(tau, d);
-  }
-};
-
-class DiagonalOperator : public mfem::Operator {
- public:
-  DiagonalOperator(const mfem::Vector& diag) : mfem::Operator(diag.Size()), diag_(diag) {}
-  void Mult(const mfem::Vector& x, mfem::Vector& y) const override
-  {
-    for (int i = 0; i < height; ++i) {
-      y[i] = diag_[i] * x[i];
-    }
+std::vector<double> dot_many(const std::vector<smith::DotPair>& pairs)
+{
+  std::vector<double> out;
+  out.reserve(pairs.size());
+  for (const auto& [a, b] : pairs) {
+    out.push_back((*a) * (*b));
   }
-
- private:
-  const mfem::Vector& diag_;
-};
+  return out;
+}
 
 }  // namespace
 
@@ -56,7 +27,7 @@ TEST(SteihaugTointCG, SolvesSPDInsideBoundary)
   mfem::Vector diag(size);
   diag[0] = 2.0;
   diag[1] = 4.0;
-  DiagonalOperator H(diag);
+  mfem::SparseMatrix H(diag);
 
   mfem::Vector r0(size);
   r0[0] = 1.0;
@@ -70,9 +41,8 @@ TEST(SteihaugTointCG, SolvesSPDInsideBoundary)
   smith::TrustRegionResults results(size);
 
   mfem::Vector rCurrent(size);
-  TestDelegate delegate;
 
-  smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), delegate);
+  smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), dot_many);
 
   // Solution should be H^{-1} (-r0)
   // x = -0.5, y = -0.25
@@ -86,7 +56,7 @@ TEST(SteihaugTointCG, HitsBoundary)
   int size = 1;
   mfem::Vector diag(size);
   diag[0] = 1.0;
-  DiagonalOperator H(diag);
+  mfem::SparseMatrix H(diag);
 
   mfem::Vector r0(size);
   r0[0] = 1.0;
@@ -98,9 +68,8 @@ TEST(SteihaugTointCG, HitsBoundary)
   smith::TrustRegionResults results(size);
 
   mfem::Vector rCurrent(size);
-  TestDelegate delegate;
 
-  smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), delegate);
+  smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), dot_many);
 
   EXPECT_NEAR(results.z.Norml2(), 0.5, 1e-9);
   EXPECT_EQ(results.interior_status, smith::TrustRegionResults::Status::OnBoundary);
@@ -111,7 +80,7 @@ TEST(SteihaugTointCG, DetectsNegativeCurvature)
   int size = 1;
   mfem::Vector diag(size);
   diag[0] = -1.0;  // Negative curvature
-  DiagonalOperator H(diag);
+  mfem::SparseMatrix H(diag);
 
   mfem::Vector r0(size);
   r0[0] = 1.0;
@@ -123,9 +92,8 @@ TEST(SteihaugTointCG, DetectsNegativeCurvature)
   smith::TrustRegionResults results(size);
 
   mfem::Vector rCurrent(size);
-  TestDelegate delegate;
 
-  smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), delegate);
+  smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), dot_many);
 
   // For negative curvature, it should go to boundary
   EXPECT_NEAR(results.z.Norml2(), 2.0, 1e-9);
diff --git a/src/smith/numerics/tests/test_trust_region_solver_petsc.cpp b/src/smith/numerics/tests/test_trust_region_solver_petsc.cpp
deleted file mode 100644
index 55c7a16f77..0000000000
--- a/src/smith/numerics/tests/test_trust_region_solver_petsc.cpp
+++ /dev/null
@@ -1,225 +0,0 @@
-// Copyright (c) Lawrence Livermore National Security, LLC and
-// other Smith Project Developers. See the top-level LICENSE file for
-// details.
-//
-// SPDX-License-Identifier: (BSD-3-Clause)
-
-#include <cmath>
-#include <stdexcept>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "gtest/gtest.h"
-#include "mfem.hpp"
-
-#include "smith/physics/state/state_manager.hpp"
-#include "smith/infrastructure/application_manager.hpp"
-#include "smith/numerics/trust_region_solver.hpp"
-#include "smith/infrastructure/profiling.hpp"
-#include "smith/mesh_utils/mesh_utils.hpp"
-#include "smith/numerics/functional/finite_element.hpp"
-#include "smith/physics/state/finite_element_state.hpp"
-#include "smith/physics/state/finite_element_vector.hpp"
-#include "smith/numerics/petsc_solvers.hpp"
-
-#ifdef SMITH_TRUST_REGION_USE_PETSC_SUBSPACE
-
-const std::string MESHTAG = "mesh";
-
-static constexpr int scalar_field_order = 1;
-
-struct MeshFixture : public testing::Test {
-  void SetUp()
-  {
-    smith::StateManager::initialize(datastore_, "solver_test");
-
-    auto mfem_shape = mfem::Element::QUADRILATERAL;
-
-    double length = 0.5;
-    double width = 2.0;
-    auto meshtmp =
-        smith::mesh::refineAndDistribute(mfem::Mesh::MakeCartesian2D(2, 1, mfem_shape, true, length, width), 0, 0);
-    mesh_ = &smith::StateManager::setMesh(std::move(meshtmp), MESHTAG);
-  }
-
-  axom::sidre::DataStore datastore_;
-  mfem::ParMesh* mesh_;
-};
-
-std::vector<mfem::Vector> applyLinearOperator(const Mat& A, const std::vector<const mfem::Vector*>& states)
-{
-  std::vector<mfem::Vector> Astates;
-  for (auto s : states) {
-    Astates.emplace_back(*s);
-  }
-
-  int local_rows(states[0]->Size());
-  int global_rows(smith::globalSize(*states[0], PETSC_COMM_WORLD));
-
-  Vec x;
-  Vec y;
-
-  VecCreateMPI(PETSC_COMM_WORLD, local_rows, global_rows, &x);
-  VecCreateMPI(PETSC_COMM_WORLD, local_rows, global_rows, &y);
-
-  PetscInt iStart, iEnd;
-  VecGetOwnershipRange(x, &iStart, &iEnd);
-
-  std::vector<int> col_indices;
-  col_indices.reserve(static_cast<size_t>(local_rows));
-  for (int i = iStart; i < iEnd; ++i) {
-    col_indices.push_back(i);
-  }
-
-  size_t num_cols = states.size();
-  for (size_t c = 0; c < num_cols; ++c) {
-    VecSetValues(x, local_rows, &col_indices[0], &(*states[c])[0], INSERT_VALUES);
-    VecAssemblyBegin(x);
-    VecAssemblyEnd(x);
-    MatMult(A, x, y);
-    VecGetValues(y, local_rows, &col_indices[0], &Astates[c][0]);
-  }
-
-  VecDestroy(&x);
-  VecDestroy(&y);
-
-  return Astates;
-}
-
-auto createDiagonalTestMatrix(mfem::Vector& x)
-{
-  const int local_rows = x.Size();
-  const int global_rows = smith::globalSize(x, PETSC_COMM_WORLD);
-
-  Vec b;
-  VecCreateMPI(PETSC_COMM_WORLD, local_rows, global_rows, &b);
-
-  PetscInt iStart, iEnd;
-  VecGetOwnershipRange(b, &iStart, &iEnd);
-  VecDestroy(&b);
-
-  std::vector<int> col_indices;
-  col_indices.reserve(static_cast<size_t>(local_rows));
-  for (int i = iStart; i < iEnd; ++i) {
-    col_indices.push_back(i);
-  }
-
-  std::vector<int> row_offsets(static_cast<size_t>(local_rows) + 1);
-  for (int i = 0; i < local_rows + 1; ++i) {
-    row_offsets[static_cast<size_t>(i)] = i;
-  }
-
-  Mat A;
-  MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD, local_rows, local_rows, global_rows, global_rows, &row_offsets[0],
-                            &col_indices[0], &x[0], &A);
-
-  return A;
-}
-
-void expectNearVector(const mfem::Vector& a, const mfem::Vector& b, double tol)
-{
-  ASSERT_EQ(a.Size(), b.Size());
-  for (int i = 0; i < a.Size(); ++i) {
-    EXPECT_NEAR(a[i], b[i], tol);
-  }
-}
-
-TEST_F(MeshFixture, PetscSubspaceSolveHitsTrustRegionBoundary)
-{
-  SMITH_MARK_FUNCTION;
-
-  auto u1 = smith::StateManager::newState(smith::H1<scalar_field_order, 1>{}, "u1", MESHTAG);
-  auto u2 = smith::StateManager::newState(smith::H1<scalar_field_order, 1>{}, "u2", MESHTAG);
-  auto u3 = smith::StateManager::newState(smith::H1<scalar_field_order, 1>{}, "u3", MESHTAG);
-  auto a = smith::StateManager::newState(smith::H1<scalar_field_order, 1>{}, "a", MESHTAG);
-  auto b = smith::StateManager::newState(smith::H1<scalar_field_order, 1>{}, "b", MESHTAG);
-
-  u1 = 1.0;
-  for (int i = 0; i < u2.Size(); ++i) {
-    u2[i] = i + 2;
-    u3[i] = i * i - 15.0;
-    a[i] = 2 * i + 0.01 * i * i + 1.25;
-    b[i] = -i + 0.02 * i * i + 0.1;
-  }
-  std::vector<const mfem::Vector*> states = {&u1, &u2, &u3};
-
-  auto A_parallel = createDiagonalTestMatrix(a);
-  std::vector<mfem::Vector> Astates = applyLinearOperator(A_parallel, states);
-
-  std::vector<const mfem::Vector*> AstatePtrs;
-  for (size_t i = 0; i < Astates.size(); ++i) {
-    AstatePtrs.push_back(&Astates[i]);
-  }
-
-  double delta = 0.001;
-  auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblemPetsc(states, AstatePtrs, b, delta, 1);
-
-  EXPECT_NEAR(sol.Norml2(), delta, 1e-12);
-  EXPECT_FALSE(leftvecs.empty());
-  EXPECT_EQ(leftvals.size(), 1);
-  EXPECT_LT(energy, 0.0);
-
-  MatDestroy(&A_parallel);
-}
-
-TEST_F(MeshFixture, MfemSubspaceSolveMatchesPetsc)
-{
-  SMITH_MARK_FUNCTION;
-
-  auto u1 = smith::StateManager::newState(smith::H1<scalar_field_order, 1>{}, "u1", MESHTAG);
-  auto u2 = smith::StateManager::newState(smith::H1<scalar_field_order, 1>{}, "u2", MESHTAG);
-  auto u3 = smith::StateManager::newState(smith::H1<scalar_field_order, 1>{}, "u3", MESHTAG);
-  auto a = smith::StateManager::newState(smith::H1<scalar_field_order, 1>{}, "a", MESHTAG);
-  auto b = smith::StateManager::newState(smith::H1<scalar_field_order, 1>{}, "b", MESHTAG);
-
-  u1 = 1.0;
-  for (int i = 0; i < u2.Size(); ++i) {
-    u2[i] = i + 2;
-    u3[i] = i * i - 15.0;
-    a[i] = 2 * i + 0.01 * i * i + 1.25;
-    b[i] = -i + 0.02 * i * i + 0.1;
-  }
-
-  std::vector<const mfem::Vector*> states = {&u1, &u2, &u3, &u2};
-  auto A_parallel = createDiagonalTestMatrix(a);
-  std::vector<mfem::Vector> Astates = applyLinearOperator(A_parallel, states);
-
-  std::vector<const mfem::Vector*> AstatePtrs;
-  for (size_t i = 0; i < Astates.size(); ++i) {
-    AstatePtrs.push_back(&Astates[i]);
-  }
-
-  auto [petsc_sol, petsc_leftvecs, petsc_leftvals, petsc_energy] =
-      smith::solveSubspaceProblemPetsc(states, AstatePtrs, b, 0.001, 2);
-  auto [mfem_sol, mfem_leftvecs, mfem_leftvals, mfem_energy] =
-      smith::solveSubspaceProblemMfem(states, AstatePtrs, b, 0.001, 2);
-
-  expectNearVector(mfem_sol, petsc_sol, 1e-10);
-  ASSERT_EQ(mfem_leftvecs.size(), petsc_leftvecs.size());
-  ASSERT_EQ(mfem_leftvals.size(), petsc_leftvals.size());
-  for (size_t i = 0; i < mfem_leftvecs.size(); ++i) {
-    const double same = smith::innerProduct(*mfem_leftvecs[i], *petsc_leftvecs[i], MPI_COMM_WORLD);
-    mfem::Vector neg(*petsc_leftvecs[i]);
-    neg *= -1.0;
-    const double flipped = smith::innerProduct(*mfem_leftvecs[i], neg, MPI_COMM_WORLD);
-    if (std::abs(flipped) > std::abs(same)) {
-      expectNearVector(*mfem_leftvecs[i], neg, 1e-9);
-    } else {
-      expectNearVector(*mfem_leftvecs[i], *petsc_leftvecs[i], 1e-9);
-    }
-    EXPECT_NEAR(mfem_leftvals[i], petsc_leftvals[i], 1e-10);
-  }
-  EXPECT_NEAR(mfem_energy, petsc_energy, 1e-12);
-
-  MatDestroy(&A_parallel);
-}
-
-#endif  // SMITH_TRUST_REGION_USE_PETSC_SUBSPACE
-
-int main(int argc, char* argv[])
-{
-  ::testing::InitGoogleTest(&argc, argv);
-  smith::ApplicationManager applicationManager(argc, argv);
-  return RUN_ALL_TESTS();
-}
diff --git a/src/smith/numerics/trust_region_solver.hpp b/src/smith/numerics/trust_region_solver.hpp
index 98dc9486b8..1a024e5032 100644
--- a/src/smith/numerics/trust_region_solver.hpp
+++ b/src/smith/numerics/trust_region_solver.hpp
@@ -55,12 +55,6 @@ TrustRegionSubspaceResult solveSubspaceProblem(const std::vector<const mfem::Vec
                                                const std::vector<const mfem::Vector*>& A_directions,
                                                const mfem::Vector& b, double delta, int num_leftmost);
 
-#if defined(SMITH_USE_SLEPC) && defined(SMITH_TRUST_REGION_USE_PETSC_SUBSPACE)
-TrustRegionSubspaceResult solveSubspaceProblemPetsc(const std::vector<const mfem::Vector*>& directions,
-                                                    const std::vector<const mfem::Vector*>& A_directions,
-                                                    const mfem::Vector& b, double delta, int num_leftmost);
-#endif
-
 TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector<const mfem::Vector*>& directions,
                                                    const std::vector<const mfem::Vector*>& A_directions,
                                                    const mfem::Vector& b, double delta, int num_leftmost);
diff --git a/src/smith/physics/functional_weak_form.hpp b/src/smith/physics/functional_weak_form.hpp
index 5852a0388a..18c292dcab 100644
--- a/src/smith/physics/functional_weak_form.hpp
+++ b/src/smith/physics/functional_weak_form.hpp
@@ -15,7 +15,6 @@
 
 #include "smith/physics/weak_form.hpp"
 #include "smith/physics/mesh.hpp"
-#include "smith/numerics/equation_solver.hpp"
 #include "smith/numerics/functional/shape_aware_functional.hpp"
 #include "smith/physics/state/finite_element_state.hpp"
 #include "smith/physics/state/finite_element_dual.hpp"
diff --git a/src/smith/physics/solid_mechanics.hpp b/src/smith/physics/solid_mechanics.hpp
index a2d66cf8d5..f7d2706114 100644
--- a/src/smith/physics/solid_mechanics.hpp
+++ b/src/smith/physics/solid_mechanics.hpp
@@ -12,14 +12,12 @@
 
 #pragma once
 
-#include <chrono>
 #include <cstddef>
 #include <array>
 #include <functional>
 #include <memory>
 #include <optional>
 #include <string>
-#include <type_traits>
 #include <unordered_map>
 #include <utility>
 #include <vector>
diff --git a/src/smith/physics/tests/shallow_arch_buckling.cpp b/src/smith/physics/tests/shallow_arch_buckling.cpp
index b554a11efd..258332731f 100644
--- a/src/smith/physics/tests/shallow_arch_buckling.cpp
+++ b/src/smith/physics/tests/shallow_arch_buckling.cpp
@@ -36,9 +36,6 @@ int print_level = 2;
 int nonlinear_max_iterations = 300000;
 int trust_subspace_option = static_cast<int>(SubSpaceOptions::NEVER);
 int trust_num_leftmost = 1;
-int trust_num_past_steps = 0;
-bool trust_use_solve_start_direction = false;
-bool trust_use_min_residual_direction = false;
 
 NonlinearSolver selectedNonlinearSolver()
 {
@@ -67,14 +64,6 @@ void parseCommandLine(int& argc, char** argv)
       trust_subspace_option = std::stoi(arg.substr(std::string("--trust-subspace-option=").size()));
     } else if (arg.rfind("--trust-num-leftmost=", 0) == 0) {
       trust_num_leftmost = std::stoi(arg.substr(std::string("--trust-num-leftmost=").size()));
-    } else if (arg.rfind("--trust-num-past-steps=", 0) == 0) {
-      trust_num_past_steps = std::stoi(arg.substr(std::string("--trust-num-past-steps=").size()));
-    } else if (arg.rfind("--trust-use-solve-start-direction=", 0) == 0) {
-      const std::string value = arg.substr(std::string("--trust-use-solve-start-direction=").size());
-      trust_use_solve_start_direction = (value == "1" || value == "true" || value == "on");
-    } else if (arg.rfind("--trust-use-min-residual-direction=", 0) == 0) {
-      const std::string value = arg.substr(std::string("--trust-use-min-residual-direction=").size());
-      trust_use_min_residual_direction = (value == "1" || value == "true" || value == "on");
     } else {
       argv[write_arg] = argv[read_arg];
       ++write_arg;
@@ -130,10 +119,7 @@ TEST(ShallowArchBuckling, CompressedThinBeamSnapThrough)
       .max_iterations = nonlinear_max_iterations,
       .print_level = print_level,
       .subspace_option = static_cast<SubSpaceOptions>(trust_subspace_option),
-      .num_leftmost = trust_num_leftmost,
-      .trust_num_past_steps = trust_num_past_steps,
-      .trust_use_solve_start_direction = trust_use_solve_start_direction,
-      .trust_use_min_residual_direction = trust_use_min_residual_direction};
+      .num_leftmost = trust_num_leftmost};
 
   SolidMechanics<p, dim> solid(nonlinear_options, linear_options, solid_mechanics::default_quasistatic_options,
                                "compressed_beam", mesh);
@@ -163,8 +149,8 @@ TEST(ShallowArchBuckling, CompressedThinBeamSnapThrough)
 
   SLIC_INFO_ROOT(
       std::format("Compressed thin beam snap-through run: solver = {}, trust_subspace_option = {}, "
-                  "trust_num_leftmost = {}, trust_num_past_steps = {}",
-                  solver_name, trust_subspace_option, trust_num_leftmost, trust_num_past_steps));
+                  "trust_num_leftmost = {}",
+                  solver_name, trust_subspace_option, trust_num_leftmost));
 
   constexpr int num_steps = 5;
   for (int step = 0; step < num_steps; ++step) {

From 7d0edd35731ac3dd072b193038a56e09e52efc88 Mon Sep 17 00:00:00 2001
From: Michael Tupek <tupek2@llnl.gov>
Date: Tue, 12 May 2026 20:07:04 -0700
Subject: [PATCH 20/27] Add Doxygen comments for the DotPair and
 DotManyFunction aliases.

---
 src/smith/numerics/steihaug_toint_cg.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/smith/numerics/steihaug_toint_cg.hpp b/src/smith/numerics/steihaug_toint_cg.hpp
index d519590920..24b5a43801 100644
--- a/src/smith/numerics/steihaug_toint_cg.hpp
+++ b/src/smith/numerics/steihaug_toint_cg.hpp
@@ -108,8 +108,8 @@ struct TrustRegionResults {
   size_t cg_iterations_count = 0;
 };
 
-using DotPair = std::pair<const mfem::Vector*, const mfem::Vector*>;
-using DotManyFunction = std::function<std::vector<double>(const std::vector<DotPair>&)>;
+using DotPair = std::pair<const mfem::Vector*, const mfem::Vector*>;                      ///< using
+using DotManyFunction = std::function<std::vector<double>(const std::vector<DotPair>&)>;  ///< using
 
 /**
  * @brief Minimize quadratic sub-problem given residual vector, the action of the stiffness and a preconditioner

From b4117acfe49e063de96603e347c95f17e7c034e8 Mon Sep 17 00:00:00 2001
From: Michael Tupek <tupek2@llnl.gov>
Date: Wed, 13 May 2026 12:45:57 -0700
Subject: [PATCH 21/27] Try to simplify the leftmost-eigenvector logic in the
 line search a bit.

---
 src/smith/numerics/equation_solver.cpp        | 160 ++++-----
 .../numerics/mfem_trust_region_subspace.cpp   | 324 ++++++++++--------
 src/smith/numerics/steihaug_toint_cg.cpp      |  10 +
 src/smith/numerics/steihaug_toint_cg.hpp      |   3 +
 .../numerics/tests/test_steihaug_toint_cg.cpp |  23 +-
 .../tests/test_trust_region_solver_mfem.cpp   |  43 +--
 src/smith/numerics/trust_region_solver.hpp    |  30 ++
 7 files changed, 294 insertions(+), 299 deletions(-)

diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp
index 9d83657269..0e873fc155 100644
--- a/src/smith/numerics/equation_solver.cpp
+++ b/src/smith/numerics/equation_solver.cpp
@@ -29,9 +29,10 @@ namespace smith {
 
 namespace {
 
-#ifdef MFEM_USE_MPI
-size_t rootOnlyPrintLevel(size_t level, MPI_Comm comm)
+size_t rootOnlyPrintLevel(const mfem::NewtonSolver& solver, size_t level)
 {
+#ifdef MFEM_USE_MPI
+  const MPI_Comm comm = solver.GetComm();
   if (level > 0 && comm != MPI_COMM_NULL) {
     int rank = 0;
     MPI_Comm_rank(comm, &rank);
@@ -39,11 +40,9 @@ size_t rootOnlyPrintLevel(size_t level, MPI_Comm comm)
       return 0;
     }
   }
+#endif
   return level;
 }
-#else
-size_t rootOnlyPrintLevel(size_t level) { return level; }
-#endif
 
 }  // namespace
 
@@ -119,12 +118,7 @@ class NewtonSolver : public mfem::NewtonSolver {
     print_level = static_cast<size_t>(std::max(nonlinear_options.print_level, 0));
     print_level = print_options.iterations ? std::max<size_t>(1, print_level) : print_level;
     print_level = print_options.summary ? std::max<size_t>(2, print_level) : print_level;
-    print_level = rootOnlyPrintLevel(print_level
-#ifdef MFEM_USE_MPI
-                                     ,
-                                     GetComm()
-#endif
-    );
+    print_level = rootOnlyPrintLevel(*this, print_level);
 
     using real_t = mfem::real_t;
 
@@ -300,46 +294,15 @@ class TrustRegion : public mfem::NewtonSolver {
   /// compute several vector inner products with a single MPI reduction when possible
   std::vector<double> dot_many(const std::vector<DotPair>& pairs) const
   {
-    std::vector<double> products(pairs.size(), 0.0);
-
     if (dot_oper) {
+      std::vector<double> products(pairs.size(), 0.0);
       for (size_t i = 0; i < pairs.size(); ++i) {
         products[i] = Dot(*pairs[i].first, *pairs[i].second);
       }
       return products;
     }
 
-    std::vector<int> sizes(pairs.size());
-    std::vector<const double*> ptr_a(pairs.size());
-    std::vector<const double*> ptr_b(pairs.size());
-    for (size_t i = 0; i < pairs.size(); ++i) {
-      sizes[i] = pairs[i].first->Size();
-      MFEM_ASSERT(sizes[i] == pairs[i].second->Size(), "Incompatible vector sizes.");
-      ptr_a[i] = pairs[i].first->GetData();
-      ptr_b[i] = pairs[i].second->GetData();
-    }
-
-    bool all_same_size = true;
-    for (size_t i = 1; i < pairs.size(); ++i) {
-      if (sizes[i] != sizes[0]) {
-        all_same_size = false;
-        break;
-      }
-    }
-
-    if (all_same_size && !pairs.empty()) {
-      for (int j = 0; j < sizes[0]; ++j) {
-        for (size_t i = 0; i < pairs.size(); ++i) {
-          products[i] += ptr_a[i][j] * ptr_b[i][j];
-        }
-      }
-    } else {
-      for (size_t i = 0; i < pairs.size(); ++i) {
-        for (int j = 0; j < sizes[i]; ++j) {
-          products[i] += ptr_a[i][j] * ptr_b[i][j];
-        }
-      }
-    }
+    std::vector<double> products = smith::dotMany(pairs);
 
 #ifdef MFEM_USE_MPI
     const MPI_Comm dot_comm = GetComm();
@@ -354,43 +317,47 @@ class TrustRegion : public mfem::NewtonSolver {
     return products;
   }
 
-  /// solve the exact trust-region subspace problem with directions ds, and the leftmosts
-  template <typename HessVecFunc>
-  void solveTheSubspaceProblem([[maybe_unused]] mfem::Vector& z, [[maybe_unused]] const HessVecFunc& hess_vec_func,
-                               [[maybe_unused]] const std::vector<const mfem::Vector*> ds,
-                               [[maybe_unused]] const std::vector<const mfem::Vector*> Hds,
-                               [[maybe_unused]] const mfem::Vector& g, [[maybe_unused]] double delta,
-                               [[maybe_unused]] int num_leftmost,
-                               [[maybe_unused]] std::vector<std::shared_ptr<mfem::Vector>>& candidate_left_mosts) const
+  /// build reusable subspace data for line-search retries
+  void prepareSubspaceProblemCache([[maybe_unused]] const std::vector<const mfem::Vector*>& ds,
+                                   [[maybe_unused]] const std::vector<const mfem::Vector*>& Hds,
+                                   [[maybe_unused]] const mfem::Vector& g, [[maybe_unused]] int num_leftmost,
+                                   [[maybe_unused]] CachedTrustRegionSubspaceProblem& prepared_subspace) const
   {
+#ifdef MFEM_USE_LAPACK
     SMITH_MARK_FUNCTION;
-    std::vector<const mfem::Vector*> directions;
-    for (auto& d : ds) {
-      directions.emplace_back(d);
-    }
-    for (auto& left : left_mosts) {
-      directions.emplace_back(left.get());
-    }
-
-    std::vector<const mfem::Vector*> H_directions;
-    for (auto& Hd : Hds) {
-      H_directions.emplace_back(Hd);
-    }
-    for (auto& H_left : H_left_mosts) {
-      H_directions.emplace_back(H_left.get());
-    }
+    std::vector<const mfem::Vector*> directions(ds.begin(), ds.end());
+    std::vector<const mfem::Vector*> H_directions(Hds.begin(), Hds.end());
+    for (auto& left : left_mosts) directions.emplace_back(left.get());
+    for (auto& H_left : H_left_mosts) H_directions.emplace_back(H_left.get());
 
     mfem::Vector b(g);
     b *= -1;
 
+    try {
+      prepared_subspace = smith::prepareSubspaceProblem(directions, H_directions, b, num_leftmost);
+    } catch (const std::exception& e) {
+      if (print_level >= 1) {
+        mfem::out << "subspace solve failed with " << e.what() << std::endl;
+      }
+      return;
+    }
+#endif
+  }
+
+  /// solve cached exact trust-region subspace problem for current trust-region size
+  template <typename HessVecFunc>
+  void solvePreparedSubspaceProblem([[maybe_unused]] mfem::Vector& z, [[maybe_unused]] const HessVecFunc& hess_vec_func,
+                                    [[maybe_unused]] const CachedTrustRegionSubspaceProblem& prepared_subspace,
+                                    [[maybe_unused]] const mfem::Vector& g, [[maybe_unused]] double delta) const
+  {
+#ifdef MFEM_USE_LAPACK
+    SMITH_MARK_FUNCTION;
     mfem::Vector sol;
-    std::vector<std::shared_ptr<mfem::Vector>> leftvecs;
-    std::vector<double> leftvals;
     double energy_change;
 
     try {
-      std::tie(sol, leftvecs, leftvals, energy_change) =
-          solveSubspaceProblem(directions, H_directions, b, delta, num_leftmost);
+      std::tie(sol, std::ignore, std::ignore, energy_change) =
+          smith::solvePreparedSubspaceProblem(prepared_subspace, delta);
     } catch (const std::exception& e) {
       if (print_level >= 1) {
         mfem::out << "subspace solve failed with " << e.what() << std::endl;
@@ -398,16 +365,11 @@ class TrustRegion : public mfem::NewtonSolver {
       return;
     }
 
-    candidate_left_mosts.clear();
-    for (auto& lv : leftvecs) {
-      candidate_left_mosts.emplace_back(std::move(lv));
-    }
-
     double base_energy = computeEnergy(g, hess_vec_func, z);
     double subspace_energy = computeEnergy(g, hess_vec_func, sol);
 
     if (print_level >= 2) {
-      double leftval = leftvals.size() ? leftvals[0] : 1.0;
+      double leftval = prepared_subspace.leftvals.size() ? prepared_subspace.leftvals[0] : 1.0;
       mfem::out << "Energy using subspace solver from: " << base_energy << ", to: " << subspace_energy << " / "
                 << energy_change << ".  Min eig: " << leftval << std::endl;
     }
@@ -415,6 +377,7 @@ class TrustRegion : public mfem::NewtonSolver {
     if (subspace_energy < base_energy) {
       z = sol;
     }
+#endif
   }
 
   /// finds tau s.t. (z + tau*(y-z))^2 = trSize^2
@@ -516,12 +479,7 @@ class TrustRegion : public mfem::NewtonSolver {
     print_level = static_cast<size_t>(std::max(nonlinear_options.print_level, 0));
     print_level = print_options.iterations ? std::max<size_t>(1, print_level) : print_level;
     print_level = print_options.summary ? std::max<size_t>(2, print_level) : print_level;
-    print_level = rootOnlyPrintLevel(print_level
-#ifdef MFEM_USE_MPI
-                                     ,
-                                     GetComm()
-#endif
-    );
+    print_level = rootOnlyPrintLevel(*this, print_level);
 
     using real_t = mfem::real_t;
 
@@ -636,7 +594,13 @@ class TrustRegion : public mfem::NewtonSolver {
 
       bool have_computed_Hvs = false;
       bool have_computed_H_left_mosts = false;
-      std::vector<std::shared_ptr<mfem::Vector>> candidate_left_mosts;
+      bool have_prepared_subspace = false;
+      CachedTrustRegionSubspaceProblem prepared_subspace;
+#ifdef MFEM_USE_LAPACK
+      constexpr bool can_use_subspace_solver = true;
+#else
+      constexpr bool can_use_subspace_solver = false;
+#endif
 
       int lineSearchIter = 0;
       while (lineSearchIter <= nonlinear_options.max_line_search_iterations) {
@@ -652,7 +616,7 @@ class TrustRegion : public mfem::NewtonSolver {
         bool use_with_option2 = (subspace_option >= 2) && (d_norm > (1.0 - 1.0e-6) * tr_size);
         bool use_with_option3 = (subspace_option >= 3);
 
-        if (use_with_option1 || use_with_option2 || use_with_option3) {
+        if (can_use_subspace_solver && (use_with_option1 || use_with_option2 || use_with_option3)) {
           if (!have_computed_Hvs) {
             have_computed_Hvs = true;
 
@@ -687,14 +651,20 @@ class TrustRegion : public mfem::NewtonSolver {
             }
           }
 
-          std::vector<const mfem::Vector*> ds{&trResults.z, &trResults.cauchy_point};
-          std::vector<const mfem::Vector*> H_ds{&trResults.H_z, &trResults.H_cauchy_point};
-          if (trResults.has_d_old) {
-            ds.push_back(&trResults.d_old);
-            H_ds.push_back(&trResults.H_d_old);
+          if (!have_prepared_subspace) {
+            have_prepared_subspace = true;
+
+            std::vector<const mfem::Vector*> ds{&trResults.z, &trResults.cauchy_point};
+            std::vector<const mfem::Vector*> H_ds{&trResults.H_z, &trResults.H_cauchy_point};
+            if (trResults.has_d_old) {
+              ds.push_back(&trResults.d_old);
+              H_ds.push_back(&trResults.H_d_old);
+            }
+
+            prepareSubspaceProblemCache(ds, H_ds, r, num_leftmost, prepared_subspace);
           }
 
-          solveTheSubspaceProblem(trResults.d, hess_vec_func, ds, H_ds, r, tr_size, num_leftmost, candidate_left_mosts);
+          solvePreparedSubspaceProblem(trResults.d, hess_vec_func, prepared_subspace, r, tr_size);
         }
 
         static constexpr double roundOffTol = 0.0;  // 1e-14;
@@ -721,8 +691,8 @@ class TrustRegion : public mfem::NewtonSolver {
         if (normPred <= norm_goal) {
           trResults.d_old = trResults.d;
           trResults.has_d_old = true;
-          if (!candidate_left_mosts.empty()) {
-            left_mosts = std::move(candidate_left_mosts);
+          if (!prepared_subspace.leftmosts.empty()) {
+            left_mosts = prepared_subspace.leftmosts;
           }
           X = x_pred;
           r = r_pred;
@@ -776,8 +746,8 @@ class TrustRegion : public mfem::NewtonSolver {
         if (willAccept) {
           trResults.d_old = trResults.d;
           trResults.has_d_old = true;
-          if (!candidate_left_mosts.empty()) {
-            left_mosts = std::move(candidate_left_mosts);
+          if (!prepared_subspace.leftmosts.empty()) {
+            left_mosts = prepared_subspace.leftmosts;
           }
           X = x_pred;
           r = r_pred;
diff --git a/src/smith/numerics/mfem_trust_region_subspace.cpp b/src/smith/numerics/mfem_trust_region_subspace.cpp
index 9edb69756e..b725dcb53c 100644
--- a/src/smith/numerics/mfem_trust_region_subspace.cpp
+++ b/src/smith/numerics/mfem_trust_region_subspace.cpp
@@ -29,13 +29,6 @@ double innerProduct(const mfem::Vector& a, const mfem::Vector& b, const MPI_Comm
 
 #ifdef MFEM_USE_LAPACK
 
-TrustRegionSubspaceResult solveSubspaceProblem(const std::vector<const mfem::Vector*>& directions,
-                                               const std::vector<const mfem::Vector*>& A_directions,
-                                               const mfem::Vector& b, double delta, int num_leftmost)
-{
-  return solveSubspaceProblemMfem(directions, A_directions, b, delta, num_leftmost);
-}
-
 namespace {
 
 double dot(const mfem::Vector& a, const mfem::Vector& b) { return a * b; }
@@ -133,15 +126,6 @@ SubspaceProjections projectSubspaceGlobally(const std::vector<const mfem::Vector
   return globalSubspaceProjectionFromLocalInnerProducts(states, Astates, b);
 }
 
-mfem::Vector solveDense(const mfem::DenseMatrix& A, const mfem::Vector& b)
-{
-  mfem::DenseMatrix A_copy(A);
-  mfem::DenseMatrixInverse inv(A_copy);
-  mfem::Vector x(b.Size());
-  inv.Mult(b, x);
-  return x;
-}
-
 double quadraticEnergy(const mfem::DenseMatrix& A, const mfem::Vector& b, const mfem::Vector& x)
 {
   mfem::Vector Ax(x.Size());
@@ -187,40 +171,102 @@ mfem::DenseMatrix columnsToMatrix(const std::vector<mfem::Vector>& cols)
   return A;
 }
 
-/**
- * @brief Solves the exact trust region subproblem:
- *        min 1/2 x^T A x - b^T x, subject to ||x|| <= delta.
- *
- * Implements a variant of the Moore-Sorensen algorithm:
- * 1. Computes the eigensystem of A.
- * 2. Checks if the unconstrained minimum lies strictly inside the trust region.
- * 3. Checks for the "hard case" where the minimum eigenvalue is near zero or negative,
- *    and the Newton step points outside the trust region, requiring a shift along the leftmost eigenvector.
- * 4. Otherwise, performs a Newton iteration on the secular equation (1/||p(\lambda)|| - 1/delta = 0)
- *    to find the optimal Lagrange multiplier \lambda.
- *
- * @param A The reduced Hessian matrix (square).
- * @param b The reduced gradient vector.
- * @param delta The trust region radius.
- * @param num_leftmost The number of leftmost eigenvectors/values to return.
- * @return A tuple containing:
- *         - The optimal solution vector.
- *         - A list of the leftmost eigenvectors.
- *         - A list of the corresponding leftmost eigenvalues.
- *         - A boolean indicating success.
- */
-std::tuple<mfem::Vector, std::vector<mfem::Vector>, std::vector<double>, bool> exactTrustRegionSolve(
-    mfem::DenseMatrix A, const mfem::Vector& b, double delta, int num_leftmost)
+mfem::DenseMatrix orthonormalBasisTransform(const mfem::DenseMatrix& gram, double& trace_mag)
+{
+  mfem::DenseMatrix gram_copy(gram);
+  mfem::Vector evals;
+  mfem::DenseMatrix evecs;
+  gram_copy.Eigensystem(evals, evecs);
+
+  trace_mag = 0.0;
+  for (int i = 0; i < evals.Size(); ++i) {
+    trace_mag += std::abs(evals[i]);
+  }
+
+  std::vector<mfem::Vector> kept_columns;
+  for (int i = 0; i < evals.Size(); ++i) {
+    if (evals[i] > 1e-9 * trace_mag) {
+      mfem::Vector col = matrixColumn(evecs, i);
+      col /= std::sqrt(evals[i]);
+      kept_columns.emplace_back(std::move(col));
+    }
+  }
+
+  return columnsToMatrix(kept_columns);
+}
+
+mfem::DenseMatrix tripleProduct(const mfem::DenseMatrix& L, const mfem::DenseMatrix& A, const mfem::DenseMatrix& R)
+{
+  mfem::DenseMatrix tmp(A.Height(), R.Width());
+  mfem::Mult(A, R, tmp);
+  mfem::DenseMatrix out(L.Width(), R.Width());
+  mfem::MultAtB(L, tmp, out);
+  return out;
+}
+
+mfem::Vector projectWithTranspose(const mfem::DenseMatrix& A, const mfem::Vector& x)
+{
+  mfem::Vector out(A.Width());
+  A.MultTranspose(x, out);
+  return out;
+}
+
+mfem::Vector combineDirections(const std::vector<const mfem::Vector*>& states, const mfem::Vector& coeffs)
+{
+  mfem::Vector out(*states[0]);
+  out = 0.0;
+  for (int i = 0; i < coeffs.Size(); ++i) {
+    out.Add(coeffs[i], *states[size_t(i)]);
+  }
+  return out;
+}
+
+std::vector<const mfem::Vector*> toPointers(const std::vector<std::shared_ptr<mfem::Vector>>& vectors)
+{
+  // Raw, non-owning views: entry k is vectors[k].get(); ownership stays with the shared_ptrs.
+  std::vector<const mfem::Vector*> raw_views(vectors.size(), nullptr);
+  for (size_t k = 0; k < vectors.size(); ++k) {
+    raw_views[k] = vectors[k].get();
+  }
+  return raw_views;
+}
+
+std::vector<mfem::Vector> prepareExactTrustRegionLeftmosts(CachedTrustRegionSubspaceProblem& prepared, int num_leftmost)
 {
-  if (A.Height() != A.Width()) {
-    throw TrustRegionException("Exact trust region solver requires square matrices");
+  prepared.eigenvalues.SetSize(prepared.projected_rhs.Size());
+  prepared.eigenvectors.SetSize(prepared.projected_hessian.Height(), prepared.projected_hessian.Width());
+
+  mfem::DenseMatrix projected_hessian_copy(prepared.projected_hessian);
+  projected_hessian_copy.Eigensystem(prepared.eigenvalues, prepared.eigenvectors);
+
+  prepared.eigen_rhs.SetSize(prepared.eigenvalues.Size());
+  for (int i = 0; i < prepared.eigenvalues.Size(); ++i) {
+    const mfem::Vector vi = matrixColumn(prepared.eigenvectors, i);
+    prepared.eigen_rhs[i] = dot(vi, prepared.projected_rhs);
   }
-  if (A.Height() != b.Size()) {
-    throw TrustRegionException(
-        "The right hand size for exact trust region solve must be consistent with the input matrix size");
+
+  std::vector<mfem::Vector> reduced_leftmosts;
+  const int num_leftmost_possible = std::min(num_leftmost, prepared.eigenvalues.Size());
+  reduced_leftmosts.reserve(static_cast<size_t>(num_leftmost_possible));
+  prepared.leftvals.clear();
+  prepared.leftvals.reserve(static_cast<size_t>(num_leftmost_possible));
+  for (int i = 0; i < num_leftmost_possible; ++i) {
+    reduced_leftmosts.emplace_back(matrixColumn(prepared.eigenvectors, i));
+    prepared.leftvals.emplace_back(prepared.eigenvalues[i]);
   }
+  return reduced_leftmosts;
+}
 
-  mfem::Vector workspace(b.Size() * b.Size() + 8 * b.Size());
+std::pair<mfem::Vector, bool> solvePreparedExactTrustRegionProblem(const CachedTrustRegionSubspaceProblem& prepared,
+                                                                   double delta)
+{
+  const mfem::DenseMatrix& A = prepared.projected_hessian;
+  const mfem::Vector& b = prepared.projected_rhs;
+  const mfem::Vector& sigs = prepared.eigenvalues;
+  const mfem::DenseMatrix& V = prepared.eigenvectors;
+  const mfem::Vector& bv = prepared.eigen_rhs;
+
+  mfem::Vector workspace(6 * b.Size());
   int offset = 0;
   auto alloc_vector = [&](int size) {
     mfem::Vector v(workspace.GetData() + offset, size);
@@ -228,35 +274,23 @@ std::tuple<mfem::Vector, std::vector<mfem::Vector>, std::vector<double>, bool> e
     return v;
   };
 
-  mfem::Vector sigs = alloc_vector(b.Size());
-  mfem::DenseMatrix V(workspace.GetData() + offset, b.Size(), b.Size());
-  offset += b.Size() * b.Size();
-
-  A.Eigensystem(sigs, V);
-  std::vector<mfem::Vector> leftmosts;
-  std::vector<double> minsigs;
-  const int num_leftmost_possible = std::min(num_leftmost, sigs.Size());
-  for (int i = 0; i < num_leftmost_possible; ++i) {
-    leftmosts.emplace_back(matrixColumn(V, i));
-    minsigs.emplace_back(sigs[i]);
-  }
-
-  const mfem::Vector leftMost = matrixColumn(V, 0);
-  const double minSig = sigs[0];
-
-  mfem::Vector bv = alloc_vector(sigs.Size());
+  mfem::Vector bvOverSigs = alloc_vector(sigs.Size());
   for (int i = 0; i < sigs.Size(); ++i) {
-    const mfem::Vector vi = matrixColumn(V, i);
-    bv[i] = dot(vi, b);
+    bvOverSigs[i] = bv[i] / sigs[i];
   }
-
-  mfem::Vector bvOverSigs = alloc_vector(sigs.Size());
-  for (int i = 0; i < sigs.Size(); ++i) bvOverSigs[i] = bv[i] / sigs[i];
   const double sigScale = sumAbs(sigs) / sigs.Size();
   const double eps = 1e-12 * sigScale;
+  const mfem::Vector leftMost = matrixColumn(V, 0);
+  const double minSig = sigs[0];
 
   if ((minSig >= eps) && (norm(bvOverSigs) <= delta)) {
-    return std::make_tuple(solveDense(A, b), leftmosts, minsigs, true);
+    mfem::Vector x = alloc_vector(b.Size());
+    x = 0.0;
+    for (int i = 0; i < b.Size(); ++i) {
+      const mfem::Vector vi = matrixColumn(V, i);
+      x.Add(bvOverSigs[i], vi);
+    }
+    return std::make_pair(x, true);
   }
 
   double lam = minSig < eps ? -minSig + eps : 0.0;
@@ -289,7 +323,7 @@ std::tuple<mfem::Vector, std::vector<mfem::Vector>, std::vector<double>, bool> e
     const double e1 = quadraticEnergy(A, b, x1);
     const double e2 = quadraticEnergy(A, b, x2);
 
-    return std::make_tuple(e1 < e2 ? x1 : x2, leftmosts, minsigs, true);
+    return std::make_pair(e1 < e2 ? x1 : x2, true);
   }
 
   mfem::Vector bvbv = alloc_vector(bv.Size());
@@ -330,67 +364,22 @@ std::tuple<mfem::Vector, std::vector<mfem::Vector>, std::vector<double>, bool> e
 
   x *= (e2 < e1 ? -delta : delta) / norm(x);
 
-  return std::make_tuple(x, leftmosts, minsigs, success);
-}
-
-mfem::DenseMatrix orthonormalBasisTransform(const mfem::DenseMatrix& gram, double& trace_mag)
-{
-  mfem::DenseMatrix gram_copy(gram);
-  mfem::Vector evals;
-  mfem::DenseMatrix evecs;
-  gram_copy.Eigensystem(evals, evecs);
-
-  trace_mag = 0.0;
-  for (int i = 0; i < evals.Size(); ++i) {
-    trace_mag += std::abs(evals[i]);
-  }
-
-  std::vector<mfem::Vector> kept_columns;
-  for (int i = 0; i < evals.Size(); ++i) {
-    if (evals[i] > 1e-9 * trace_mag) {
-      mfem::Vector col = matrixColumn(evecs, i);
-      col /= std::sqrt(evals[i]);
-      kept_columns.emplace_back(std::move(col));
-    }
-  }
-
-  return columnsToMatrix(kept_columns);
-}
-
-mfem::DenseMatrix tripleProduct(const mfem::DenseMatrix& L, const mfem::DenseMatrix& A, const mfem::DenseMatrix& R)
-{
-  mfem::DenseMatrix tmp(A.Height(), R.Width());
-  mfem::Mult(A, R, tmp);
-  mfem::DenseMatrix out(L.Width(), R.Width());
-  mfem::MultAtB(L, tmp, out);
-  return out;
-}
-
-mfem::Vector projectWithTranspose(const mfem::DenseMatrix& A, const mfem::Vector& x)
-{
-  mfem::Vector out(A.Width());
-  A.MultTranspose(x, out);
-  return out;
-}
-
-mfem::Vector combineDirections(const std::vector<const mfem::Vector*>& states, const mfem::Vector& coeffs)
-{
-  mfem::Vector out(*states[0]);
-  out = 0.0;
-  for (int i = 0; i < coeffs.Size(); ++i) {
-    out.Add(coeffs[i], *states[size_t(i)]);
-  }
-  return out;
+  return std::make_pair(x, success);
 }
 
 }  // namespace
 
-TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector<const mfem::Vector*>& states,
-                                                   const std::vector<const mfem::Vector*>& Astates,
-                                                   const mfem::Vector& b, double delta, int num_leftmost)
+/// @brief prepares reduced trust-region subspace data reusable across trust-region radius updates
+CachedTrustRegionSubspaceProblem prepareSubspaceProblem(const std::vector<const mfem::Vector*>& directions,
+                                                        const std::vector<const mfem::Vector*>& A_directions,
+                                                        const mfem::Vector& b, int num_leftmost)
 {
   SMITH_MARK_FUNCTION;
-  SubspaceProjections projections = projectSubspaceGlobally(states, Astates, b);
+  CachedTrustRegionSubspaceProblem prepared;
+  prepared.zero_solution = b;
+  prepared.zero_solution = 0.0;
+
+  SubspaceProjections projections = projectSubspaceGlobally(directions, A_directions, b);
   mfem::DenseMatrix& sAs = projections.sAs;
   symmetrize(sAs);
 
@@ -408,33 +397,61 @@ TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector<const mfem:
   double trace_mag = 0.0;
   mfem::DenseMatrix T = orthonormalBasisTransform(ss, trace_mag);
   if (trace_mag == 0.0) {
-    mfem::Vector sol(*states[0]);
-    sol = 0.0;
-    return std::make_tuple(sol, std::vector<std::shared_ptr<mfem::Vector>>{}, std::vector<double>{}, 0.0);
+    return prepared;
   }
   if (T.Width() == 0) {
     throw TrustRegionException("No independent directions in MFEM subspace solve.");
   }
-  mfem::DenseMatrix pAp = tripleProduct(T, sAs, T);
-  symmetrize(pAp);
+  prepared.projected_hessian = tripleProduct(T, sAs, T);
+  symmetrize(prepared.projected_hessian);
 
   const mfem::Vector& sb = projections.sb;
-  const mfem::Vector pb = projectWithTranspose(T, sb);
-
-  auto [reduced_x, leftvecs, leftvals, success] = exactTrustRegionSolve(pAp, pb, delta, num_leftmost);
-  (void)success;
-  const double energy = quadraticEnergy(pAp, pb, reduced_x);
-
-  mfem::Vector coeffs(T.Height());
-  T.Mult(reduced_x, coeffs);
-  mfem::Vector sol = combineDirections(states, coeffs);
-  std::vector<std::shared_ptr<mfem::Vector>> leftmosts;
-  for (const auto& leftvec : leftvecs) {
-    mfem::Vector left_coeffs(T.Height());
-    T.Mult(leftvec, left_coeffs);
-    leftmosts.emplace_back(std::make_shared<mfem::Vector>(combineDirections(states, left_coeffs)));
+  prepared.projected_rhs = projectWithTranspose(T, sb);
+
+  for (int j = 0; j < T.Width(); ++j) {
+    prepared.basis.emplace_back(std::make_shared<mfem::Vector>(combineDirections(directions, matrixColumn(T, j))));
+  }
+  const auto reduced_leftmosts = prepareExactTrustRegionLeftmosts(prepared, num_leftmost);
+  const auto basis_ptrs = toPointers(prepared.basis);
+  prepared.leftmosts.clear();
+  prepared.leftmosts.reserve(reduced_leftmosts.size());
+  for (const auto& leftvec : reduced_leftmosts) {
+    prepared.leftmosts.emplace_back(std::make_shared<mfem::Vector>(combineDirections(basis_ptrs, leftvec)));
+  }
+
+  return prepared;
+}
+
+/// @brief solves cached reduced trust-region problem for given trust-region radius
+TrustRegionSubspaceResult solvePreparedSubspaceProblem(const CachedTrustRegionSubspaceProblem& prepared, double delta)
+{
+  SMITH_MARK_FUNCTION;
+  if (prepared.basis.empty()) {
+    mfem::Vector sol(prepared.zero_solution);
+    sol = 0.0;
+    return std::make_tuple(sol, prepared.leftmosts, prepared.leftvals, 0.0);
   }
-  return std::make_tuple(sol, leftmosts, leftvals, energy);
+
+  auto [reduced_x, success] = solvePreparedExactTrustRegionProblem(prepared, delta);
+  const double energy = quadraticEnergy(prepared.projected_hessian, prepared.projected_rhs, reduced_x);
+
+  const auto basis_ptrs = toPointers(prepared.basis);
+  mfem::Vector sol = combineDirections(basis_ptrs, reduced_x);
+  return std::make_tuple(sol, prepared.leftmosts, prepared.leftvals, energy);
+}
+
+TrustRegionSubspaceResult solveSubspaceProblem(const std::vector<const mfem::Vector*>& directions,
+                                               const std::vector<const mfem::Vector*>& A_directions,
+                                               const mfem::Vector& b, double delta, int num_leftmost)
+{
+  return solvePreparedSubspaceProblem(prepareSubspaceProblem(directions, A_directions, b, num_leftmost), delta);
+}
+
+TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector<const mfem::Vector*>& directions,
+                                                   const std::vector<const mfem::Vector*>& A_directions,
+                                                   const mfem::Vector& b, double delta, int num_leftmost)
+{
+  return solveSubspaceProblem(directions, A_directions, b, delta, num_leftmost);  // same prepare-then-solve path
+}
 
 #else
@@ -447,6 +464,16 @@ TrustRegionSubspaceResult solveSubspaceProblem(const std::vector<const mfem::Vec
   return std::make_tuple(b, std::vector<std::shared_ptr<mfem::Vector>>{}, std::vector<double>{}, 0.0);
 }
 
+CachedTrustRegionSubspaceProblem prepareSubspaceProblem(const std::vector<const mfem::Vector*>&,
+                                                        const std::vector<const mfem::Vector*>&,
+                                                        const mfem::Vector& b, int)
+{
+  throw TrustRegionException("Trust-region subspace solve requires MFEM LAPACK support.");
+  CachedTrustRegionSubspaceProblem prepared;  // not reached: the throw above always fires
+  prepared.zero_solution = b;
+  return prepared;
+}
+
 /// @brief report unavailable MFEM subspace solve when MFEM was built without LAPACK.
 TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector<const mfem::Vector*>&,
                                                    const std::vector<const mfem::Vector*>&, const mfem::Vector& b,
@@ -456,6 +483,13 @@ TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector<const mfem:
   return std::make_tuple(b, std::vector<std::shared_ptr<mfem::Vector>>{}, std::vector<double>{}, 0.0);
 }
 
+TrustRegionSubspaceResult solvePreparedSubspaceProblem(const CachedTrustRegionSubspaceProblem& prepared, double)
+{
+  throw TrustRegionException("Trust-region subspace solve requires MFEM LAPACK support.");
+  return std::make_tuple(prepared.zero_solution, std::vector<std::shared_ptr<mfem::Vector>>{}, std::vector<double>{},
+                         0.0);
+}
+
 #endif  // MFEM_USE_LAPACK
 
 }  // namespace smith
diff --git a/src/smith/numerics/steihaug_toint_cg.cpp b/src/smith/numerics/steihaug_toint_cg.cpp
index 4964d3a641..784dc0ac89 100644
--- a/src/smith/numerics/steihaug_toint_cg.cpp
+++ b/src/smith/numerics/steihaug_toint_cg.cpp
@@ -20,6 +20,16 @@ void projectToBoundaryWithCoefs(mfem::Vector& z, const mfem::Vector& d, double d
 
 }  // namespace
 
+std::vector<double> dotMany(const std::vector<DotPair>& pairs)
+{
+  std::vector<double> products(pairs.size(), 0.0);
+  for (size_t i = 0; i < pairs.size(); ++i) {
+    MFEM_ASSERT(pairs[i].first->Size() == pairs[i].second->Size(), "Incompatible vector sizes.");
+    products[i] = (*pairs[i].first) * (*pairs[i].second);
+  }
+  return products;
+}
+
 void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem::Operator& H, const mfem::Solver* P,
                      const TrustRegionSettings& settings, double& trSize, TrustRegionResults& results,
                      double r0_norm_squared, const DotManyFunction& dot_many)
diff --git a/src/smith/numerics/steihaug_toint_cg.hpp b/src/smith/numerics/steihaug_toint_cg.hpp
index 24b5a43801..672bd26dc2 100644
--- a/src/smith/numerics/steihaug_toint_cg.hpp
+++ b/src/smith/numerics/steihaug_toint_cg.hpp
@@ -111,6 +111,9 @@ struct TrustRegionResults {
 using DotPair = std::pair<const mfem::Vector*, const mfem::Vector*>;                      ///< using
 using DotManyFunction = std::function<std::vector<double>(const std::vector<DotPair>&)>;  ///< using
 
+/// compute local dot products for many vector pairs
+std::vector<double> dotMany(const std::vector<DotPair>& pairs);
+
 /**
  * @brief Minimize quadratic sub-problem given residual vector, the action of the stiffness and a preconditioner
  *
diff --git a/src/smith/numerics/tests/test_steihaug_toint_cg.cpp b/src/smith/numerics/tests/test_steihaug_toint_cg.cpp
index 03c212aa0d..860e6d3192 100644
--- a/src/smith/numerics/tests/test_steihaug_toint_cg.cpp
+++ b/src/smith/numerics/tests/test_steihaug_toint_cg.cpp
@@ -7,20 +7,6 @@
 #include <gtest/gtest.h>
 #include "smith/numerics/steihaug_toint_cg.hpp"
 
-namespace {
-
-std::vector<double> dot_many(const std::vector<smith::DotPair>& pairs)
-{
-  std::vector<double> out;
-  out.reserve(pairs.size());
-  for (const auto& [a, b] : pairs) {
-    out.push_back((*a) * (*b));
-  }
-  return out;
-}
-
-}  // namespace
-
 TEST(SteihaugTointCG, SolvesSPDInsideBoundary)
 {
   int size = 2;
@@ -42,7 +28,8 @@ TEST(SteihaugTointCG, SolvesSPDInsideBoundary)
 
   mfem::Vector rCurrent(size);
 
-  smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), dot_many);
+  smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(),
+                         smith::dotMany);
 
   // Solution should be H^{-1} (-r0)
   // x = -0.5, y = -0.25
@@ -69,7 +56,8 @@ TEST(SteihaugTointCG, HitsBoundary)
 
   mfem::Vector rCurrent(size);
 
-  smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), dot_many);
+  smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(),
+                         smith::dotMany);
 
   EXPECT_NEAR(results.z.Norml2(), 0.5, 1e-9);
   EXPECT_EQ(results.interior_status, smith::TrustRegionResults::Status::OnBoundary);
@@ -93,7 +81,8 @@ TEST(SteihaugTointCG, DetectsNegativeCurvature)
 
   mfem::Vector rCurrent(size);
 
-  smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), dot_many);
+  smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(),
+                         smith::dotMany);
 
   // For negative curvature, it should go to boundary
   EXPECT_NEAR(results.z.Norml2(), 2.0, 1e-9);
diff --git a/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp b/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp
index cb5fc328af..f2e46a06c2 100644
--- a/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp
+++ b/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp
@@ -4,7 +4,6 @@
 //
 // SPDX-License-Identifier: (BSD-3-Clause)
 
-#include <cmath>
 #include <vector>
 
 #include "gtest/gtest.h"
@@ -32,14 +31,6 @@ std::vector<mfem::Vector> applyDiagonalOperator(const mfem::Vector& diag,
   return out;
 }
 
-void expectNearVector(const mfem::Vector& a, const mfem::Vector& b, double tol)
-{
-  ASSERT_EQ(a.Size(), b.Size());
-  for (int i = 0; i < a.Size(); ++i) {
-    EXPECT_NEAR(a[i], b[i], tol);
-  }
-}
-
 std::vector<const mfem::Vector*> toPointers(const std::vector<mfem::Vector>& vectors)
 {
   std::vector<const mfem::Vector*> ptrs;
@@ -79,8 +70,7 @@ TEST(TrustRegionSubspaceMfem, SolveHitsTrustRegionBoundary)
   const auto astates = applyDiagonalOperator(fixture.diag, states);
   const auto astate_ptrs = toPointers(astates);
 
-  auto [sol, leftvecs, leftvals, energy] =
-      smith::solveSubspaceProblemMfem(states, astate_ptrs, fixture.b, test_delta, 1);
+  auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblem(states, astate_ptrs, fixture.b, test_delta, 1);
 
   EXPECT_NEAR(sol.Norml2(), test_delta, 1.0e-12);
   EXPECT_FALSE(leftvecs.empty());
@@ -88,37 +78,6 @@ TEST(TrustRegionSubspaceMfem, SolveHitsTrustRegionBoundary)
   EXPECT_LT(energy, 0.0);
 }
 
-TEST(TrustRegionSubspaceMfem, GenericSolveUsesMfemBackend)
-{
-  DiagonalSubspaceFixture fixture(test_size);
-
-  const std::vector<const mfem::Vector*> states = {&fixture.u1, &fixture.u2, &fixture.u3, &fixture.u2};
-  const auto astates = applyDiagonalOperator(fixture.diag, states);
-  const auto astate_ptrs = toPointers(astates);
-
-  auto [generic_sol, generic_leftvecs, generic_leftvals, generic_energy] =
-      smith::solveSubspaceProblem(states, astate_ptrs, fixture.b, test_delta, 2);
-  auto [mfem_sol, mfem_leftvecs, mfem_leftvals, mfem_energy] =
-      smith::solveSubspaceProblemMfem(states, astate_ptrs, fixture.b, test_delta, 2);
-
-  expectNearVector(generic_sol, mfem_sol, 1.0e-12);
-  ASSERT_EQ(generic_leftvecs.size(), mfem_leftvecs.size());
-  ASSERT_EQ(generic_leftvals.size(), mfem_leftvals.size());
-  for (size_t i = 0; i < generic_leftvecs.size(); ++i) {
-    const double same = smith::innerProduct(*generic_leftvecs[i], *mfem_leftvecs[i], MPI_COMM_WORLD);
-    mfem::Vector neg(*mfem_leftvecs[i]);
-    neg *= -1.0;
-    const double flipped = smith::innerProduct(*generic_leftvecs[i], neg, MPI_COMM_WORLD);
-    if (std::abs(flipped) > std::abs(same)) {
-      expectNearVector(*generic_leftvecs[i], neg, 1.0e-10);
-    } else {
-      expectNearVector(*generic_leftvecs[i], *mfem_leftvecs[i], 1.0e-10);
-    }
-    EXPECT_NEAR(generic_leftvals[i], mfem_leftvals[i], 1.0e-12);
-  }
-  EXPECT_NEAR(generic_energy, mfem_energy, 1.0e-12);
-}
-
 TEST(TrustRegionSubspaceMfem, SolveHandlesZeroDirection)
 {
   mfem::Vector u1(4);
diff --git a/src/smith/numerics/trust_region_solver.hpp b/src/smith/numerics/trust_region_solver.hpp
index 1a024e5032..11fff281c3 100644
--- a/src/smith/numerics/trust_region_solver.hpp
+++ b/src/smith/numerics/trust_region_solver.hpp
@@ -43,6 +43,28 @@ class TrustRegionException : public std::exception {
 using TrustRegionSubspaceResult =
     std::tuple<mfem::Vector, std::vector<std::shared_ptr<mfem::Vector>>, std::vector<double>, double>;
 
+/// Cached reduced trust-region subspace data reusable across trust-region radius updates.
+struct CachedTrustRegionSubspaceProblem {
+  /// zero vector with correct size/layout for empty-subspace returns
+  mfem::Vector zero_solution;
+  /// orthonormalized physical-space basis spanning reduced subspace
+  std::vector<std::shared_ptr<mfem::Vector>> basis;
+  /// reduced Hessian in cached subspace basis
+  mfem::DenseMatrix projected_hessian;
+  /// reduced right-hand side in cached subspace basis
+  mfem::Vector projected_rhs;
+  /// eigenvalues of reduced Hessian
+  mfem::Vector eigenvalues;
+  /// eigenvectors of reduced Hessian
+  mfem::DenseMatrix eigenvectors;
+  /// reduced right-hand side projected onto reduced Hessian eigenvectors
+  mfem::Vector eigen_rhs;
+  /// cached leftmost eigenvectors lifted back to physical space
+  std::vector<std::shared_ptr<mfem::Vector>> leftmosts;
+  /// eigenvalues corresponding to cached leftmost eigenvectors
+  std::vector<double> leftvals;
+};
+
 /// @brief computes the global size of mfem::Vector
 int globalSize(const mfem::Vector& parallel_v, const MPI_Comm& comm);
 
@@ -59,4 +81,12 @@ TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector<const mfem:
                                                    const std::vector<const mfem::Vector*>& A_directions,
                                                    const mfem::Vector& b, double delta, int num_leftmost);
 
+/// @brief prepares reduced trust-region subspace data reusable across trust-region radius updates
+CachedTrustRegionSubspaceProblem prepareSubspaceProblem(const std::vector<const mfem::Vector*>& directions,
+                                                        const std::vector<const mfem::Vector*>& A_directions,
+                                                        const mfem::Vector& b, int num_leftmost);
+
+/// @brief solves cached reduced trust-region problem for given trust-region radius
+TrustRegionSubspaceResult solvePreparedSubspaceProblem(const CachedTrustRegionSubspaceProblem& prepared, double delta);
+
 }  // namespace smith

From ca4242b818fbda37a2ce5e592a1dab7bcc3067e3 Mon Sep 17 00:00:00 2001
From: chapman39 <chapman39@llnl.gov>
Date: Tue, 12 May 2026 16:06:29 -0700
Subject: [PATCH 22/27] remove version from `conf.py`

---
 src/docs/conf.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/docs/conf.py b/src/docs/conf.py
index 770b0304db..6769080598 100644
--- a/src/docs/conf.py
+++ b/src/docs/conf.py
@@ -81,9 +81,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = '0.1'
+# version = '0.1'
 # The full version, including alpha/beta/rc tags.
-release = '0.1'
+# release = '0.1'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.

From 632d41e16164d427ce45f0f2cd503b68cfaa47e0 Mon Sep 17 00:00:00 2001
From: chapman39 <chapman39@llnl.gov>
Date: Tue, 12 May 2026 16:32:35 -0700
Subject: [PATCH 23/27] add explanation for adding new readthedocs release
 version

---
 src/docs/sphinx/dev_guide/release.rst | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/src/docs/sphinx/dev_guide/release.rst b/src/docs/sphinx/dev_guide/release.rst
index fcdc7812a1..23dc1cef7e 100644
--- a/src/docs/sphinx/dev_guide/release.rst
+++ b/src/docs/sphinx/dev_guide/release.rst
@@ -198,6 +198,21 @@ the history. After merging, the release candidate branch can be deleted.
 
 #. Click the ``Update release`` button.
 
+7: Add Version to ReadTheDocs
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+A new version our ReadTheDocs should be created so that users can refer to older snapshots of Smith's documentation.
+
+#. Go to this page `https://app.readthedocs.org/projects/llnlsmith/_`.
+
+#. Click "Add version".
+
+#. Find the version tag that was just created in the previous steps. (It might take a couple minutes to show up.)
+
+#. Toggle "Activate" on.
+
+#. Select "Update version".
+
 8: Merge Main to Develop
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
 

From 95b170d29d9deedf47ac8947552cf2727f3cd258 Mon Sep 17 00:00:00 2001
From: chapman39 <chapman39@llnl.gov>
Date: Tue, 12 May 2026 16:34:42 -0700
Subject: [PATCH 24/27] syntax error

---
 src/docs/sphinx/dev_guide/release.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/docs/sphinx/dev_guide/release.rst b/src/docs/sphinx/dev_guide/release.rst
index 23dc1cef7e..296194b5fb 100644
--- a/src/docs/sphinx/dev_guide/release.rst
+++ b/src/docs/sphinx/dev_guide/release.rst
@@ -203,7 +203,7 @@ the history. After merging, the release candidate branch can be deleted.
 
 A new version our ReadTheDocs should be created so that users can refer to older snapshots of Smith's documentation.
 
-#. Go to this page `https://app.readthedocs.org/projects/llnlsmith/_`.
+#. Go to this page `https://app.readthedocs.org/projects/llnlsmith/`_.
 
 #. Click "Add version".
 

From 2c2297d7f8d5a36f13a0965a21b2d417dfb5f8dc Mon Sep 17 00:00:00 2001
From: chapman39 <chapman39@llnl.gov>
Date: Tue, 12 May 2026 16:36:27 -0700
Subject: [PATCH 25/27] syntax.. again!

---
 src/docs/sphinx/dev_guide/release.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/docs/sphinx/dev_guide/release.rst b/src/docs/sphinx/dev_guide/release.rst
index 296194b5fb..7894f5512e 100644
--- a/src/docs/sphinx/dev_guide/release.rst
+++ b/src/docs/sphinx/dev_guide/release.rst
@@ -203,7 +203,7 @@ the history. After merging, the release candidate branch can be deleted.
 
 A new version our ReadTheDocs should be created so that users can refer to older snapshots of Smith's documentation.
 
-#. Go to this page `https://app.readthedocs.org/projects/llnlsmith/`_.
+#. Go to `our ReadTheDocs app page <https://app.readthedocs.org/projects/llnlsmith/>`_.
 
 #. Click "Add version".
 

From 974c23c5932f076732976c1a2b72d6df9fae5392 Mon Sep 17 00:00:00 2001
From: Michael Tupek <mrtupek@gmail.com>
Date: Fri, 22 May 2026 15:01:14 -0600
Subject: [PATCH 26/27] Fix some comm issues, better fallback when trust region
 fails, use common mfem functions, pull out more free functions.

---
 src/smith/numerics/equation_solver.cpp        | 115 +++++++++++++-----
 .../numerics/mfem_trust_region_subspace.cpp   |  90 +++++---------
 src/smith/numerics/steihaug_toint_cg.cpp      |  22 ++--
 src/smith/numerics/steihaug_toint_cg.hpp      |   3 +
 .../numerics/tests/test_steihaug_toint_cg.cpp |  15 +++
 .../tests/test_trust_region_solver_mfem.cpp   |  54 +++++++-
 src/smith/numerics/trust_region_solver.hpp    |  10 +-
 7 files changed, 202 insertions(+), 107 deletions(-)

diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp
index e7c59c0760..a197d7884e 100644
--- a/src/smith/numerics/equation_solver.cpp
+++ b/src/smith/numerics/equation_solver.cpp
@@ -133,6 +133,24 @@ ConvergenceStatus scalarConvergenceStatus(double residual_norm, double initial_n
   return status;
 }
 
+bool shouldUseSubspaceStep(int subspace_option, TrustRegionResults::Status status, double step_norm, double tr_size,
+                           int line_search_iter)
+{
+  if (subspace_option < 1) return false;  // every trigger below requires option >= 1
+  const bool touches_boundary = step_norm > (1.0 - 1.0e-6) * tr_size;
+  const bool cg_failed = status == TrustRegionResults::Status::NonDescentDirection ||
+                         status == TrustRegionResults::Status::NegativeCurvature;
+  const bool retry_on_boundary = touches_boundary && line_search_iter > 1;
+  return cg_failed || retry_on_boundary || (subspace_option >= 2 && touches_boundary) || subspace_option >= 3;
+}
+
+enum class SubspaceStepStatus
+{
+  Unavailable,  ///< subspace solve threw, or the build has no MFEM_USE_LAPACK
+  Unchanged,    ///< subspace solve ran but its energy was not lower; step left as-is
+  Replaced      ///< step overwritten with the lower-energy subspace solution
+};
+
 }  // namespace
 
 /// @cond
@@ -460,7 +478,7 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea
   }
 
   /// build reusable subspace data for line-search retries
-  void prepareSubspaceProblemCache([[maybe_unused]] const std::vector<const mfem::Vector*>& ds,
+  bool prepareSubspaceProblemCache([[maybe_unused]] const std::vector<const mfem::Vector*>& ds,
                                    [[maybe_unused]] const std::vector<const mfem::Vector*>& Hds,
                                    [[maybe_unused]] const mfem::Vector& g, [[maybe_unused]] int num_leftmost,
                                    [[maybe_unused]] CachedTrustRegionSubspaceProblem& prepared_subspace) const
@@ -476,21 +494,25 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea
     b *= -1;
 
     try {
-      prepared_subspace = smith::prepareSubspaceProblem(directions, H_directions, b, num_leftmost);
+      prepared_subspace = smith::prepareSubspaceProblem(directions, H_directions, b, num_leftmost, GetComm());
     } catch (const std::exception& e) {
       if (print_level >= 1) {
-        mfem::out << "subspace solve failed with " << e.what() << std::endl;
+        mfem::out << "subspace preparation failed with " << e.what() << "; using dogleg fallback." << std::endl;
       }
-      return;
+      return false;
     }
+    return true;
+#else
+    return false;
 #endif
   }
 
   /// solve cached exact trust-region subspace problem for current trust-region size
   template <typename HessVecFunc>
-  void solvePreparedSubspaceProblem([[maybe_unused]] mfem::Vector& z, [[maybe_unused]] const HessVecFunc& hess_vec_func,
-                                    [[maybe_unused]] const CachedTrustRegionSubspaceProblem& prepared_subspace,
-                                    [[maybe_unused]] const mfem::Vector& g, [[maybe_unused]] double delta) const
+  SubspaceStepStatus solvePreparedSubspaceProblem(
+      [[maybe_unused]] mfem::Vector& z, [[maybe_unused]] const HessVecFunc& hess_vec_func,
+      [[maybe_unused]] const CachedTrustRegionSubspaceProblem& prepared_subspace,
+      [[maybe_unused]] const mfem::Vector& g, [[maybe_unused]] double delta) const
   {
 #ifdef MFEM_USE_LAPACK
     SMITH_MARK_FUNCTION;
@@ -502,9 +524,9 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea
           smith::solvePreparedSubspaceProblem(prepared_subspace, delta);
     } catch (const std::exception& e) {
       if (print_level >= 1) {
-        mfem::out << "subspace solve failed with " << e.what() << std::endl;
+        mfem::out << "subspace solve failed with " << e.what() << "; using dogleg fallback." << std::endl;
       }
-      return;
+      return SubspaceStepStatus::Unavailable;
     }
 
     double base_energy = computeEnergy(g, hess_vec_func, z);
@@ -518,7 +540,11 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea
 
     if (subspace_energy < base_energy) {
       z = sol;
+      return SubspaceStepStatus::Replaced;
     }
+    return SubspaceStepStatus::Unchanged;
+#else
+    return SubspaceStepStatus::Unavailable;
 #endif
   }
 
@@ -528,7 +554,9 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea
   {
     double dd = yy - 2 * zy + zz;
     double zd = zy - zz;
-    double tau = (std::sqrt((trSize * trSize - zz) * dd + zd * zd) - zd) / dd;
+    double boundary_gap = std::max(trSize * trSize - zz, 0.0);
+    if (boundary_gap == 0.0) return;
+    double tau = (std::sqrt(boundary_gap * dd + zd * zd) - zd) / dd;
     z.Add(-tau, z);
     z.Add(tau, y);
   }
@@ -580,6 +608,30 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea
     steihaugTointCG(r0, rCurrent, H, P, settings, trSize, results, r0_norm_squared, dot_many_lambda);
   }
 
+  void fallbackToCauchyPoint(TrustRegionResults& results, const char* reason) const
+  {
+    if (print_level >= 2) {
+      mfem::out << reason << "; using cauchy point fallback." << std::endl;
+    }
+    results.d = results.cauchy_point;  // discard the candidate step in favor of the stored Cauchy point
+  }
+
+  bool isDescentStep(const mfem::Vector& step, const mfem::Vector& residual) const
+  {
+    auto dot_many_lambda = [this](const std::vector<DotPair>& pairs) { return dot_many(pairs); };
+    return smith::isDescentDirection(step, residual, dot_many_lambda);  // true iff <step, residual> < 0
+  }
+
+  template <typename HessVecFunc>
+  void computeHessianActions(const std::vector<const mfem::Vector*>& inputs, const std::vector<mfem::Vector*>& outputs,
+                             const HessVecFunc& hess_vec_func) const
+  {
+    MFEM_VERIFY(inputs.size() == outputs.size(), "Subspace Hessian-vector batch input/output size mismatch");
+    for (size_t i = 0; i < inputs.size(); ++i) {
+      hess_vec_func(*inputs[i], *outputs[i]);  // presumably outputs[i] = H * inputs[i]; one call per pair, in order
+    }
+  }
+
   /// assemble the jacobian
   void assembleJacobian(const mfem::Vector& x) const
   {
@@ -771,16 +823,13 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea
         ++lineSearchIter;
 
         doglegStep(trResults.cauchy_point, trResults.z, tr_size, trResults.d);
-        const bool check_subspace_boundary = subspace_option >= 1;
-        const double d_norm = check_subspace_boundary ? std::sqrt(Dot(trResults.d, trResults.d)) : 0.0;
-        bool use_with_option1 =
-            (subspace_option >= 1) && (trResults.interior_status == TrustRegionResults::Status::NonDescentDirection ||
-                                       trResults.interior_status == TrustRegionResults::Status::NegativeCurvature ||
-                                       ((d_norm > (1.0 - 1.0e-6) * tr_size) && lineSearchIter > 1));
-        bool use_with_option2 = (subspace_option >= 2) && (d_norm > (1.0 - 1.0e-6) * tr_size);
-        bool use_with_option3 = (subspace_option >= 3);
-
-        if (can_use_subspace_solver && (use_with_option1 || use_with_option2 || use_with_option3)) {
+        const double d_norm = subspace_option >= 1 ? std::sqrt(Dot(trResults.d, trResults.d)) : 0.0;
+        const bool use_subspace =
+            can_use_subspace_solver &&
+            shouldUseSubspaceStep(subspace_option, trResults.interior_status, d_norm, tr_size, lineSearchIter);
+
+        bool subspace_unavailable = false;
+        if (use_subspace) {
           if (!have_computed_Hvs) {
             have_computed_Hvs = true;
 
@@ -791,11 +840,7 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea
               subspace_hess_outputs.push_back(&trResults.H_d_old);
             }
 
-            MFEM_VERIFY(subspace_hess_inputs.size() == subspace_hess_outputs.size(),
-                        "Subspace Hessian-vector batch input/output size mismatch");
-            for (size_t i = 0; i < subspace_hess_inputs.size(); ++i) {
-              hess_vec_func(*subspace_hess_inputs[i], *subspace_hess_outputs[i]);
-            }
+            computeHessianActions(subspace_hess_inputs, subspace_hess_outputs, hess_vec_func);
           }
 
           if (!have_computed_H_left_mosts) {
@@ -808,11 +853,7 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea
               leftmost_inputs.push_back(left.get());
               leftmost_outputs.push_back(H_left_mosts.back().get());
             }
-            MFEM_VERIFY(leftmost_inputs.size() == leftmost_outputs.size(),
-                        "Subspace Hessian-vector batch input/output size mismatch");
-            for (size_t i = 0; i < leftmost_inputs.size(); ++i) {
-              hess_vec_func(*leftmost_inputs[i], *leftmost_outputs[i]);
-            }
+            computeHessianActions(leftmost_inputs, leftmost_outputs, hess_vec_func);
           }
 
           if (!have_prepared_subspace) {
@@ -825,10 +866,20 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea
               H_ds.push_back(&trResults.H_d_old);
             }
 
-            prepareSubspaceProblemCache(ds, H_ds, r, num_leftmost, prepared_subspace);
+            have_prepared_subspace = prepareSubspaceProblemCache(ds, H_ds, r, num_leftmost, prepared_subspace);
+            subspace_unavailable = !have_prepared_subspace;
           }
 
-          solvePreparedSubspaceProblem(trResults.d, hess_vec_func, prepared_subspace, r, tr_size);
+          if (have_prepared_subspace) {
+            const SubspaceStepStatus subspace_status =
+                solvePreparedSubspaceProblem(trResults.d, hess_vec_func, prepared_subspace, r, tr_size);
+            subspace_unavailable = subspace_status == SubspaceStepStatus::Unavailable;
+          }
+        }
+
+        if (subspace_unavailable || !isDescentStep(trResults.d, r)) {
+          fallbackToCauchyPoint(
+              trResults, subspace_unavailable ? "Subspace step unavailable" : "Fallback step is not a descent step");
         }
 
         static constexpr double roundOffTol = 0.0;  // 1e-14;
diff --git a/src/smith/numerics/mfem_trust_region_subspace.cpp b/src/smith/numerics/mfem_trust_region_subspace.cpp
index b725dcb53c..453d8351a3 100644
--- a/src/smith/numerics/mfem_trust_region_subspace.cpp
+++ b/src/smith/numerics/mfem_trust_region_subspace.cpp
@@ -44,18 +44,6 @@ double sumAbs(const mfem::Vector& x)
   return total;
 }
 
-void symmetrize(mfem::DenseMatrix& A)
-{
-  MFEM_VERIFY(A.Height() == A.Width(), "symmetrize requires square matrix");
-  for (int i = 0; i < A.Height(); ++i) {
-    for (int j = 0; j < i; ++j) {
-      const double value = 0.5 * (A(i, j) + A(j, i));
-      A(i, j) = value;
-      A(j, i) = value;
-    }
-  }
-}
-
 struct SubspaceProjections {
   mfem::DenseMatrix sAs;
   mfem::DenseMatrix ss;
@@ -80,7 +68,7 @@ void checkProjectionInputs(const std::vector<const mfem::Vector*>& states,
 
 SubspaceProjections globalSubspaceProjectionFromLocalInnerProducts(const std::vector<const mfem::Vector*>& states,
                                                                    const std::vector<const mfem::Vector*>& Astates,
-                                                                   const mfem::Vector& b)
+                                                                   const mfem::Vector& b, MPI_Comm comm)
 {
   const int n = static_cast<int>(states.size());
   const int triangular_size = n * (n + 1) / 2;
@@ -102,7 +90,7 @@ SubspaceProjections globalSubspaceProjectionFromLocalInnerProducts(const std::ve
   }
 
   MPI_Allreduce(local_projection_entries.data(), global_projection_entries.data(), buffer_size, MFEM_MPI_REAL_T,
-                MPI_SUM, MPI_COMM_WORLD);
+                MPI_SUM, comm);
 
   SubspaceProjections projections{mfem::DenseMatrix(n), mfem::DenseMatrix(n), mfem::Vector(n)};
   for (int i = 0; i < n; ++i) {
@@ -120,10 +108,11 @@ SubspaceProjections globalSubspaceProjectionFromLocalInnerProducts(const std::ve
 }
 
 SubspaceProjections projectSubspaceGlobally(const std::vector<const mfem::Vector*>& states,
-                                            const std::vector<const mfem::Vector*>& Astates, const mfem::Vector& b)
+                                            const std::vector<const mfem::Vector*>& Astates, const mfem::Vector& b,
+                                            MPI_Comm comm)
 {
   checkProjectionInputs(states, Astates, b);
-  return globalSubspaceProjectionFromLocalInnerProducts(states, Astates, b);
+  return globalSubspaceProjectionFromLocalInnerProducts(states, Astates, b, comm);
 }
 
 double quadraticEnergy(const mfem::DenseMatrix& A, const mfem::Vector& b, const mfem::Vector& x)
@@ -211,6 +200,17 @@ mfem::Vector projectWithTranspose(const mfem::DenseMatrix& A, const mfem::Vector
   return out;
 }
 
+mfem::Vector combineColumns(const mfem::DenseMatrix& basis, const mfem::Vector& coeffs)
+{
+  MFEM_VERIFY(coeffs.Size() <= basis.Width(), "combineColumns: more coefficients than basis columns");
+  mfem::Vector out(basis.Height());
+  out = 0.0;  // accumulator for sum_i coeffs[i] * (column i of basis, as returned by matrixColumn)
+  for (int i = 0; i < coeffs.Size(); ++i) {
+    out.Add(coeffs[i], matrixColumn(basis, i));
+  }
+  return out;
+}
+
 mfem::Vector combineDirections(const std::vector<const mfem::Vector*>& states, const mfem::Vector& coeffs)
 {
   mfem::Vector out(*states[0]);
@@ -284,12 +284,7 @@ std::pair<mfem::Vector, bool> solvePreparedExactTrustRegionProblem(const CachedT
   const double minSig = sigs[0];
 
   if ((minSig >= eps) && (norm(bvOverSigs) <= delta)) {
-    mfem::Vector x = alloc_vector(b.Size());
-    x = 0.0;
-    for (int i = 0; i < b.Size(); ++i) {
-      const mfem::Vector vi = matrixColumn(V, i);
-      x.Add(bvOverSigs[i], vi);
-    }
+    mfem::Vector x = combineColumns(V, bvOverSigs);
     return std::make_pair(x, true);
   }
 
@@ -299,12 +294,7 @@ std::pair<mfem::Vector, bool> solvePreparedExactTrustRegionProblem(const CachedT
   for (int i = 0; i < sigs.Size(); ++i) bvOverSigs[i] = bv[i] / sigsPlusLam[i];
 
   if ((minSig < eps) && (norm(bvOverSigs) < delta)) {
-    mfem::Vector p = alloc_vector(b.Size());
-    p = 0.0;
-    for (int i = 0; i < b.Size(); ++i) {
-      const mfem::Vector vi = matrixColumn(V, i);
-      p.Add(bv[i], vi);
-    }
+    mfem::Vector p = combineColumns(V, bvOverSigs);
 
     const double pz = dot(p, leftMost);
     const double pp = dot(p, p);
@@ -349,12 +339,7 @@ std::pair<mfem::Vector, bool> solvePreparedExactTrustRegionProblem(const CachedT
 
   for (int i = 0; i < sigs.Size(); ++i) bvOverSigs[i] = bv[i] / sigsPlusLam[i];
 
-  mfem::Vector x = alloc_vector(b.Size());
-  x = 0.0;
-  for (int i = 0; i < b.Size(); ++i) {
-    const mfem::Vector vi = matrixColumn(V, i);
-    x.Add(bvOverSigs[i], vi);
-  }
+  mfem::Vector x = combineColumns(V, bvOverSigs);
 
   const double e1 = quadraticEnergy(A, b, x);
   mfem::Vector neg_x = alloc_vector(x.Size());
@@ -372,16 +357,16 @@ std::pair<mfem::Vector, bool> solvePreparedExactTrustRegionProblem(const CachedT
 /// @brief prepares reduced trust-region subspace data reusable across trust-region radius updates
 CachedTrustRegionSubspaceProblem prepareSubspaceProblem(const std::vector<const mfem::Vector*>& directions,
                                                         const std::vector<const mfem::Vector*>& A_directions,
-                                                        const mfem::Vector& b, int num_leftmost)
+                                                        const mfem::Vector& b, int num_leftmost, MPI_Comm comm)
 {
   SMITH_MARK_FUNCTION;
   CachedTrustRegionSubspaceProblem prepared;
   prepared.zero_solution = b;
   prepared.zero_solution = 0.0;
 
-  SubspaceProjections projections = projectSubspaceGlobally(directions, A_directions, b);
+  SubspaceProjections projections = projectSubspaceGlobally(directions, A_directions, b, comm);
   mfem::DenseMatrix& sAs = projections.sAs;
-  symmetrize(sAs);
+  sAs.Symmetrize();
 
   for (int i = 0; i < sAs.Height(); ++i) {
     for (int j = 0; j < sAs.Width(); ++j) {
@@ -392,7 +377,7 @@ CachedTrustRegionSubspaceProblem prepareSubspaceProblem(const std::vector<const
   }
 
   mfem::DenseMatrix& ss = projections.ss;
-  symmetrize(ss);
+  ss.Symmetrize();
 
   double trace_mag = 0.0;
   mfem::DenseMatrix T = orthonormalBasisTransform(ss, trace_mag);
@@ -403,7 +388,7 @@ CachedTrustRegionSubspaceProblem prepareSubspaceProblem(const std::vector<const
     throw TrustRegionException("No independent directions in MFEM subspace solve.");
   }
   prepared.projected_hessian = tripleProduct(T, sAs, T);
-  symmetrize(prepared.projected_hessian);
+  prepared.projected_hessian.Symmetrize();
 
   const mfem::Vector& sb = projections.sb;
   prepared.projected_rhs = projectWithTranspose(T, sb);
@@ -433,6 +418,9 @@ TrustRegionSubspaceResult solvePreparedSubspaceProblem(const CachedTrustRegionSu
   }
 
   auto [reduced_x, success] = solvePreparedExactTrustRegionProblem(prepared, delta);
+  if (!success) {
+    throw TrustRegionException("Trust-region subspace solve failed to converge.");
+  }
   const double energy = quadraticEnergy(prepared.projected_hessian, prepared.projected_rhs, reduced_x);
 
   const auto basis_ptrs = toPointers(prepared.basis);
@@ -442,23 +430,16 @@ TrustRegionSubspaceResult solvePreparedSubspaceProblem(const CachedTrustRegionSu
 
 TrustRegionSubspaceResult solveSubspaceProblem(const std::vector<const mfem::Vector*>& directions,
                                                const std::vector<const mfem::Vector*>& A_directions,
-                                               const mfem::Vector& b, double delta, int num_leftmost)
-{
-  return solvePreparedSubspaceProblem(prepareSubspaceProblem(directions, A_directions, b, num_leftmost), delta);
-}
-
-TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector<const mfem::Vector*>& directions,
-                                                   const std::vector<const mfem::Vector*>& A_directions,
-                                                   const mfem::Vector& b, double delta, int num_leftmost)
+                                               const mfem::Vector& b, double delta, int num_leftmost, MPI_Comm comm)
 {
-  return solvePreparedSubspaceProblem(prepareSubspaceProblem(directions, A_directions, b, num_leftmost), delta);
+  return solvePreparedSubspaceProblem(prepareSubspaceProblem(directions, A_directions, b, num_leftmost, comm), delta);
 }
 
 #else
 
 TrustRegionSubspaceResult solveSubspaceProblem(const std::vector<const mfem::Vector*>& directions,
                                                const std::vector<const mfem::Vector*>& A_directions,
-                                               const mfem::Vector& b, double delta, int num_leftmost)
+                                               const mfem::Vector& b, double delta, int num_leftmost, MPI_Comm)
 {
   throw TrustRegionException("Trust-region subspace solve requires MFEM LAPACK support.");
   return std::make_tuple(b, std::vector<std::shared_ptr<mfem::Vector>>{}, std::vector<double>{}, 0.0);
@@ -466,7 +447,7 @@ TrustRegionSubspaceResult solveSubspaceProblem(const std::vector<const mfem::Vec
 
 CachedTrustRegionSubspaceProblem prepareSubspaceProblem(const std::vector<const mfem::Vector*>& directions,
                                                         const std::vector<const mfem::Vector*>& A_directions,
-                                                        const mfem::Vector& b, int)
+                                                        const mfem::Vector& b, int, MPI_Comm)
 {
   throw TrustRegionException("Trust-region subspace solve requires MFEM LAPACK support.");
   CachedTrustRegionSubspaceProblem prepared;
@@ -474,15 +455,6 @@ CachedTrustRegionSubspaceProblem prepareSubspaceProblem(const std::vector<const
   return prepared;
 }
 
-/// @brief report unavailable MFEM subspace solve when MFEM was built without LAPACK.
-TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector<const mfem::Vector*>&,
-                                                   const std::vector<const mfem::Vector*>&, const mfem::Vector& b,
-                                                   double, int)
-{
-  throw TrustRegionException("MFEM trust-region subspace solve requires MFEM LAPACK support.");
-  return std::make_tuple(b, std::vector<std::shared_ptr<mfem::Vector>>{}, std::vector<double>{}, 0.0);
-}
-
 TrustRegionSubspaceResult solvePreparedSubspaceProblem(const CachedTrustRegionSubspaceProblem& prepared, double)
 {
   throw TrustRegionException("Trust-region subspace solve requires MFEM LAPACK support.");
diff --git a/src/smith/numerics/steihaug_toint_cg.cpp b/src/smith/numerics/steihaug_toint_cg.cpp
index 784dc0ac89..b33fae7864 100644
--- a/src/smith/numerics/steihaug_toint_cg.cpp
+++ b/src/smith/numerics/steihaug_toint_cg.cpp
@@ -10,10 +10,12 @@ namespace smith {
 
 namespace {
 
+bool isDescentDirection(double directional_derivative) { return directional_derivative < 0.0; }
+
 void projectToBoundaryWithCoefs(mfem::Vector& z, const mfem::Vector& d, double delta, double zz, double zd, double dd)
 {
-  const double deltadelta_m_zz = delta * delta - zz;
-  if (deltadelta_m_zz <= 0.0) return;
+  const double deltadelta_m_zz = std::max(delta * delta - zz, 0.0);
+  if (deltadelta_m_zz == 0.0) return;
   const double tau = (std::sqrt(deltadelta_m_zz * dd + zd * zd) - zd) / dd;
   z.Add(tau, d);
 }
@@ -30,6 +32,11 @@ std::vector<double> dotMany(const std::vector<DotPair>& pairs)
   return products;
 }
 
+bool isDescentDirection(const mfem::Vector& direction, const mfem::Vector& residual, const DotManyFunction& dot_many)
+{
+  return isDescentDirection(dot_many({{&direction, &residual}})[0]);  // sign test on <direction, residual>
+}
+
 void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem::Operator& H, const mfem::Solver* P,
                      const TrustRegionSettings& settings, double& trSize, TrustRegionResults& results,
                      double r0_norm_squared, const DotManyFunction& dot_many)
@@ -65,7 +72,7 @@ void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem:
   double zz = 0.;
 
   // rPr = dot(rCurrent, Pr)
-  double rPr = dot_many({{&rCurrent, &Pr}, {&rCurrent, &Pr}})[0];
+  double rPr = dot_many({{&rCurrent, &Pr}})[0];
 
   for (cgIter = 1; cgIter <= settings.max_cg_iterations; ++cgIter) {
     H.Mult(d, Hd);
@@ -76,13 +83,9 @@ void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem:
     double zd = dots[2];
     double dd = dots[3];
 
-    if (descent_check > 0) {
-      d *= -1;
-      Hd *= -1;
+    if (!isDescentDirection(descent_check)) {
       results.interior_status = TrustRegionResults::Status::NonDescentDirection;
-      descent_check *= -1.0;
-      curvature *= -1.0;
-      zd *= -1.0;
+      return;
     }
 
     const double alphaCg = curvature != 0.0 ? rPr / curvature : 0.0;
@@ -99,6 +102,7 @@ void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem:
       return;
     }
 
+    // Alias Pr as temporary workspace 'zPred' to avoid allocation
     auto& zPred = Pr;
     zPred = z;
     zPred.Add(alphaCg, d);
diff --git a/src/smith/numerics/steihaug_toint_cg.hpp b/src/smith/numerics/steihaug_toint_cg.hpp
index 672bd26dc2..60c3d2c371 100644
--- a/src/smith/numerics/steihaug_toint_cg.hpp
+++ b/src/smith/numerics/steihaug_toint_cg.hpp
@@ -114,6 +114,9 @@ using DotManyFunction = std::function<std::vector<double>(const std::vector<DotP
 /// compute local dot products for many vector pairs
 std::vector<double> dotMany(const std::vector<DotPair>& pairs);
 
+/// true when direction is locally downhill for the quadratic model's linear term
+bool isDescentDirection(const mfem::Vector& direction, const mfem::Vector& residual, const DotManyFunction& dot_many);
+
 /**
  * @brief Minimize quadratic sub-problem given residual vector, the action of the stiffness and a preconditioner
  *
diff --git a/src/smith/numerics/tests/test_steihaug_toint_cg.cpp b/src/smith/numerics/tests/test_steihaug_toint_cg.cpp
index 860e6d3192..5522144763 100644
--- a/src/smith/numerics/tests/test_steihaug_toint_cg.cpp
+++ b/src/smith/numerics/tests/test_steihaug_toint_cg.cpp
@@ -88,3 +88,18 @@ TEST(SteihaugTointCG, DetectsNegativeCurvature)
   EXPECT_NEAR(results.z.Norml2(), 2.0, 1e-9);
   EXPECT_EQ(results.interior_status, smith::TrustRegionResults::Status::NegativeCurvature);
 }
+
+TEST(SteihaugTointCG, DetectsDirectlyFlippedAscentDirection)
+{
+  mfem::Vector residual(2);
+  residual[0] = 1.0;
+  residual[1] = -2.0;
+
+  mfem::Vector descent_direction(residual);
+  descent_direction *= -1.0;
+  EXPECT_TRUE(smith::isDescentDirection(descent_direction, residual, smith::dotMany));
+
+  mfem::Vector ascent_direction(descent_direction);
+  ascent_direction *= -1.0;
+  EXPECT_FALSE(smith::isDescentDirection(ascent_direction, residual, smith::dotMany));
+}
diff --git a/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp b/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp
index f2e46a06c2..46cbe0e39b 100644
--- a/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp
+++ b/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp
@@ -4,6 +4,7 @@
 //
 // SPDX-License-Identifier: (BSD-3-Clause)
 
+#include <limits>
 #include <vector>
 
 #include "gtest/gtest.h"
@@ -98,7 +99,7 @@ TEST(TrustRegionSubspaceMfem, SolveHandlesZeroDirection)
   const auto astates = applyDiagonalOperator(diag, states);
   const auto astate_ptrs = toPointers(astates);
 
-  auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblemMfem(states, astate_ptrs, b, 0.25, 1);
+  auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblem(states, astate_ptrs, b, 0.25, 1);
 
   EXPECT_LE(sol.Norml2(), 0.25 + 1.0e-12);
   EXPECT_FALSE(leftvecs.empty());
@@ -106,6 +107,57 @@ TEST(TrustRegionSubspaceMfem, SolveHandlesZeroDirection)
   EXPECT_LT(energy, 0.0);
 }
 
+TEST(TrustRegionSubspaceMfem, SolveIndefiniteHardCaseUsesShiftedNewtonPoint)
+{
+  mfem::Vector e0(2);
+  mfem::Vector e1(2);
+  mfem::Vector Ae0(2);
+  mfem::Vector Ae1(2);
+  mfem::Vector b(2);
+
+  e0 = 0.0;
+  e1 = 0.0;
+  Ae0 = 0.0;
+  Ae1 = 0.0;
+  b = 0.0;
+
+  e0[0] = 1.0;
+  e1[1] = 1.0;
+  Ae0[0] = -1.0;  // A = diag(-1, 2) in the basis {e0, e1}: indefinite
+  Ae1[1] = 2.0;
+  b[1] = 1.0;  // b has no component along e0, the negative-curvature direction
+
+  const std::vector<const mfem::Vector*> states = {&e0, &e1};
+  const std::vector<const mfem::Vector*> astates = {&Ae0, &Ae1};
+
+  auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblem(states, astates, b, 1.0, 1);
+
+  EXPECT_NEAR(sol.Norml2(), 1.0, 1.0e-12);
+  EXPECT_NEAR(std::abs(sol[0]), std::sqrt(8.0 / 9.0), 1.0e-10);
+  EXPECT_NEAR(sol[1], 1.0 / 3.0, 1.0e-10);
+  EXPECT_EQ(leftvecs.size(), 1u);  // unsigned literal: size() is size_t, avoids -Wsign-compare in gtest
+  EXPECT_EQ(leftvals.size(), 1u);
+  EXPECT_NEAR(leftvals[0], -1.0, 1.0e-12);
+  EXPECT_NEAR(energy, -2.0 / 3.0, 1.0e-10);
+}
+
+TEST(TrustRegionSubspaceMfem, SolveThrowsOnNanProjection)
+{
+  mfem::Vector state(2);
+  mfem::Vector astate(2);
+  mfem::Vector b(2);
+
+  state = 1.0;
+  astate = 1.0;
+  b = 0.0;
+  astate[1] = std::numeric_limits<double>::quiet_NaN();
+
+  const std::vector<const mfem::Vector*> states = {&state};
+  const std::vector<const mfem::Vector*> astates = {&astate};
+
+  EXPECT_THROW(smith::solveSubspaceProblem(states, astates, b, 1.0, 1), smith::TrustRegionException);
+}
+
 int main(int argc, char* argv[])
 {
   ::testing::InitGoogleTest(&argc, argv);
diff --git a/src/smith/numerics/trust_region_solver.hpp b/src/smith/numerics/trust_region_solver.hpp
index 11fff281c3..efd7f27a8b 100644
--- a/src/smith/numerics/trust_region_solver.hpp
+++ b/src/smith/numerics/trust_region_solver.hpp
@@ -75,16 +75,14 @@ double innerProduct(const mfem::Vector& a, const mfem::Vector& b, const MPI_Comm
 /// and their eigenvalues, and the predicted model energy change
 TrustRegionSubspaceResult solveSubspaceProblem(const std::vector<const mfem::Vector*>& directions,
                                                const std::vector<const mfem::Vector*>& A_directions,
-                                               const mfem::Vector& b, double delta, int num_leftmost);
-
-TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector<const mfem::Vector*>& directions,
-                                                   const std::vector<const mfem::Vector*>& A_directions,
-                                                   const mfem::Vector& b, double delta, int num_leftmost);
+                                               const mfem::Vector& b, double delta, int num_leftmost,
+                                               MPI_Comm comm = MPI_COMM_WORLD);
 
 /// @brief prepares reduced trust-region subspace data reusable across trust-region radius updates
 CachedTrustRegionSubspaceProblem prepareSubspaceProblem(const std::vector<const mfem::Vector*>& directions,
                                                         const std::vector<const mfem::Vector*>& A_directions,
-                                                        const mfem::Vector& b, int num_leftmost);
+                                                        const mfem::Vector& b, int num_leftmost,
+                                                        MPI_Comm comm = MPI_COMM_WORLD);
 
 /// @brief solves cached reduced trust-region problem for given trust-region radius
 TrustRegionSubspaceResult solvePreparedSubspaceProblem(const CachedTrustRegionSubspaceProblem& prepared, double delta);

From 72dfe74a37c77cd48ad4d2a66fc996b1c59ee6cc Mon Sep 17 00:00:00 2001
From: Michael Tupek <tupek2@llnl.gov>
Date: Tue, 26 May 2026 15:37:02 -0700
Subject: [PATCH 27/27] be a bit more careful about updating contact state when
 doing finite differencing.

---
 src/smith/physics/tests/contact_finite_diff.cpp | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/smith/physics/tests/contact_finite_diff.cpp b/src/smith/physics/tests/contact_finite_diff.cpp
index 531b7f2eaa..87f141330f 100644
--- a/src/smith/physics/tests/contact_finite_diff.cpp
+++ b/src/smith/physics/tests/contact_finite_diff.cpp
@@ -162,9 +162,6 @@ TEST_P(ContactFiniteDiff, patch)
     merged_sol.SetVector(u, 0);
     merged_sol.SetVector(pressure, u.Size());
     mfem::Vector f(merged_sol.Size());
-    f = 0.0;
-    oper->Mult(merged_sol, f);
-    auto* J_op = &oper->GetGradient(merged_sol);
     mfem::Vector u_dot(merged_sol.Size());
     u_dot = 0.0;
     // wiggle displacement (col = j)
@@ -174,6 +171,9 @@ TEST_P(ContactFiniteDiff, patch)
         ++dof_ct;
         continue;
       }
+      f = 0.0;
+      oper->Mult(merged_sol, f);
+      auto* J_op = &oper->GetGradient(merged_sol);
       u_dot[j] = 1.0;
       mfem::Vector J_exact(merged_sol.Size());
       J_exact = 0.0;
@@ -206,6 +206,10 @@ TEST_P(ContactFiniteDiff, patch)
     }
     std::cout << "Max diff = " << std::setprecision(15) << max_diff << std::endl;
 
+    // Restore the contact state after the finite-difference probes before advancing the timestep.
+    f = 0.0;
+    oper->Mult(merged_sol, f);
+
     solid_solver.advanceTimestep(dt);
   }
 }