diff --git a/src/smith/numerics/CMakeLists.txt b/src/smith/numerics/CMakeLists.txt
index cc16a2ea04..a39370fdd1 100644
--- a/src/smith/numerics/CMakeLists.txt
+++ b/src/smith/numerics/CMakeLists.txt
@@ -5,23 +5,23 @@
 # SPDX-License-Identifier: (BSD-3-Clause)
 
 add_subdirectory(functional)
-
 set(numerics_headers
     equation_solver.hpp
+    steihaug_toint_cg.hpp
     nonlinear_convergence.hpp
     odes.hpp
     solver_config.hpp
     stdfunction_operator.hpp
     petsc_solvers.hpp
     trust_region_solver.hpp
-    dense_petsc.hpp
     block_preconditioner.hpp
     )
 
 set(numerics_sources
     equation_solver.cpp
+    steihaug_toint_cg.cpp
+    mfem_trust_region_subspace.cpp
     nonlinear_convergence.cpp
-    trust_region_solver.cpp
     odes.cpp
     petsc_solvers.cpp
     block_preconditioner.cpp
diff --git a/src/smith/numerics/dense_petsc.hpp b/src/smith/numerics/dense_petsc.hpp
deleted file mode 100644
index 516b3ee01d..0000000000
--- a/src/smith/numerics/dense_petsc.hpp
+++ /dev/null
@@ -1,380 +0,0 @@
-// Copyright (c) Lawrence Livermore National Security, LLC and
-// other Smith Project Developers. See the top-level LICENSE file for
-// details.
-//
-// SPDX-License-Identifier: (BSD-3-Clause)
-
-#pragma once
-
-#ifdef SMITH_USE_SLEPC
-
-#include <slepcsvd.h>
-#include <slepcbv.h>
-#include <vector>
-
-struct DenseVec;
-
-/// Dense Matrix class which wraps petsc matrix for the case of a SeqDense matrix (on 1 processor)
-struct DenseMat {
-  /// @brief copy constructor
-  /// @param a matrix
-  DenseMat(const Mat& a) : A(a) {}
-
-  /// @brief constructor
-  /// @param a matrix
-  DenseMat(const DenseMat& a)
-  {
-    MatDuplicate(a.A, MAT_COPY_VALUES, &A);
-    MatCopy(a.A, A, SAME_NONZERO_PATTERN);
-    MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
-    MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
-  }
-
-  /// @brief destructor
-  ~DenseMat() { MatDestroy(&A); }
-
-  /// @brief size
-  auto size() const
-  {
-    int isize;
-    int jsize;
-    MatGetSize(A, &isize, &jsize);
-    return std::make_pair(isize, jsize);
-  }
-
-  /// @brief index into
-  double operator()(int i, int j) const
-  {
-    double val;
-    MatGetValue(A, i, j, &val);
-    return val;
-  }
-
-  /// @brief set value
-  void setValue(int i, int j, double val) { MatSetValues(A, 1, &i, 1, &j, &val, INSERT_VALUES); }
-
-  /// @brief matrix-vector multiply
-  DenseVec operator*(const DenseVec& v) const;
-
-  /// @brief solve
-  DenseVec solve(const DenseVec& v) const;
-
-  /// @brief multiply this by P transpose on left and P on the right
-  DenseMat PtAP(const DenseMat& P) const;
-
-  /// @brief print utility
-  void print(std::string first = "") const
-  {
-    if (first.size()) {
-      std::cout << first << ": ";
-    }
-    MatView(A, PETSC_VIEWER_STDOUT_SELF);
-  }
-
-  /// @brief check for nans
-  bool hasNan() const
-  {
-    auto [rows, cols] = size();
-    for (int i = 0; i < rows; ++i) {
-      for (int j = 0; j < cols; ++j) {
-        double val = (*this)(i, j);
-        if (val != val) return true;
-      }
-    }
-    return false;
-  }
-
-  /// @brief  reassemble petsc dense matrix after values have been modified
-  void reassemble()
-  {
-    MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
-    MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
-  }
-
-  /// petsc matrix
-  Mat A;
-};
-
-/// matrix inverse
-/// @param a matrix
-DenseMat inverse(const DenseMat& a)
-{
-  Mat inv;
-  MatDuplicate(a.A, MAT_COPY_VALUES, &inv);
-  MatSeqDenseInvert(inv);
-  return inv;
-}
-
-/// compute the symmetric part
-/// @param a matrix
-DenseMat sym(const DenseMat& a)
-{
-  DenseMat b = a;
-  auto [rows, cols] = b.size();
-  SLIC_ERROR_IF(rows != cols, "Calling sym on a non-square DenseMat");
-
-  for (int i = 0; i < rows; ++i) {
-    for (int j = 0; j < i; ++j) {
-      auto val = 0.5 * a(i, j) + 0.5 * a(j, i);
-      b.setValue(i, j, val);
-      b.setValue(j, i, val);
-    }
-  }
-
-  b.reassemble();
-
-  return b;
-}
-
-/// Dense Vector class which wraps petsc vector for the case of a SeqDense vector (on 1 processor)
-struct DenseVec {
-  /// @brief constructor
-  DenseVec(const Vec& vin) : v(vin) {}
-
-  /// @brief constructor
-  DenseVec(const DenseVec& vin)
-  {
-    VecDuplicate(vin.v, &v);
-    VecCopy(vin.v, v);
-  }
-
-  /// @brief constructor from size
-  DenseVec(size_t size) { VecCreateSeq(PETSC_COMM_SELF, static_cast<int>(size), &v); }
-
-  /// @brief constructor from size
-  DenseVec(int size) { VecCreateSeq(PETSC_COMM_SELF, size, &v); }
-
-  /// @brief constructor standard vector
-  DenseVec(const std::vector<double> vin)
-  {
-    const auto sz = vin.size();
-    std::vector<int> allints(sz);
-    for (size_t i = 0; i < sz; ++i) {
-      allints[i] = static_cast<int>(i);
-    }
-    int sz_int = static_cast<int>(sz);
-    VecCreateSeq(PETSC_COMM_SELF, sz_int, &v);
-    VecSetValues(v, sz_int, &allints[0], &vin[0], INSERT_VALUES);
-  }
-
-  /// @brief assignment
-  DenseVec& operator=(const DenseVec& vin)
-  {
-    VecCopy(vin.v, v);
-    return *this;
-  }
-
-  /// @brief assignment from scalar
-  DenseVec& operator=(const double val)
-  {
-    VecSet(v, val);
-    return *this;
-  }
-
-  /// @brief destructor
-  ~DenseVec()
-  {
-    if (v) VecDestroy(&v);
-  }
-
-  /// @brief negate
-  DenseVec operator-() const
-  {
-    Vec minus;
-    VecDuplicate(v, &minus);
-    VecCopy(v, minus);
-    VecScale(minus, -1.0);
-    return minus;
-  }
-
-  /// @brief scale
-  DenseVec& operator*=(double scale)
-  {
-    VecScale(v, scale);
-    return *this;
-  }
-
-  /// @brief size
-  int size() const
-  {
-    int isize;
-    VecGetSize(v, &isize);
-    return isize;
-  }
-
-  /// @brief index into
-  double operator[](int i) const
-  {
-    double val;
-    VecGetValues(v, 1, &i, &val);
-    return val;
-  }
-
-  /// @brief index into
-  double operator[](size_t i) const { return (*this)[int(i)]; }
-
-  /// @brief set value
-  void setValue(int i, double val) { VecSetValues(v, 1, &i, &val, INSERT_VALUES); }
-
-  /// @brief set value
-  void setValue(size_t i, double val) { setValue(int(i), val); }
-
-  /// @brief add scaled vector
-  void add(double val, const DenseVec& w) { VecAXPY(v, val, w.v); }
-
-  /// @brief convert to standard vector
-  std::vector<double> getValues() const
-  {
-    size_t sz = static_cast<size_t>(size());
-    std::vector<double> vout(sz);
-    std::vector<int> allints(sz);
-    for (size_t i = 0; i < sz; ++i) {
-      allints[i] = static_cast<int>(i);
-    }
-    int sz_int = static_cast<int>(sz);
-    VecGetValues(v, sz_int, &allints[0], &vout[0]);
-    return vout;
-  }
-
-  /// @brief print utility
-  void print(std::string first = "") const
-  {
-    if (first.size()) {
-      std::cout << first << ": ";
-    }
-    VecView(v, PETSC_VIEWER_STDOUT_SELF);
-  }
-
-  /// petsc vector
-  Vec v;
-};
-
-/// @brief matrix vector multiply
-DenseVec DenseMat::operator*(const DenseVec& v) const
-{
-  Vec out;
-  auto [rows, cols] = size();
-  SLIC_ERROR_IF(cols != v.size(), "Column size of dense matrix and length of multiplied vector do not match");
-  VecCreateSeq(PETSC_COMM_SELF, rows, &out);
-  MatMult(A, v.v, out);
-  return out;
-}
-
-/// @brief matrix linear solve
-DenseVec DenseMat::solve(const DenseVec& v) const
-{
-  Vec out;
-  VecDuplicate(v.v, &out);
-  MatLUFactor(A, NULL, NULL, NULL);  // not efficient if done a lot
-  MatSolve(A, v.v, out);
-  return out;
-}
-
-/// @brief multiply matrix by P-transpose on left, P on right
-DenseMat DenseMat::PtAP(const DenseMat& P) const
-{
-  Mat pAp;
-  MatPtAP(A, P.A, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pAp);
-  return pAp;
-}
-
-/// @brief vector dot product
-double dot(const DenseVec& a, const DenseVec& b)
-{
-  double d;
-  VecDot(a.v, b.v, &d);
-  return d;
-}
-
-/// @brief add a scalar to a vector
-DenseVec operator+(const DenseVec& a, double b)
-{
-  Vec c;
-  VecDuplicate(a.v, &c);
-  VecSet(c, b);
-  VecAXPY(c, 1.0, a.v);
-  return c;
-}
-
-DenseVec operator+(double b, const DenseVec& a) { return a + b; }
-
-/// @brief component-wise multiplication of vectors
-DenseVec operator*(const DenseVec& a, const DenseVec& b)
-{
-  Vec c;
-  VecDuplicate(a.v, &c);
-  VecPointwiseMult(c, a.v, b.v);
-  return c;
-}
-
-/// @brief component-wise vector divide
-DenseVec operator/(const DenseVec& a, const DenseVec& b)
-{
-  Vec c;
-  VecDuplicate(a.v, &c);
-  VecPointwiseDivide(c, a.v, b.v);
-  return c;
-}
-
-/// @brief component-wise vector absolute value
-DenseVec abs(const DenseVec& a)
-{
-  Vec absa;
-  VecDuplicate(a.v, &absa);
-  VecCopy(a.v, absa);
-  VecAbs(absa);
-  return absa;
-}
-
-/// @brief sum values in a vector
-double sum(const DenseVec& a)
-{
-  double s;
-  VecSum(a.v, &s);
-  return s;
-}
-
-/// @brief l2-norm of vector
-double norm(const DenseVec& a)
-{
-  double n;
-  VecNorm(a.v, NORM_2, &n);
-  return n;
-}
-
-/// @brief computes the eigenvectors and eigenvalues of a dense symmetric matrix
-auto eigh(const DenseMat& Adense)
-{
-  auto [isize, jsize] = Adense.size();
-  SLIC_ERROR_IF(isize != jsize, "Eig must be called for symmetric matrices");
-
-  const Mat& A = Adense.A;
-
-  EPS eps;
-  EPSCreate(PETSC_COMM_SELF, &eps);
-  EPSSetOperators(eps, A, NULL);
-  EPSSetProblemType(eps, EPS_HEP);
-  EPSSetWhichEigenpairs(eps, EPS_SMALLEST_REAL);
-  EPSSetDimensions(eps, isize, PETSC_DETERMINE, PETSC_DETERMINE);
-  EPSSetFromOptions(eps);
-
-  EPSSolve(eps);
-
-  EPSType type;
-  EPSGetType(eps, &type);
-  EPSGetDimensions(eps, &jsize, NULL, NULL);
-
-  DenseVec eigenvalues(isize);
-  std::vector<DenseVec> eigenvectors;
-  for (int i = 0; i < isize; ++i) {
-    eigenvectors.emplace_back(isize);
-    double eigenvalue;
-    EPSGetEigenpair(eps, i, &eigenvalue, PETSC_NULLPTR, eigenvectors[static_cast<size_t>(i)].v, PETSC_NULLPTR);
-    eigenvalues.setValue(i, eigenvalue);
-  }
-
-  EPSDestroy(&eps);
-  return std::make_pair(std::move(eigenvalues), std::move(eigenvectors));
-}
-
-#endif  // SMITH_USE_SLEPC
diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp
index 70ebd6a41c..a197d7884e 100644
--- a/src/smith/numerics/equation_solver.cpp
+++ b/src/smith/numerics/equation_solver.cpp
@@ -5,9 +5,12 @@
 // SPDX-License-Identifier: (BSD-3-Clause)
 
 #include "smith/numerics/equation_solver.hpp"
+#include "smith/numerics/steihaug_toint_cg.hpp"
 #include "smith/numerics/block_preconditioner.hpp"
 
+#include <array>
 #include <cstdlib>
+#include <functional>
 #include <iomanip>
 #include <iostream>
 #include <algorithm>
@@ -16,6 +19,7 @@
 #include <limits>
 #include <string>
 #include <tuple>
+#include <utility>
 
 #include "smith/smith_config.hpp"
 #include "smith/infrastructure/profiling.hpp"
@@ -26,6 +30,21 @@ namespace smith {
 
 namespace {
 
+size_t rootOnlyPrintLevel(const mfem::NewtonSolver& solver, size_t level)
+{
+#ifdef MFEM_USE_MPI
+  const MPI_Comm comm = solver.GetComm();
+  if (level > 0 && comm != MPI_COMM_NULL) {
+    int rank = 0;
+    MPI_Comm_rank(comm, &rank);
+    if (rank != 0) {
+      return 0;
+    }
+  }
+#endif
+  return level;
+}
+
 class SolverWithPreconditioner : public mfem::Solver {
  public:
   SolverWithPreconditioner(std::unique_ptr<mfem::Solver> linear_solver, std::unique_ptr<mfem::Solver> preconditioner)
@@ -102,6 +121,36 @@ bool monolithicizeOperatorIfNeeded(const LinearSolverOptions& linear_options, mf
   return true;
 }
 
+ConvergenceStatus scalarConvergenceStatus(double residual_norm, double initial_norm, double abs_tol, double rel_tol)
+{
+  ConvergenceStatus status;
+  status.block_norms = {residual_norm};
+  status.global_norm = residual_norm;
+  const double relative_base = initial_norm > 0.0 ? initial_norm : residual_norm;
+  status.global_goal = std::max(abs_tol, rel_tol * relative_base);
+  status.global_converged = status.global_norm <= status.global_goal;
+  status.converged = status.global_converged;
+  return status;
+}
+
+bool shouldUseSubspaceStep(int subspace_option, TrustRegionResults::Status status, double step_norm, double tr_size,
+                           int line_search_iter)
+{
+  const bool failed_or_indefinite = status == TrustRegionResults::Status::NonDescentDirection ||
+                                    status == TrustRegionResults::Status::NegativeCurvature ||
+                                    ((step_norm > (1.0 - 1.0e-6) * tr_size) && line_search_iter > 1);
+  const bool on_boundary = step_norm > (1.0 - 1.0e-6) * tr_size;
+  return ((subspace_option >= 1) && failed_or_indefinite) || ((subspace_option >= 2) && on_boundary) ||
+         (subspace_option >= 3);
+}
+
+enum class SubspaceStepStatus
+{
+  Unavailable,
+  Unchanged,
+  Replaced
+};
+
 }  // namespace
 
 /// @cond
@@ -163,11 +212,7 @@ class NewtonSolver : public mfem::NewtonSolver, public ConvergenceManagedNonline
       if (convergence_manager_) {
         status = convergence_manager_->evaluate(1.0, rOut);
       } else {
-        status.block_norms = {Norm(rOut)};
-        status.global_norm = status.block_norms.front();
-        status.global_goal = std::max(rel_tol * initial_norm, abs_tol);
-        status.global_converged = status.global_norm <= status.global_goal;
-        status.converged = status.global_converged;
+        status = scalarConvergenceStatus(Norm(rOut), initial_norm, abs_tol, rel_tol);
       }
     } catch (const std::exception&) {
       status.global_norm = std::numeric_limits<double>::max();
@@ -209,8 +254,10 @@ class NewtonSolver : public mfem::NewtonSolver, public ConvergenceManagedNonline
     MFEM_ASSERT(oper != NULL, "the Operator is not set (use SetOperator).");
     MFEM_ASSERT(prec != NULL, "the Solver is not set (use SetSolver).");
 
-    print_level = print_options.iterations ? 1 : print_level;
-    print_level = print_options.summary ? 2 : print_level;
+    print_level = static_cast<size_t>(std::max(nonlinear_options.print_level, 0));
+    print_level = print_options.iterations ? std::max<size_t>(1, print_level) : print_level;
+    print_level = print_options.summary ? std::max<size_t>(2, print_level) : print_level;
+    print_level = rootOnlyPrintLevel(*this, print_level);
 
     using real_t = mfem::real_t;
 
@@ -336,94 +383,12 @@ class NewtonSolver : public mfem::NewtonSolver, public ConvergenceManagedNonline
   }
 };
 
-/// Internal structure for storing trust region settings
-struct TrustRegionSettings {
-  /// cg tol
-  double cg_tol = 1e-8;
-  /// min cg iters
-  size_t min_cg_iterations = 0;  //
-  /// max cg iters should be around # of system dofs
-  size_t max_cg_iterations = 10000;  //
-  /// max cumulative iterations
-  size_t max_cumulative_iteration = 1;
-  /// minimum trust region size
-  double min_tr_size = 1e-13;
-  /// trust region decrease factor
-  double t1 = 0.25;
-  /// trust region increase factor
-  double t2 = 1.75;
-  /// worse case energy drop ratio.  trust region accepted if energy drop is better than this.
-  double eta1 = 1e-9;
-  /// non-ideal energy drop ratio.  trust region decreases if energy drop is worse than this.
-  double eta2 = 0.1;
-  /// ideal energy drop ratio.  trust region increases if energy drop is better than this.
-  double eta3 = 0.6;
-  /// parameter limiting how fast the energy can drop relative to the prediction (in case the energy surrogate is poor)
-  double eta4 = 4.2;
-};
-
-/// Internal structure for storing trust region stateful data
-struct TrustRegionResults {
-  /// Constructor takes the size of the solution vector
-  TrustRegionResults(int size)
-  {
-    z.SetSize(size);
-    H_z.SetSize(size);
-    d_old.SetSize(size);
-    H_d_old.SetSize(size);
-    d.SetSize(size);
-    H_d.SetSize(size);
-    Pr.SetSize(size);
-    cauchy_point.SetSize(size);
-    H_cauchy_point.SetSize(size);
-  }
-
-  /// resets trust region results for a new outer iteration
-  void reset()
-  {
-    z = 0.0;
-    cauchy_point = 0.0;
-  }
-
-  /// enumerates the possible final status of the trust region steps
-  enum class Status
-  {
-    Interior,
-    NegativeCurvature,
-    OnBoundary,
-    NonDescentDirection
-  };
-
-  /// step direction
-  mfem::Vector z;
-  /// action of hessian on current step z
-  mfem::Vector H_z;
-  /// old step direction
-  mfem::Vector d_old;
-  /// action of hessian on previous step z_old
-  mfem::Vector H_d_old;
-  /// incrementalCG direction
-  mfem::Vector d;
-  /// action of hessian on direction d
-  mfem::Vector H_d;
-  /// preconditioned residual
-  mfem::Vector Pr;
-  /// cauchy point
-  mfem::Vector cauchy_point;
-  /// action of hessian on direction of cauchy point
-  mfem::Vector H_cauchy_point;
-  /// specifies if step is interior, exterior, negative curvature, etc.
-  Status interior_status = Status::Interior;
-  /// iteration counter
-  size_t cg_iterations_count = 0;
-};
-
 /// trust region printing utility function
-void printTrustRegionInfo(double realObjective, double modelObjective, size_t cgIters, double trSize, bool willAccept)
+void printTrustRegionInfo(double realWork, double modelObjective, size_t cgIters, double trSize, bool willAccept)
 {
-  mfem::out << "real energy = " << std::setw(13) << realObjective << ", model energy = " << std::setw(13)
-            << modelObjective << ", cg iter = " << std::setw(7) << cgIters << ", next tr size = " << std::setw(8)
-            << trSize << ", accepting = " << willAccept << std::endl;
+  mfem::out << "real work = " << std::setw(13) << realWork << ", model energy = " << std::setw(13) << modelObjective
+            << ", cg iter = " << std::setw(7) << cgIters << ", next tr size = " << std::setw(8) << trSize
+            << ", accepting = " << willAccept << std::endl;
 }
 
 /**
@@ -466,17 +431,6 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea
   std::shared_ptr<EquationSolverConvergenceManager> convergence_manager_ = nullptr;
 
  public:
-  /// internal counter for hess-vecs
-  mutable size_t num_hess_vecs = 0;
-  /// internal counter for preconditions
-  mutable size_t num_preconds = 0;
-  /// internal counter for residuals
-  mutable size_t num_residuals = 0;
-  /// internal counter for subspace solves
-  mutable size_t num_subspace_solves = 0;
-  /// internal counter for matrix assembles
-  mutable size_t num_jacobian_assembles = 0;
-
 #ifdef MFEM_USE_MPI
   /// constructor
   TrustRegion(MPI_Comm comm_, const NonlinearSolverOptions& nonlinear_opts, const LinearSolverOptions& linear_opts,
@@ -497,89 +451,100 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea
     convergence_manager_ = std::move(convergence_manager);
   }
 
-  /// finds tau s.t. (z + tau*d)^2 = trSize^2
-  void projectToBoundaryWithCoefs(mfem::Vector& z, const mfem::Vector& d, double delta, double zz, double zd,
-                                  double dd) const
+  /// compute several vector inner products with a single MPI reduction when possible
+  std::vector<double> dot_many(const std::vector<DotPair>& pairs) const
   {
-    // find z + tau d
-    double deltadelta_m_zz = delta * delta - zz;
-    if (deltadelta_m_zz == 0) return;  // already on boundary
-    double tau = (std::sqrt(deltadelta_m_zz * dd + zd * zd) - zd) / dd;
-    z.Add(tau, d);
+    if (dot_oper) {
+      std::vector<double> products(pairs.size(), 0.0);
+      for (size_t i = 0; i < pairs.size(); ++i) {
+        products[i] = Dot(*pairs[i].first, *pairs[i].second);
+      }
+      return products;
+    }
+
+    std::vector<double> products = smith::dotMany(pairs);
+
+#ifdef MFEM_USE_MPI
+    const MPI_Comm dot_comm = GetComm();
+    if (dot_comm != MPI_COMM_NULL) {
+      std::vector<mfem::real_t> global_products(pairs.size());
+      MPI_Allreduce(products.data(), global_products.data(), static_cast<int>(pairs.size()), MFEM_MPI_REAL_T, MPI_SUM,
+                    dot_comm);
+      products.assign(global_products.begin(), global_products.end());
+    }
+#endif
+
+    return products;
   }
 
-  /// solve the exact trust-region subspace problem with directions ds, and the leftmosts
-  template <typename HessVecFunc>
-  void solveTheSubspaceProblem([[maybe_unused]] mfem::Vector& z, [[maybe_unused]] const HessVecFunc& hess_vec_func,
-                               [[maybe_unused]] const std::vector<const mfem::Vector*> ds,
-                               [[maybe_unused]] const std::vector<const mfem::Vector*> Hds,
-                               [[maybe_unused]] const mfem::Vector& g, [[maybe_unused]] double delta,
-                               [[maybe_unused]] int num_leftmost) const
+  /// build reusable subspace data for line-search retries
+  bool prepareSubspaceProblemCache([[maybe_unused]] const std::vector<const mfem::Vector*>& ds,
+                                   [[maybe_unused]] const std::vector<const mfem::Vector*>& Hds,
+                                   [[maybe_unused]] const mfem::Vector& g, [[maybe_unused]] int num_leftmost,
+                                   [[maybe_unused]] CachedTrustRegionSubspaceProblem& prepared_subspace) const
   {
-#ifdef SMITH_USE_SLEPC
+#ifdef MFEM_USE_LAPACK
     SMITH_MARK_FUNCTION;
-    ++num_subspace_solves;
-
-    std::vector<const mfem::Vector*> directions;
-    for (auto& d : ds) {
-      directions.emplace_back(d);
-    }
-    for (auto& left : left_mosts) {
-      directions.emplace_back(left.get());
-    }
+    std::vector<const mfem::Vector*> directions(ds.begin(), ds.end());
+    std::vector<const mfem::Vector*> H_directions(Hds.begin(), Hds.end());
+    for (auto& left : left_mosts) directions.emplace_back(left.get());
+    for (auto& H_left : H_left_mosts) H_directions.emplace_back(H_left.get());
 
-    std::vector<const mfem::Vector*> H_directions;
-    for (auto& Hd : Hds) {
-      H_directions.emplace_back(Hd);
-    }
-    for (auto& H_left : H_left_mosts) {
-      H_directions.emplace_back(H_left.get());
-    }
+    mfem::Vector b(g);
+    b *= -1;
 
     try {
-      std::tie(directions, H_directions) = removeDependentDirections(directions, H_directions);
+      prepared_subspace = smith::prepareSubspaceProblem(directions, H_directions, b, num_leftmost, GetComm());
     } catch (const std::exception& e) {
-      if (print_level >= 2) {
-        mfem::out << "remove dependent directions failed with " << e.what() << std::endl;
+      if (print_level >= 1) {
+        mfem::out << "subspace preparation failed with " << e.what() << "; using dogleg fallback." << std::endl;
       }
-      return;
+      return false;
     }
+    return true;
+#else
+    return false;
+#endif
+  }
 
-    mfem::Vector b(g);
-    b *= -1;
-
+  /// solve cached exact trust-region subspace problem for current trust-region size
+  template <typename HessVecFunc>
+  SubspaceStepStatus solvePreparedSubspaceProblem(
+      [[maybe_unused]] mfem::Vector& z, [[maybe_unused]] const HessVecFunc& hess_vec_func,
+      [[maybe_unused]] const CachedTrustRegionSubspaceProblem& prepared_subspace,
+      [[maybe_unused]] const mfem::Vector& g, [[maybe_unused]] double delta) const
+  {
+#ifdef MFEM_USE_LAPACK
+    SMITH_MARK_FUNCTION;
     mfem::Vector sol;
-    std::vector<std::shared_ptr<mfem::Vector>> leftvecs;
-    std::vector<double> leftvals;
     double energy_change;
 
     try {
-      std::tie(sol, leftvecs, leftvals, energy_change) =
-          solveSubspaceProblem(directions, H_directions, b, delta, num_leftmost);
+      std::tie(sol, std::ignore, std::ignore, energy_change) =
+          smith::solvePreparedSubspaceProblem(prepared_subspace, delta);
     } catch (const std::exception& e) {
-      if (print_level == 1) {
-        mfem::out << "subspace solve failed with " << e.what() << std::endl;
+      if (print_level >= 1) {
+        mfem::out << "subspace solve failed with " << e.what() << "; using dogleg fallback." << std::endl;
       }
-      return;
-    }
-
-    left_mosts.clear();
-    for (auto& lv : leftvecs) {
-      left_mosts.emplace_back(std::move(lv));
+      return SubspaceStepStatus::Unavailable;
     }
 
     double base_energy = computeEnergy(g, hess_vec_func, z);
     double subspace_energy = computeEnergy(g, hess_vec_func, sol);
 
     if (print_level >= 2) {
-      double leftval = leftvals.size() ? leftvals[0] : 1.0;
+      double leftval = prepared_subspace.leftvals.size() ? prepared_subspace.leftvals[0] : 1.0;
       mfem::out << "Energy using subspace solver from: " << base_energy << ", to: " << subspace_energy << " / "
                 << energy_change << ".  Min eig: " << leftval << std::endl;
     }
 
     if (subspace_energy < base_energy) {
       z = sol;
+      return SubspaceStepStatus::Replaced;
     }
+    return SubspaceStepStatus::Unchanged;
+#else
+    return SubspaceStepStatus::Unavailable;
 #endif
   }
 
@@ -589,7 +554,9 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea
   {
     double dd = yy - 2 * zy + zz;
     double zd = zy - zz;
-    double tau = (std::sqrt((trSize * trSize - zz) * dd + zd * zd) - zd) / dd;
+    double boundary_gap = std::max(trSize * trSize - zz, 0.0);
+    if (boundary_gap == 0.0) return;
+    double tau = (std::sqrt(boundary_gap * dd + zd * zd) - zd) / dd;
     z.Add(-tau, z);
     z.Add(tau, y);
   }
@@ -598,9 +565,9 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea
   void doglegStep(const mfem::Vector& cp, const mfem::Vector& newtonP, double trSize, mfem::Vector& s) const
   {
     SMITH_MARK_FUNCTION;
-    // MRT, could optimize some of these eventually, compute on the outside and save
-    double cc = Dot(cp, cp);
-    double nn = Dot(newtonP, newtonP);
+    const auto dots = dot_many({{&cp, &cp}, {&newtonP, &newtonP}});
+    const double cc = dots[0];
+    const double nn = dots[1];
     double tt = trSize * trSize;
 
     s = 0.0;
@@ -633,109 +600,42 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea
   }
 
   /// Minimize quadratic sub-problem given residual vector, the action of the stiffness and a preconditioner
-  template <typename HessVecFunc, typename PrecondFunc>
-  void solveTrustRegionModelProblem(const mfem::Vector& r0, mfem::Vector& rCurrent, HessVecFunc hess_vec_func,
-                                    PrecondFunc precond, const TrustRegionSettings& settings, double& trSize,
-                                    TrustRegionResults& results) const
+  void solveModelProblem(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem::Operator& H, const mfem::Solver* P,
+                         const TrustRegionSettings& settings, double& trSize, TrustRegionResults& results,
+                         double r0_norm_squared) const
   {
-    SMITH_MARK_FUNCTION;
-    // minimize r0@z + 0.5*z@J@z
-    results.interior_status = TrustRegionResults::Status::Interior;
-    results.cg_iterations_count = 0;
-
-    auto& z = results.z;
-    auto& cgIter = results.cg_iterations_count;
-    auto& d = results.d;
-    auto& Pr = results.Pr;
-    auto& Hd = results.H_d;
-
-    const double cg_tol_squared = settings.cg_tol * settings.cg_tol;
+    auto dot_many_lambda = [this](const std::vector<DotPair>& pairs) { return dot_many(pairs); };
+    steihaugTointCG(r0, rCurrent, H, P, settings, trSize, results, r0_norm_squared, dot_many_lambda);
+  }
 
-    if (Dot(r0, r0) <= cg_tol_squared && settings.min_cg_iterations == 0) {
-      if (print_level >= 2) {
-        mfem::out << "Trust region solution state within tolerance on first iteration."
-                  << "\n";
-      }
-      return;
+  void fallbackToCauchyPoint(TrustRegionResults& results, const char* reason) const
+  {
+    if (print_level >= 2) {
+      mfem::out << reason << "; using cauchy point fallback." << std::endl;
     }
+    results.d = results.cauchy_point;
+  }
 
-    rCurrent = r0;
-    precond(rCurrent, Pr);
-
-    // d = -Pr
-    d = Pr;
-    d *= -1.0;
-
-    z = 0.0;
-    double zz = 0.;
-    double rPr = Dot(rCurrent, Pr);
-    double zd = 0.0;
-    double dd = Dot(d, d);
-
-    // std::cout << "initial energy = " << computeEnergy(r0, hess_vec_func, z) << std::endl;
-
-    for (cgIter = 1; cgIter <= settings.max_cg_iterations; ++cgIter) {
-      // check if this is a descent direction
-      if (Dot(d, rCurrent) > 0) {
-        d *= -1;
-        results.interior_status = TrustRegionResults::Status::NonDescentDirection;
-      }
-
-      hess_vec_func(d, Hd);
-      const double curvature = Dot(d, Hd);
-      const double alphaCg = curvature != 0.0 ? rPr / curvature : 0.0;
-
-      auto& zPred = Pr;  // re-use Pr memory.
-                         // This predicted step will no longer be used by the time Pr is, so we can avoid an extra
-                         // vector floating around
-      add(z, alphaCg, d, zPred);
-      double zzNp1 = Dot(zPred, zPred);
-
-      const bool go_to_boundary = curvature <= 0 || zzNp1 >= trSize * trSize;
-      if (go_to_boundary) {
-        projectToBoundaryWithCoefs(z, d, trSize, zz, zd, dd);
-        if (curvature <= 0) {
-          results.interior_status = TrustRegionResults::Status::NegativeCurvature;
-        } else {
-          results.interior_status = TrustRegionResults::Status::OnBoundary;
-        }
-        return;
-      }
-
-      z = zPred;
-
-      if (results.interior_status == TrustRegionResults::Status::NonDescentDirection) {
-        if (print_level >= 2) {
-          mfem::out << "Found a non descent direction\n";
-        }
-        return;
-      }
-
-      add(rCurrent, alphaCg, Hd, rCurrent);
-
-      precond(rCurrent, Pr);
-      double rPrNp1 = Dot(rCurrent, Pr);
-
-      if (Dot(rCurrent, rCurrent) <= cg_tol_squared && cgIter >= settings.min_cg_iterations) {
-        return;
-      }
-
-      double beta = rPrNp1 / rPr;
-      rPr = rPrNp1;
-      add(-1.0, Pr, beta, d, d);
+  bool isDescentStep(const mfem::Vector& step, const mfem::Vector& residual) const
+  {
+    auto dot_many_lambda = [this](const std::vector<DotPair>& pairs) { return dot_many(pairs); };
+    return smith::isDescentDirection(step, residual, dot_many_lambda);
+  }
 
-      zz = zzNp1;
-      zd = Dot(z, d);
-      dd = Dot(d, d);
+  template <typename HessVecFunc>
+  void computeHessianActions(const std::vector<const mfem::Vector*>& inputs, const std::vector<mfem::Vector*>& outputs,
+                             const HessVecFunc& hess_vec_func) const
+  {
+    MFEM_VERIFY(inputs.size() == outputs.size(), "Subspace Hessian-vector batch input/output size mismatch");
+    for (size_t i = 0; i < inputs.size(); ++i) {
+      hess_vec_func(*inputs[i], *outputs[i]);
     }
-    cgIter--;  // if all cg iterations are taken, correct for output
   }
 
   /// assemble the jacobian
   void assembleJacobian(const mfem::Vector& x) const
   {
     SMITH_MARK_FUNCTION;
-    ++num_jacobian_assembles;
     if (grad_monolithic) {
       delete grad;
       grad = nullptr;
@@ -749,11 +649,11 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea
   mfem::real_t computeResidual(const mfem::Vector& x_, mfem::Vector& r_) const
   {
     SMITH_MARK_FUNCTION;
-    ++num_residuals;
     oper->Mult(x_, r_);
     return Norm(r_);
   }
 
+  /// apply the action of the current Jacobian representation to a vector
   ConvergenceStatus evaluateConvergence(const mfem::Vector& x_, mfem::Vector& r_) const
   {
     ConvergenceStatus status;
@@ -764,10 +664,7 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea
       if (convergence_manager_) {
         status = convergence_manager_->evaluate(1.0, r_);
       } else {
-        status.block_norms = {status.global_norm};
-        status.global_goal = std::max(rel_tol * initial_norm, abs_tol);
-        status.global_converged = status.global_norm <= status.global_goal;
-        status.converged = status.global_converged;
+        status = scalarConvergenceStatus(status.global_norm, initial_norm, abs_tol, rel_tol);
       }
     } catch (const std::exception&) {
       status.global_norm = std::numeric_limits<double>::max();
@@ -780,7 +677,6 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea
   void hessVec(const mfem::Vector& x_, mfem::Vector& v_) const
   {
     SMITH_MARK_FUNCTION;
-    ++num_hess_vecs;
     grad->Mult(x_, v_);
   }
 
@@ -788,7 +684,6 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea
   void precond(const mfem::Vector& x_, mfem::Vector& v_) const
   {
     SMITH_MARK_FUNCTION;
-    ++num_preconds;
     tr_precond.Mult(x_, v_);
   };
 
@@ -797,18 +692,13 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea
   {
     MFEM_ASSERT(oper != NULL, "the Operator is not set (use SetOperator).");
     MFEM_ASSERT(prec != NULL, "the Solver is not set (use SetSolver).");
-
-    print_level = print_options.iterations ? 1 : print_level;
-    print_level = print_options.summary ? 2 : print_level;
+    print_level = static_cast<size_t>(std::max(nonlinear_options.print_level, 0));
+    print_level = print_options.iterations ? std::max<size_t>(1, print_level) : print_level;
+    print_level = print_options.summary ? std::max<size_t>(2, print_level) : print_level;
+    print_level = rootOnlyPrintLevel(*this, print_level);
 
     using real_t = mfem::real_t;
 
-    num_hess_vecs = 0;
-    num_preconds = 0;
-    num_residuals = 0;
-    num_subspace_solves = 0;
-    num_jacobian_assembles = 0;
-
     ConvergenceStatus status = evaluateConvergence(X, r);
     real_t norm = status.global_norm;
     real_t norm_goal = status.global_goal;
@@ -880,23 +770,25 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea
       }
 
       auto hess_vec_func = [&](const mfem::Vector& x_, mfem::Vector& v_) { hessVec(x_, v_); };
-      auto precond_func = [&](const mfem::Vector& x_, mfem::Vector& v_) { precond(x_, v_); };
 
       double cauchyPointNormSquared = tr_size * tr_size;
       trResults.reset();
 
-      hess_vec_func(r, trResults.H_d);
-      const double gKg = Dot(r, trResults.H_d);
-      if (gKg > 0) {
-        const double alphaCp = -Dot(r, r) / gKg;
-        add(trResults.cauchy_point, alphaCp, r, trResults.cauchy_point);
-        cauchyPointNormSquared = Dot(trResults.cauchy_point, trResults.cauchy_point);
-      } else {
-        const double alphaTr = -tr_size / std::sqrt(Dot(r, r));
-        add(trResults.cauchy_point, alphaTr, r, trResults.cauchy_point);
-        if (print_level >= 2) {
-          mfem::out << "Negative curvature un-preconditioned cauchy point direction found."
-                    << "\n";
+      {
+        hess_vec_func(r, trResults.H_d);
+        const double gKg = Dot(r, trResults.H_d);
+        const double residual_norm_squared = norm * norm;
+        if (gKg > 0) {
+          const double alphaCp = -residual_norm_squared / gKg;
+          add(trResults.cauchy_point, alphaCp, r, trResults.cauchy_point);
+          cauchyPointNormSquared = Dot(trResults.cauchy_point, trResults.cauchy_point);
+        } else {
+          const double alphaTr = -tr_size / norm;
+          add(trResults.cauchy_point, alphaTr, r, trResults.cauchy_point);
+          if (print_level >= 2) {
+            mfem::out << "Negative curvature un-preconditioned cauchy point direction found."
+                      << "\n";
+          }
         }
       }
 
@@ -912,49 +804,91 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea
         trResults.interior_status = TrustRegionResults::Status::OnBoundary;
       } else {
         settings.cg_tol = std::max(0.5 * norm_goal, 5e-5 * norm);
-        solveTrustRegionModelProblem(r, scratch, hess_vec_func, precond_func, settings, tr_size, trResults);
+        solveModelProblem(r, scratch, *grad, &this->tr_precond, settings, tr_size, trResults, norm * norm);
       }
       cumulative_cg_iters_from_last_precond_update += trResults.cg_iterations_count;
 
       bool have_computed_Hvs = false;
+      bool have_computed_H_left_mosts = false;
+      bool have_prepared_subspace = false;
+      CachedTrustRegionSubspaceProblem prepared_subspace;
+#ifdef MFEM_USE_LAPACK
+      constexpr bool can_use_subspace_solver = true;
+#else
+      constexpr bool can_use_subspace_solver = false;
+#endif
 
       int lineSearchIter = 0;
       while (lineSearchIter <= nonlinear_options.max_line_search_iterations) {
         ++lineSearchIter;
 
         doglegStep(trResults.cauchy_point, trResults.z, tr_size, trResults.d);
+        const double d_norm = subspace_option >= 1 ? std::sqrt(Dot(trResults.d, trResults.d)) : 0.0;
+        const bool use_subspace =
+            can_use_subspace_solver &&
+            shouldUseSubspaceStep(subspace_option, trResults.interior_status, d_norm, tr_size, lineSearchIter);
 
-        bool use_with_option1 =
-            (subspace_option >= 1) && (trResults.interior_status == TrustRegionResults::Status::NonDescentDirection ||
-                                       trResults.interior_status == TrustRegionResults::Status::NegativeCurvature ||
-                                       ((Norm(trResults.d) > (1.0 - 1.0e-6) * tr_size) && lineSearchIter > 1));
-        bool use_with_option2 = (subspace_option >= 2) && (Norm(trResults.d) > (1.0 - 1.0e-6) * tr_size);
-        bool use_with_option3 = (subspace_option >= 3);
-
-        if (use_with_option1 || use_with_option2 || use_with_option3) {
+        bool subspace_unavailable = false;
+        if (use_subspace) {
           if (!have_computed_Hvs) {
             have_computed_Hvs = true;
-            hess_vec_func(trResults.z, trResults.H_z);
-            hess_vec_func(trResults.d_old, trResults.H_d_old);
-            hess_vec_func(trResults.cauchy_point, trResults.H_cauchy_point);
+
+            std::vector<const mfem::Vector*> subspace_hess_inputs{&trResults.z, &trResults.cauchy_point};
+            std::vector<mfem::Vector*> subspace_hess_outputs{&trResults.H_z, &trResults.H_cauchy_point};
+            if (trResults.has_d_old) {
+              subspace_hess_inputs.push_back(&trResults.d_old);
+              subspace_hess_outputs.push_back(&trResults.H_d_old);
+            }
+
+            computeHessianActions(subspace_hess_inputs, subspace_hess_outputs, hess_vec_func);
+          }
+
+          if (!have_computed_H_left_mosts) {
+            have_computed_H_left_mosts = true;
+            H_left_mosts.clear();
+            std::vector<const mfem::Vector*> leftmost_inputs;
+            std::vector<mfem::Vector*> leftmost_outputs;
+            for (auto& left : left_mosts) {
+              H_left_mosts.emplace_back(std::make_shared<mfem::Vector>(*left));
+              leftmost_inputs.push_back(left.get());
+              leftmost_outputs.push_back(H_left_mosts.back().get());
+            }
+            computeHessianActions(leftmost_inputs, leftmost_outputs, hess_vec_func);
+          }
+
+          if (!have_prepared_subspace) {
+            have_prepared_subspace = true;
+
+            std::vector<const mfem::Vector*> ds{&trResults.z, &trResults.cauchy_point};
+            std::vector<const mfem::Vector*> H_ds{&trResults.H_z, &trResults.H_cauchy_point};
+            if (trResults.has_d_old) {
+              ds.push_back(&trResults.d_old);
+              H_ds.push_back(&trResults.H_d_old);
+            }
+
+            have_prepared_subspace = prepareSubspaceProblemCache(ds, H_ds, r, num_leftmost, prepared_subspace);
+            subspace_unavailable = !have_prepared_subspace;
           }
 
-          H_left_mosts.clear();
-          for (auto& left : left_mosts) {
-            H_left_mosts.emplace_back(std::make_shared<mfem::Vector>(*left));
-            hess_vec_func(*left, *H_left_mosts.back());
+          if (have_prepared_subspace) {
+            const SubspaceStepStatus subspace_status =
+                solvePreparedSubspaceProblem(trResults.d, hess_vec_func, prepared_subspace, r, tr_size);
+            subspace_unavailable = subspace_status == SubspaceStepStatus::Unavailable;
           }
+        }
 
-          std::vector<const mfem::Vector*> ds{&trResults.z, &trResults.d_old, &trResults.cauchy_point};
-          std::vector<const mfem::Vector*> H_ds{&trResults.H_z, &trResults.H_d_old, &trResults.H_cauchy_point};
-          solveTheSubspaceProblem(trResults.d, hess_vec_func, ds, H_ds, r, tr_size, num_leftmost);
+        if (subspace_unavailable || !isDescentStep(trResults.d, r)) {
+          fallbackToCauchyPoint(
+              trResults, subspace_unavailable ? "Subspace step unavailable" : "Fallback step is not a descent step");
         }
 
         static constexpr double roundOffTol = 0.0;  // 1e-14;
 
         hess_vec_func(trResults.d, trResults.H_d);
-        double dHd = Dot(trResults.d, trResults.H_d);
-        double modelObjective = Dot(r, trResults.d) + 0.5 * dHd - roundOffTol;
+        const auto dots = dot_many({{&trResults.d, &trResults.H_d}, {&r, &trResults.d}});
+        const double dHd = dots[0];
+        const double rd = dots[1];
+        double modelObjective = rd + 0.5 * dHd - roundOffTol;
 
         add(X, trResults.d, x_pred);
 
@@ -982,6 +916,23 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea
           normPred = std::numeric_limits<double>::max();
         }
 
+        if (normPred <= norm_goal) {
+          trResults.d_old = trResults.d;
+          trResults.has_d_old = true;
+          if (!prepared_subspace.leftmosts.empty()) {
+            left_mosts = prepared_subspace.leftmosts;
+          }
+          X = x_pred;
+          r = r_pred;
+          norm = normPred;
+          if (print_level >= 2) {
+            printTrustRegionInfo(realObjective, modelObjective, trResults.cg_iterations_count, tr_size, true);
+            trResults.cg_iterations_count =
+                0;  // zero this output so it doesn't look like the linesearch is doing cg iterations
+          }
+          break;
+        }
+
         double modelImprove = -modelObjective;
         double realImprove = -realObjective;
 
@@ -1012,7 +963,7 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea
         // modelRes = g + Jd
         // modelResNorm = np.linalg.norm(modelRes)
         // realResNorm = np.linalg.norm(gy)
-        bool willAccept = rho >= settings.eta1 && rho <= settings.eta4;  // or (rho >= -0 and realResNorm <= gNorm)
+        const bool willAccept = rho >= settings.eta1 && rho <= settings.eta4;
 
         if (print_level >= 2) {
           printTrustRegionInfo(realObjective, modelObjective, trResults.cg_iterations_count, tr_size, willAccept);
@@ -1022,9 +973,14 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea
 
         if (willAccept) {
           trResults.d_old = trResults.d;
+          trResults.has_d_old = true;
+          if (!prepared_subspace.leftmosts.empty()) {
+            left_mosts = prepared_subspace.leftmosts;
+          }
           X = x_pred;
           r = r_pred;
-          status = convergence_manager_ ? convergence_manager_->evaluate(1.0, r_pred) : status;
+          status = convergence_manager_ ? convergence_manager_->evaluate(1.0, r_pred)
+                                        : scalarConvergenceStatus(normPred, initial_norm, abs_tol, rel_tol);
           norm = normPred;
           break;
         }
@@ -1041,14 +997,6 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea
     if (!converged && print_level >= 1) {  // (print_options.summary || print_options.warnings)) {
       mfem::out << "TrustRegion: No convergence!\n";
     }
-
-    if (false && print_level >= 2) {
-      mfem::out << "num hess vecs = " << num_hess_vecs << "\n";
-      mfem::out << "num preconds = " << num_preconds << "\n";
-      mfem::out << "num residuals = " << num_residuals << "\n";
-      mfem::out << "num subspace solves = " << num_subspace_solves << "\n";
-      mfem::out << "num jacobian_assembles = " << num_jacobian_assembles << "\n";
-    }
   }
 };
 /// @endcond
@@ -1540,7 +1488,9 @@ void EquationSolver::defineInputFileSchema(axom::inlet::Container& container)
   nonlinear_container.addDouble("abs_tol", "Absolute tolerance for the Newton solve.").defaultValue(1.0e-4);
   nonlinear_container.addInt("max_iter", "Maximum iterations for the Newton solve.").defaultValue(500);
   nonlinear_container.addInt("print_level", "Nonlinear print level.").defaultValue(0);
-  nonlinear_container.addString("solver_type", "Solver type (Newton|KINFullStep|KINLineSearch)").defaultValue("Newton");
+  nonlinear_container
+      .addString("solver_type", "Solver type (Newton|NewtonLineSearch|TrustRegion|KINFullStep|KINLineSearch)")
+      .defaultValue("Newton");
 }
 
 }  // namespace smith
@@ -1615,6 +1565,10 @@ smith::NonlinearSolverOptions FromInlet<smith::NonlinearSolverOptions>::operator
   const std::string solver_type = base["solver_type"];
   if (solver_type == "Newton") {
     options.nonlin_solver = smith::NonlinearSolver::Newton;
+  } else if (solver_type == "NewtonLineSearch") {
+    options.nonlin_solver = smith::NonlinearSolver::NewtonLineSearch;
+  } else if (solver_type == "TrustRegion") {
+    options.nonlin_solver = smith::NonlinearSolver::TrustRegion;
   } else if (solver_type == "KINFullStep") {
     options.nonlin_solver = smith::NonlinearSolver::KINFullStep;
   } else if (solver_type == "KINLineSearch") {
diff --git a/src/smith/numerics/equation_solver.hpp b/src/smith/numerics/equation_solver.hpp
index baac9936e1..17c5112626 100644
--- a/src/smith/numerics/equation_solver.hpp
+++ b/src/smith/numerics/equation_solver.hpp
@@ -12,6 +12,7 @@
 
 #pragma once
 
+#include <cstddef>
 #include <memory>
 #include <optional>
 #include <variant>
@@ -21,6 +22,7 @@
 #include "mfem.hpp"
 
 #include "smith/infrastructure/input.hpp"
+#include "smith/infrastructure/logger.hpp"
 #include "smith/numerics/nonlinear_convergence.hpp"
 #include "smith/numerics/solver_config.hpp"
 #include "smith/numerics/petsc_solvers.hpp"
diff --git a/src/smith/numerics/functional/functional.hpp b/src/smith/numerics/functional/functional.hpp
index f5a40d8259..dddeadc4d0 100644
--- a/src/smith/numerics/functional/functional.hpp
+++ b/src/smith/numerics/functional/functional.hpp
@@ -828,7 +828,7 @@ class Functional<test(trials...), exec> {
       }
     };
 
-    uint64_t max_buffer_size()
+    uint64_t max_buffer_size() const
     {
       uint64_t max_entries = 0;
       for (auto& integral : form_.integrals_) {
diff --git a/src/smith/numerics/functional/tests/functional_comparisons.cpp b/src/smith/numerics/functional/tests/functional_comparisons.cpp
index 12314e45d0..a272498955 100644
--- a/src/smith/numerics/functional/tests/functional_comparisons.cpp
+++ b/src/smith/numerics/functional/tests/functional_comparisons.cpp
@@ -4,9 +4,9 @@
 //
 // SPDX-License-Identifier: (BSD-3-Clause)
 
+#include <algorithm>
 #include <cstdlib>
 #include <iostream>
-#include <algorithm>
 #include <map>
 #include <memory>
 #include <set>
diff --git a/src/smith/numerics/mfem_trust_region_subspace.cpp b/src/smith/numerics/mfem_trust_region_subspace.cpp
new file mode 100644
index 0000000000..453d8351a3
--- /dev/null
+++ b/src/smith/numerics/mfem_trust_region_subspace.cpp
@@ -0,0 +1,467 @@
+// Copyright (c) Lawrence Livermore National Security, LLC and
+// other Smith Project Developers. See the top-level LICENSE file for
+// details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+
+#include "smith/numerics/trust_region_solver.hpp"
+
+#include <algorithm>
+#include <cmath>
+#include <vector>
+
+#include "smith/infrastructure/profiling.hpp"
+
+namespace smith {
+
+int globalSize(const mfem::Vector& parallel_v, const MPI_Comm& comm)
+{
+  int local_size = parallel_v.Size();
+  int global_size;
+  MPI_Allreduce(&local_size, &global_size, 1, MPI_INT, MPI_SUM, comm);
+  return global_size;
+}
+
+double innerProduct(const mfem::Vector& a, const mfem::Vector& b, const MPI_Comm& comm)
+{
+  return mfem::InnerProduct(comm, a, b);
+}
+
+#ifdef MFEM_USE_LAPACK
+
+namespace {
+
+double dot(const mfem::Vector& a, const mfem::Vector& b) { return a * b; }
+
+double norm(const mfem::Vector& x) { return x.Norml2(); }
+
+double sumAbs(const mfem::Vector& x)
+{
+  double total = 0.0;
+  for (int i = 0; i < x.Size(); ++i) {
+    total += std::abs(x[i]);
+  }
+  return total;
+}
+
+struct SubspaceProjections {
+  mfem::DenseMatrix sAs;
+  mfem::DenseMatrix ss;
+  mfem::Vector sb;
+};
+
+void checkProjectionInputs(const std::vector<const mfem::Vector*>& states,
+                           const std::vector<const mfem::Vector*>& Astates, const mfem::Vector& b)
+{
+  MFEM_VERIFY(states.size() == Astates.size(),
+              "Search directions and their linear operator result must have same number of columns");
+  MFEM_VERIFY(!states.empty(), "Subspace projections require at least one direction.");
+
+  const int n = static_cast<int>(states.size());
+  const int vector_size = states[0]->Size();
+  for (int j = 0; j < n; ++j) {
+    MFEM_VERIFY(states[size_t(j)]->Size() == vector_size, "Subspace direction sizes differ.");
+    MFEM_VERIFY(Astates[size_t(j)]->Size() == vector_size, "Subspace Hessian-vector sizes differ.");
+  }
+  MFEM_VERIFY(b.Size() == vector_size, "Subspace right-hand-side size differs.");
+}
+
+SubspaceProjections globalSubspaceProjectionFromLocalInnerProducts(const std::vector<const mfem::Vector*>& states,
+                                                                   const std::vector<const mfem::Vector*>& Astates,
+                                                                   const mfem::Vector& b, MPI_Comm comm)
+{
+  const int n = static_cast<int>(states.size());
+  const int triangular_size = n * (n + 1) / 2;
+  const auto triangular_index = [n](int i, int j) { return i * n - (i * (i - 1)) / 2 + (j - i); };
+  const int sAs_offset = 0;
+  const int ss_offset = triangular_size;
+  const int sb_offset = 2 * triangular_size;
+  const int buffer_size = 2 * triangular_size + n;
+  std::vector<mfem::real_t> local_projection_entries(size_t(buffer_size), 0.0);
+  std::vector<mfem::real_t> global_projection_entries(size_t(buffer_size), 0.0);
+
+  for (int i = 0; i < n; ++i) {
+    local_projection_entries[size_t(sb_offset + i)] = mfem::InnerProduct(*states[size_t(i)], b);
+    for (int j = i; j < n; ++j) {
+      const size_t ij = size_t(triangular_index(i, j));
+      local_projection_entries[size_t(sAs_offset) + ij] = mfem::InnerProduct(*states[size_t(i)], *Astates[size_t(j)]);
+      local_projection_entries[size_t(ss_offset) + ij] = mfem::InnerProduct(*states[size_t(i)], *states[size_t(j)]);
+    }
+  }
+
+  MPI_Allreduce(local_projection_entries.data(), global_projection_entries.data(), buffer_size, MFEM_MPI_REAL_T,
+                MPI_SUM, comm);
+
+  SubspaceProjections projections{mfem::DenseMatrix(n), mfem::DenseMatrix(n), mfem::Vector(n)};
+  for (int i = 0; i < n; ++i) {
+    projections.sb[i] = global_projection_entries[size_t(sb_offset + i)];
+    for (int j = i; j < n; ++j) {
+      const size_t ij = size_t(triangular_index(i, j));
+      projections.sAs(i, j) = global_projection_entries[size_t(sAs_offset) + ij];
+      projections.sAs(j, i) = projections.sAs(i, j);
+      projections.ss(i, j) = global_projection_entries[size_t(ss_offset) + ij];
+      projections.ss(j, i) = projections.ss(i, j);
+    }
+  }
+
+  return projections;
+}
+
+SubspaceProjections projectSubspaceGlobally(const std::vector<const mfem::Vector*>& states,
+                                            const std::vector<const mfem::Vector*>& Astates, const mfem::Vector& b,
+                                            MPI_Comm comm)
+{
+  checkProjectionInputs(states, Astates, b);
+  return globalSubspaceProjectionFromLocalInnerProducts(states, Astates, b, comm);
+}
+
+double quadraticEnergy(const mfem::DenseMatrix& A, const mfem::Vector& b, const mfem::Vector& x)
+{
+  mfem::Vector Ax(x.Size());
+  A.Mult(x, Ax);
+  return 0.5 * dot(x, Ax) - dot(x, b);
+}
+
+double pnormSquared(const mfem::Vector& bvv, const mfem::Vector& sig)
+{
+  double total = 0.0;
+  for (int i = 0; i < bvv.Size(); ++i) {
+    total += bvv[i] / (sig[i] * sig[i]);
+  }
+  return total;
+}
+
+double qnormSquared(const mfem::Vector& bvv, const mfem::Vector& sig)
+{
+  double total = 0.0;
+  for (int i = 0; i < bvv.Size(); ++i) {
+    total += bvv[i] / (sig[i] * sig[i] * sig[i]);
+  }
+  return total;
+}
+
+mfem::Vector matrixColumn(const mfem::DenseMatrix& A, int j)
+{
+  mfem::Vector col(A.Height());
+  for (int i = 0; i < A.Height(); ++i) {
+    col[i] = A(i, j);
+  }
+  return col;
+}
+
+mfem::DenseMatrix columnsToMatrix(const std::vector<mfem::Vector>& cols)
+{
+  mfem::DenseMatrix A(cols.empty() ? 0 : cols[0].Size(), static_cast<int>(cols.size()));
+  for (int j = 0; j < A.Width(); ++j) {
+    for (int i = 0; i < A.Height(); ++i) {
+      A(i, j) = cols[size_t(j)][i];
+    }
+  }
+  return A;
+}
+
+mfem::DenseMatrix orthonormalBasisTransform(const mfem::DenseMatrix& gram, double& trace_mag)
+{
+  mfem::DenseMatrix gram_copy(gram);
+  mfem::Vector evals;
+  mfem::DenseMatrix evecs;
+  gram_copy.Eigensystem(evals, evecs);
+
+  trace_mag = 0.0;
+  for (int i = 0; i < evals.Size(); ++i) {
+    trace_mag += std::abs(evals[i]);
+  }
+
+  std::vector<mfem::Vector> kept_columns;
+  for (int i = 0; i < evals.Size(); ++i) {
+    if (evals[i] > 1e-9 * trace_mag) {
+      mfem::Vector col = matrixColumn(evecs, i);
+      col /= std::sqrt(evals[i]);
+      kept_columns.emplace_back(std::move(col));
+    }
+  }
+
+  return columnsToMatrix(kept_columns);
+}
+
+mfem::DenseMatrix tripleProduct(const mfem::DenseMatrix& L, const mfem::DenseMatrix& A, const mfem::DenseMatrix& R)
+{
+  mfem::DenseMatrix tmp(A.Height(), R.Width());
+  mfem::Mult(A, R, tmp);
+  mfem::DenseMatrix out(L.Width(), R.Width());
+  mfem::MultAtB(L, tmp, out);
+  return out;
+}
+
+mfem::Vector projectWithTranspose(const mfem::DenseMatrix& A, const mfem::Vector& x)
+{
+  mfem::Vector out(A.Width());
+  A.MultTranspose(x, out);
+  return out;
+}
+
+mfem::Vector combineColumns(const mfem::DenseMatrix& basis, const mfem::Vector& coeffs)
+{
+  mfem::Vector out(basis.Height());
+  out = 0.0;
+  for (int i = 0; i < coeffs.Size(); ++i) {
+    const mfem::Vector vi = matrixColumn(basis, i);
+    out.Add(coeffs[i], vi);
+  }
+  return out;
+}
+
+mfem::Vector combineDirections(const std::vector<const mfem::Vector*>& states, const mfem::Vector& coeffs)
+{
+  mfem::Vector out(*states[0]);
+  out = 0.0;
+  for (int i = 0; i < coeffs.Size(); ++i) {
+    out.Add(coeffs[i], *states[size_t(i)]);
+  }
+  return out;
+}
+
+std::vector<const mfem::Vector*> toPointers(const std::vector<std::shared_ptr<mfem::Vector>>& vectors)
+{
+  std::vector<const mfem::Vector*> ptrs;
+  ptrs.reserve(vectors.size());
+  for (const auto& vector : vectors) {
+    ptrs.push_back(vector.get());
+  }
+  return ptrs;
+}
+
+std::vector<mfem::Vector> prepareExactTrustRegionLeftmosts(CachedTrustRegionSubspaceProblem& prepared, int num_leftmost)
+{
+  prepared.eigenvalues.SetSize(prepared.projected_rhs.Size());
+  prepared.eigenvectors.SetSize(prepared.projected_hessian.Height(), prepared.projected_hessian.Width());
+
+  mfem::DenseMatrix projected_hessian_copy(prepared.projected_hessian);
+  projected_hessian_copy.Eigensystem(prepared.eigenvalues, prepared.eigenvectors);
+
+  prepared.eigen_rhs.SetSize(prepared.eigenvalues.Size());
+  for (int i = 0; i < prepared.eigenvalues.Size(); ++i) {
+    const mfem::Vector vi = matrixColumn(prepared.eigenvectors, i);
+    prepared.eigen_rhs[i] = dot(vi, prepared.projected_rhs);
+  }
+
+  std::vector<mfem::Vector> reduced_leftmosts;
+  const int num_leftmost_possible = std::min(num_leftmost, prepared.eigenvalues.Size());
+  reduced_leftmosts.reserve(static_cast<size_t>(num_leftmost_possible));
+  prepared.leftvals.clear();
+  prepared.leftvals.reserve(static_cast<size_t>(num_leftmost_possible));
+  for (int i = 0; i < num_leftmost_possible; ++i) {
+    reduced_leftmosts.emplace_back(matrixColumn(prepared.eigenvectors, i));
+    prepared.leftvals.emplace_back(prepared.eigenvalues[i]);
+  }
+  return reduced_leftmosts;
+}
+
+std::pair<mfem::Vector, bool> solvePreparedExactTrustRegionProblem(const CachedTrustRegionSubspaceProblem& prepared,
+                                                                   double delta)
+{
+  const mfem::DenseMatrix& A = prepared.projected_hessian;
+  const mfem::Vector& b = prepared.projected_rhs;
+  const mfem::Vector& sigs = prepared.eigenvalues;
+  const mfem::DenseMatrix& V = prepared.eigenvectors;
+  const mfem::Vector& bv = prepared.eigen_rhs;
+
+  mfem::Vector workspace(6 * b.Size());
+  int offset = 0;
+  auto alloc_vector = [&](int size) {
+    mfem::Vector v(workspace.GetData() + offset, size);
+    offset += size;
+    return v;
+  };
+
+  mfem::Vector bvOverSigs = alloc_vector(sigs.Size());
+  for (int i = 0; i < sigs.Size(); ++i) {
+    bvOverSigs[i] = bv[i] / sigs[i];
+  }
+  const double sigScale = sumAbs(sigs) / sigs.Size();
+  const double eps = 1e-12 * sigScale;
+  const mfem::Vector leftMost = matrixColumn(V, 0);
+  const double minSig = sigs[0];
+
+  if ((minSig >= eps) && (norm(bvOverSigs) <= delta)) {
+    mfem::Vector x = combineColumns(V, bvOverSigs);
+    return std::make_pair(x, true);
+  }
+
+  double lam = minSig < eps ? -minSig + eps : 0.0;
+  mfem::Vector sigsPlusLam = alloc_vector(sigs.Size());
+  for (int i = 0; i < sigs.Size(); ++i) sigsPlusLam[i] = sigs[i] + lam;
+  for (int i = 0; i < sigs.Size(); ++i) bvOverSigs[i] = bv[i] / sigsPlusLam[i];
+
+  if ((minSig < eps) && (norm(bvOverSigs) < delta)) {
+    mfem::Vector p = combineColumns(V, bvOverSigs);
+
+    const double pz = dot(p, leftMost);
+    const double pp = dot(p, p);
+    const double ddmpp = std::max(delta * delta - pp, 0.0);
+
+    const double tau1 = -pz + std::sqrt(pz * pz + ddmpp);
+    const double tau2 = -pz - std::sqrt(pz * pz + ddmpp);
+
+    mfem::Vector x1 = alloc_vector(p.Size());
+    x1 = p;
+    mfem::Vector x2 = alloc_vector(p.Size());
+    x2 = p;
+    x1.Add(tau1, leftMost);
+    x2.Add(tau2, leftMost);
+
+    const double e1 = quadraticEnergy(A, b, x1);
+    const double e2 = quadraticEnergy(A, b, x2);
+
+    return std::make_pair(e1 < e2 ? x1 : x2, true);
+  }
+
+  mfem::Vector bvbv = alloc_vector(bv.Size());
+  for (int i = 0; i < bv.Size(); ++i) bvbv[i] = bv[i] * bv[i];
+  for (int i = 0; i < sigs.Size(); ++i) sigsPlusLam[i] = sigs[i] + lam;
+
+  double pNormSq = pnormSquared(bvbv, sigsPlusLam);
+  double pNorm = std::sqrt(pNormSq);
+  double bError = (pNorm - delta) / delta;
+
+  size_t iters = 0;
+  constexpr size_t maxIters = 30;
+  while ((std::abs(bError) > 1e-9) && (iters++ < maxIters)) {
+    const double qNormSq = qnormSquared(bvbv, sigsPlusLam);
+    lam += (pNormSq / qNormSq) * bError;
+    for (int i = 0; i < sigs.Size(); ++i) sigsPlusLam[i] = sigs[i] + lam;
+    pNormSq = pnormSquared(bvbv, sigsPlusLam);
+    pNorm = std::sqrt(pNormSq);
+    bError = (pNorm - delta) / delta;
+  }
+
+  const bool success = iters < maxIters;
+
+  for (int i = 0; i < sigs.Size(); ++i) bvOverSigs[i] = bv[i] / sigsPlusLam[i];
+
+  mfem::Vector x = combineColumns(V, bvOverSigs);
+
+  const double e1 = quadraticEnergy(A, b, x);
+  mfem::Vector neg_x = alloc_vector(x.Size());
+  neg_x = x;
+  neg_x *= -1.0;
+  const double e2 = quadraticEnergy(A, b, neg_x);
+
+  x *= (e2 < e1 ? -delta : delta) / norm(x);
+
+  return std::make_pair(x, success);
+}
+
+}  // namespace
+
+/// @brief prepares reduced trust-region subspace data reusable across trust-region radius updates
+CachedTrustRegionSubspaceProblem prepareSubspaceProblem(const std::vector<const mfem::Vector*>& directions,
+                                                        const std::vector<const mfem::Vector*>& A_directions,
+                                                        const mfem::Vector& b, int num_leftmost, MPI_Comm comm)
+{
+  SMITH_MARK_FUNCTION;
+  CachedTrustRegionSubspaceProblem prepared;
+  prepared.zero_solution = b;
+  prepared.zero_solution = 0.0;
+
+  SubspaceProjections projections = projectSubspaceGlobally(directions, A_directions, b, comm);
+  mfem::DenseMatrix& sAs = projections.sAs;
+  sAs.Symmetrize();
+
+  for (int i = 0; i < sAs.Height(); ++i) {
+    for (int j = 0; j < sAs.Width(); ++j) {
+      if (std::isnan(sAs(i, j))) {
+        throw TrustRegionException("States in subspace solve contain NaNs.");
+      }
+    }
+  }
+
+  mfem::DenseMatrix& ss = projections.ss;
+  ss.Symmetrize();
+
+  double trace_mag = 0.0;
+  mfem::DenseMatrix T = orthonormalBasisTransform(ss, trace_mag);
+  if (trace_mag == 0.0) {
+    return prepared;
+  }
+  if (T.Width() == 0) {
+    throw TrustRegionException("No independent directions in MFEM subspace solve.");
+  }
+  prepared.projected_hessian = tripleProduct(T, sAs, T);
+  prepared.projected_hessian.Symmetrize();
+
+  const mfem::Vector& sb = projections.sb;
+  prepared.projected_rhs = projectWithTranspose(T, sb);
+
+  for (int j = 0; j < T.Width(); ++j) {
+    prepared.basis.emplace_back(std::make_shared<mfem::Vector>(combineDirections(directions, matrixColumn(T, j))));
+  }
+  const auto reduced_leftmosts = prepareExactTrustRegionLeftmosts(prepared, num_leftmost);
+  const auto basis_ptrs = toPointers(prepared.basis);
+  prepared.leftmosts.clear();
+  prepared.leftmosts.reserve(reduced_leftmosts.size());
+  for (const auto& leftvec : reduced_leftmosts) {
+    prepared.leftmosts.emplace_back(std::make_shared<mfem::Vector>(combineDirections(basis_ptrs, leftvec)));
+  }
+
+  return prepared;
+}
+
+/// @brief solves cached reduced trust-region problem for given trust-region radius
+TrustRegionSubspaceResult solvePreparedSubspaceProblem(const CachedTrustRegionSubspaceProblem& prepared, double delta)
+{
+  SMITH_MARK_FUNCTION;
+  if (prepared.basis.empty()) {
+    mfem::Vector sol(prepared.zero_solution);
+    sol = 0.0;
+    return std::make_tuple(sol, prepared.leftmosts, prepared.leftvals, 0.0);
+  }
+
+  auto [reduced_x, success] = solvePreparedExactTrustRegionProblem(prepared, delta);
+  if (!success) {
+    throw TrustRegionException("Trust-region subspace solve failed to converge.");
+  }
+  const double energy = quadraticEnergy(prepared.projected_hessian, prepared.projected_rhs, reduced_x);
+
+  const auto basis_ptrs = toPointers(prepared.basis);
+  mfem::Vector sol = combineDirections(basis_ptrs, reduced_x);
+  return std::make_tuple(sol, prepared.leftmosts, prepared.leftvals, energy);
+}
+
+TrustRegionSubspaceResult solveSubspaceProblem(const std::vector<const mfem::Vector*>& directions,
+                                               const std::vector<const mfem::Vector*>& A_directions,
+                                               const mfem::Vector& b, double delta, int num_leftmost, MPI_Comm comm)
+{
+  return solvePreparedSubspaceProblem(prepareSubspaceProblem(directions, A_directions, b, num_leftmost, comm), delta);
+}
+
+#else
+
+TrustRegionSubspaceResult solveSubspaceProblem(const std::vector<const mfem::Vector*>& directions,
+                                               const std::vector<const mfem::Vector*>& A_directions,
+                                               const mfem::Vector& b, double delta, int num_leftmost, MPI_Comm)
+{
+  throw TrustRegionException("Trust-region subspace solve requires MFEM LAPACK support.");
+  return std::make_tuple(b, std::vector<std::shared_ptr<mfem::Vector>>{}, std::vector<double>{}, 0.0);
+}
+
+CachedTrustRegionSubspaceProblem prepareSubspaceProblem(const std::vector<const mfem::Vector*>& directions,
+                                                        const std::vector<const mfem::Vector*>& A_directions,
+                                                        const mfem::Vector& b, int, MPI_Comm)
+{
+  throw TrustRegionException("Trust-region subspace solve requires MFEM LAPACK support.");
+  CachedTrustRegionSubspaceProblem prepared;
+  prepared.zero_solution = b;
+  return prepared;
+}
+
+TrustRegionSubspaceResult solvePreparedSubspaceProblem(const CachedTrustRegionSubspaceProblem& prepared, double)
+{
+  throw TrustRegionException("Trust-region subspace solve requires MFEM LAPACK support.");
+  return std::make_tuple(prepared.zero_solution, std::vector<std::shared_ptr<mfem::Vector>>{}, std::vector<double>{},
+                         0.0);
+}
+
+#endif  // MFEM_USE_LAPACK
+
+}  // namespace smith
diff --git a/src/smith/numerics/steihaug_toint_cg.cpp b/src/smith/numerics/steihaug_toint_cg.cpp
new file mode 100644
index 0000000000..b33fae7864
--- /dev/null
+++ b/src/smith/numerics/steihaug_toint_cg.cpp
@@ -0,0 +1,141 @@
+// Copyright (c) Lawrence Livermore National Security, LLC and
+// other Smith Project Developers. See the top-level LICENSE file for
+// details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+
+#include "smith/numerics/steihaug_toint_cg.hpp"
+
+namespace smith {
+
+namespace {
+
+bool isDescentDirection(double directional_derivative) { return directional_derivative < 0.0; }
+
+void projectToBoundaryWithCoefs(mfem::Vector& z, const mfem::Vector& d, double delta, double zz, double zd, double dd)
+{
+  const double deltadelta_m_zz = std::max(delta * delta - zz, 0.0);
+  if (deltadelta_m_zz == 0.0) return;
+  const double tau = (std::sqrt(deltadelta_m_zz * dd + zd * zd) - zd) / dd;
+  z.Add(tau, d);
+}
+
+}  // namespace
+
+std::vector<double> dotMany(const std::vector<DotPair>& pairs)
+{
+  std::vector<double> products(pairs.size(), 0.0);
+  for (size_t i = 0; i < pairs.size(); ++i) {
+    MFEM_ASSERT(pairs[i].first->Size() == pairs[i].second->Size(), "Incompatible vector sizes.");
+    products[i] = (*pairs[i].first) * (*pairs[i].second);
+  }
+  return products;
+}
+
+bool isDescentDirection(const mfem::Vector& direction, const mfem::Vector& residual, const DotManyFunction& dot_many)
+{
+  return isDescentDirection(dot_many({{&direction, &residual}})[0]);
+}
+
+void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem::Operator& H, const mfem::Solver* P,
+                     const TrustRegionSettings& settings, double& trSize, TrustRegionResults& results,
+                     double r0_norm_squared, const DotManyFunction& dot_many)
+{
+  // minimize r0@z + 0.5*z@J@z
+  results.interior_status = TrustRegionResults::Status::Interior;
+  results.cg_iterations_count = 0;
+
+  auto& z = results.z;
+  auto& cgIter = results.cg_iterations_count;
+  auto& d = results.d;
+  auto& Pr = results.Pr;
+  auto& Hd = results.H_d;
+
+  const double cg_tol_squared = settings.cg_tol * settings.cg_tol;
+
+  if (r0_norm_squared <= cg_tol_squared && settings.min_cg_iterations == 0) {
+    return;
+  }
+
+  rCurrent = r0;
+  if (P) {
+    P->Mult(rCurrent, Pr);
+  } else {
+    Pr = rCurrent;
+  }
+
+  // d = -Pr
+  d = Pr;
+  d *= -1.0;
+
+  z = 0.0;
+  double zz = 0.;
+
+  // rPr = dot(rCurrent, Pr)
+  double rPr = dot_many({{&rCurrent, &Pr}})[0];
+
+  for (cgIter = 1; cgIter <= settings.max_cg_iterations; ++cgIter) {
+    H.Mult(d, Hd);
+
+    auto dots = dot_many({{&d, &rCurrent}, {&d, &Hd}, {&z, &d}, {&d, &d}});
+    double descent_check = dots[0];
+    double curvature = dots[1];
+    double zd = dots[2];
+    double dd = dots[3];
+
+    if (!isDescentDirection(descent_check)) {
+      results.interior_status = TrustRegionResults::Status::NonDescentDirection;
+      return;
+    }
+
+    const double alphaCg = curvature != 0.0 ? rPr / curvature : 0.0;
+    const double zzNp1 = zz + 2.0 * alphaCg * zd + alphaCg * alphaCg * dd;
+
+    const bool go_to_boundary = curvature <= 0 || zzNp1 >= trSize * trSize;
+    if (go_to_boundary) {
+      projectToBoundaryWithCoefs(z, d, trSize, zz, zd, dd);
+      if (curvature <= 0) {
+        results.interior_status = TrustRegionResults::Status::NegativeCurvature;
+      } else {
+        results.interior_status = TrustRegionResults::Status::OnBoundary;
+      }
+      return;
+    }
+
+    // Alias Pr as temporary workspace 'zPred' to avoid allocation
+    auto& zPred = Pr;
+    zPred = z;
+    zPred.Add(alphaCg, d);
+    z = zPred;
+
+    if (results.interior_status == TrustRegionResults::Status::NonDescentDirection) {
+      return;
+    }
+
+    rCurrent.Add(alphaCg, Hd);
+
+    if (P) {
+      P->Mult(rCurrent, Pr);
+    } else {
+      Pr = rCurrent;
+    }
+
+    auto dots2 = dot_many({{&rCurrent, &Pr}, {&rCurrent, &rCurrent}});
+    double rPrNp1 = dots2[0];
+    double r_current_norm_squared = dots2[1];
+
+    if (r_current_norm_squared <= cg_tol_squared && cgIter >= settings.min_cg_iterations) {
+      return;
+    }
+
+    double beta = rPrNp1 / rPr;
+    rPr = rPrNp1;
+    d *= beta;
+    d.Add(-1.0, Pr);
+
+    zz = zzNp1;
+  }
+  cgIter--;
+}
+
+}  // namespace smith
diff --git a/src/smith/numerics/steihaug_toint_cg.hpp b/src/smith/numerics/steihaug_toint_cg.hpp
new file mode 100644
index 0000000000..60c3d2c371
--- /dev/null
+++ b/src/smith/numerics/steihaug_toint_cg.hpp
@@ -0,0 +1,131 @@
+// Copyright (c) Lawrence Livermore National Security, LLC and
+// other Smith Project Developers. See the top-level LICENSE file for
+// details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+
+#pragma once
+
+#include <array>
+#include <functional>
+#include <utility>
+#include <vector>
+
+#include "mfem.hpp"
+
+namespace smith {
+
+/// Internal structure for storing trust region settings
+struct TrustRegionSettings {
+  /// cg tol
+  double cg_tol = 1e-8;
+  /// min cg iters
+  size_t min_cg_iterations = 0;  //
+  /// max cg iters should be around # of system dofs
+  size_t max_cg_iterations = 10000;  //
+  /// max cumulative iterations
+  size_t max_cumulative_iteration = 1;
+  /// minimum trust region size
+  double min_tr_size = 1e-13;
+  /// trust region decrease factor
+  double t1 = 0.25;
+  /// trust region increase factor
+  double t2 = 1.75;
+  /// worse case energy drop ratio.  trust region accepted if energy drop is better than this.
+  double eta1 = 1e-9;
+  /// non-ideal energy drop ratio.  trust region decreases if energy drop is worse than this.
+  double eta2 = 0.1;
+  /// ideal energy drop ratio.  trust region increases if energy drop is better than this.
+  double eta3 = 0.6;
+  /// parameter limiting how fast the energy can drop relative to the prediction (in case the energy surrogate is poor)
+  double eta4 = 4.2;
+};
+
+/// Internal structure for storing trust region stateful data
+struct TrustRegionResults {
+  /// Constructor takes the size of the solution vector
+  TrustRegionResults(int size)
+  {
+    z.SetSize(size);
+    H_z.SetSize(size);
+    d_old.SetSize(size);
+    H_d_old.SetSize(size);
+    d.SetSize(size);
+    H_d.SetSize(size);
+    Pr.SetSize(size);
+    cauchy_point.SetSize(size);
+    H_cauchy_point.SetSize(size);
+    z = 0.0;
+    H_z = 0.0;
+    d_old = 0.0;
+    H_d_old = 0.0;
+    d = 0.0;
+    H_d = 0.0;
+    Pr = 0.0;
+    cauchy_point = 0.0;
+    H_cauchy_point = 0.0;
+  }
+
+  /// resets trust region results for a new outer iteration
+  void reset()
+  {
+    z = 0.0;
+    cauchy_point = 0.0;
+  }
+
+  /// enumerates the possible final status of the trust region steps
+  enum class Status
+  {
+    Interior,
+    NegativeCurvature,
+    OnBoundary,
+    NonDescentDirection
+  };
+
+  /// step direction
+  mfem::Vector z;
+  /// action of hessian on current step z
+  mfem::Vector H_z;
+  /// old step direction
+  mfem::Vector d_old;
+  /// action of hessian on previous step z_old
+  mfem::Vector H_d_old;
+  /// true after at least one accepted line-search step has populated d_old
+  bool has_d_old = false;
+  /// incrementalCG direction
+  mfem::Vector d;
+  /// action of hessian on direction d
+  mfem::Vector H_d;
+  /// preconditioned residual
+  mfem::Vector Pr;
+  /// cauchy point
+  mfem::Vector cauchy_point;
+  /// action of hessian on direction of cauchy point
+  mfem::Vector H_cauchy_point;
+  /// specifies if step is interior, exterior, negative curvature, etc.
+  Status interior_status = Status::Interior;
+  /// iteration counter
+  size_t cg_iterations_count = 0;
+};
+
+using DotPair = std::pair<const mfem::Vector*, const mfem::Vector*>;                      ///< using
+using DotManyFunction = std::function<std::vector<double>(const std::vector<DotPair>&)>;  ///< using
+
+/// compute local dot products for many vector pairs
+std::vector<double> dotMany(const std::vector<DotPair>& pairs);
+
+/// true when direction is locally downhill for the quadratic model's linear term
+bool isDescentDirection(const mfem::Vector& direction, const mfem::Vector& residual, const DotManyFunction& dot_many);
+
+/**
+ * @brief Minimize quadratic sub-problem given residual vector, the action of the stiffness and a preconditioner
+ *
+ * This is a standard implementation of 'The Conjugate Gradient Method and Trust Regions in Large Scale Optimization'
+ * by T. Steihaug. It is also called the Steihaug-Toint CG trust region algorithm (see also Trust Region Methods
+ * by Conn, Gould, and Toint).
+ */
+void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem::Operator& H, const mfem::Solver* P,
+                     const TrustRegionSettings& settings, double& trSize, TrustRegionResults& results,
+                     double r0_norm_squared, const DotManyFunction& dot_many);
+
+}  // namespace smith
diff --git a/src/smith/numerics/tests/CMakeLists.txt b/src/smith/numerics/tests/CMakeLists.txt
index 10e693b21a..aa2ea5e011 100644
--- a/src/smith/numerics/tests/CMakeLists.txt
+++ b/src/smith/numerics/tests/CMakeLists.txt
@@ -10,9 +10,11 @@ set(numerics_serial_test_sources
     test_equationsolver.cpp
     test_operator.cpp
     test_odes.cpp
+    test_steihaug_toint_cg.cpp
     test_block_preconditioner.cpp
     test_block_preconditioner_backend.cpp
     test_block_preconditioner_custom_operators.cpp
+    test_trust_region_solver_mfem.cpp
     )
 
 smith_add_tests( SOURCES       ${numerics_serial_test_sources}
@@ -30,7 +32,6 @@ if(PETSC_FOUND)
     if(SLEPC_FOUND)
         set(slepc_solver_tests
             test_eigensolver.cpp
-            test_trust_region_solver.cpp
             )
         smith_add_tests(SOURCES       ${slepc_solver_tests}
                         DEPENDS_ON    ${numerics_test_dependencies}
diff --git a/src/smith/numerics/tests/test_steihaug_toint_cg.cpp b/src/smith/numerics/tests/test_steihaug_toint_cg.cpp
new file mode 100644
index 0000000000..5522144763
--- /dev/null
+++ b/src/smith/numerics/tests/test_steihaug_toint_cg.cpp
@@ -0,0 +1,105 @@
+// Copyright (c) Lawrence Livermore National Security, LLC and
+// other Smith Project Developers. See the top-level LICENSE file for
+// details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+
+#include <gtest/gtest.h>
+#include "smith/numerics/steihaug_toint_cg.hpp"
+
+TEST(SteihaugTointCG, SolvesSPDInsideBoundary)
+{
+  int size = 2;
+  mfem::Vector diag(size);
+  diag[0] = 2.0;
+  diag[1] = 4.0;
+  mfem::SparseMatrix H(diag);
+
+  mfem::Vector r0(size);
+  r0[0] = 1.0;
+  r0[1] = 1.0;
+
+  smith::TrustRegionSettings settings;
+  settings.cg_tol = 1e-10;
+  settings.max_cg_iterations = 10;
+
+  double trSize = 100.0;  // Huge trust region
+  smith::TrustRegionResults results(size);
+
+  mfem::Vector rCurrent(size);
+
+  smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(),
+                         smith::dotMany);
+
+  // Solution should be H^{-1} (-r0)
+  // x = -0.5, y = -0.25
+  EXPECT_NEAR(results.z[0], -0.5, 1e-9);
+  EXPECT_NEAR(results.z[1], -0.25, 1e-9);
+  EXPECT_EQ(results.interior_status, smith::TrustRegionResults::Status::Interior);
+}
+
+TEST(SteihaugTointCG, HitsBoundary)
+{
+  int size = 1;
+  mfem::Vector diag(size);
+  diag[0] = 1.0;
+  mfem::SparseMatrix H(diag);
+
+  mfem::Vector r0(size);
+  r0[0] = 1.0;
+
+  smith::TrustRegionSettings settings;
+  settings.max_cg_iterations = 10;
+
+  double trSize = 0.5;  // Small trust region, solution would be -1.0
+  smith::TrustRegionResults results(size);
+
+  mfem::Vector rCurrent(size);
+
+  smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(),
+                         smith::dotMany);
+
+  EXPECT_NEAR(results.z.Norml2(), 0.5, 1e-9);
+  EXPECT_EQ(results.interior_status, smith::TrustRegionResults::Status::OnBoundary);
+}
+
+TEST(SteihaugTointCG, DetectsNegativeCurvature)
+{
+  int size = 1;
+  mfem::Vector diag(size);
+  diag[0] = -1.0;  // Negative curvature
+  mfem::SparseMatrix H(diag);
+
+  mfem::Vector r0(size);
+  r0[0] = 1.0;
+
+  smith::TrustRegionSettings settings;
+  settings.max_cg_iterations = 10;
+
+  double trSize = 2.0;
+  smith::TrustRegionResults results(size);
+
+  mfem::Vector rCurrent(size);
+
+  smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(),
+                         smith::dotMany);
+
+  // For negative curvature, it should go to boundary
+  EXPECT_NEAR(results.z.Norml2(), 2.0, 1e-9);
+  EXPECT_EQ(results.interior_status, smith::TrustRegionResults::Status::NegativeCurvature);
+}
+
+TEST(SteihaugTointCG, DetectsDirectlyFlippedAscentDirection)
+{
+  mfem::Vector residual(2);
+  residual[0] = 1.0;
+  residual[1] = -2.0;
+
+  mfem::Vector descent_direction(residual);
+  descent_direction *= -1.0;
+  EXPECT_TRUE(smith::isDescentDirection(descent_direction, residual, smith::dotMany));
+
+  mfem::Vector ascent_direction(descent_direction);
+  ascent_direction *= -1.0;
+  EXPECT_FALSE(smith::isDescentDirection(ascent_direction, residual, smith::dotMany));
+}
diff --git a/src/smith/numerics/tests/test_trust_region_solver.cpp b/src/smith/numerics/tests/test_trust_region_solver.cpp
deleted file mode 100644
index af030fc4c3..0000000000
--- a/src/smith/numerics/tests/test_trust_region_solver.cpp
+++ /dev/null
@@ -1,165 +0,0 @@
-// Copyright (c) Lawrence Livermore National Security, LLC and
-// other Smith Project Developers. See the top-level LICENSE file for
-// details.
-//
-// SPDX-License-Identifier: (BSD-3-Clause)
-
-#include <cmath>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "gtest/gtest.h"
-#include "mfem.hpp"
-
-#include "smith/physics/state/state_manager.hpp"
-#include "smith/infrastructure/application_manager.hpp"
-#include "smith/numerics/trust_region_solver.hpp"
-#include "smith/infrastructure/profiling.hpp"
-#include "smith/mesh_utils/mesh_utils.hpp"
-#include "smith/numerics/functional/finite_element.hpp"
-#include "smith/physics/state/finite_element_state.hpp"
-#include "smith/physics/state/finite_element_vector.hpp"
-#include "smith/numerics/petsc_solvers.hpp"
-
-const std::string MESHTAG = "mesh";
-
-static constexpr int scalar_field_order = 1;
-
-struct MeshFixture : public testing::Test {
-  void SetUp()
-  {
-    smith::StateManager::initialize(datastore_, "solver_test");
-
-    auto mfem_shape = mfem::Element::QUADRILATERAL;
-
-    double length = 0.5;
-    double width = 2.0;
-    auto meshtmp =
-        smith::mesh::refineAndDistribute(mfem::Mesh::MakeCartesian2D(2, 1, mfem_shape, true, length, width), 0, 0);
-    mesh_ = &smith::StateManager::setMesh(std::move(meshtmp), MESHTAG);
-  }
-
-  axom::sidre::DataStore datastore_;
-  mfem::ParMesh* mesh_;
-};
-
-std::vector<mfem::Vector> applyLinearOperator(const Mat& A, const std::vector<const mfem::Vector*>& states)
-{
-  std::vector<mfem::Vector> Astates;
-  for (auto s : states) {
-    Astates.emplace_back(*s);
-  }
-
-  int local_rows(states[0]->Size());
-  int global_rows(smith::globalSize(*states[0], PETSC_COMM_WORLD));
-
-  Vec x;
-  Vec y;
-
-  VecCreateMPI(PETSC_COMM_WORLD, local_rows, global_rows, &x);
-  VecCreateMPI(PETSC_COMM_WORLD, local_rows, global_rows, &y);
-
-  PetscInt iStart, iEnd;
-  VecGetOwnershipRange(x, &iStart, &iEnd);
-
-  std::vector<int> col_indices;
-  col_indices.reserve(static_cast<size_t>(local_rows));
-  for (int i = iStart; i < iEnd; ++i) {
-    col_indices.push_back(i);
-  }
-
-  size_t num_cols = states.size();
-  for (size_t c = 0; c < num_cols; ++c) {
-    VecSetValues(x, local_rows, &col_indices[0], &(*states[c])[0], INSERT_VALUES);
-    VecAssemblyBegin(x);
-    VecAssemblyEnd(x);
-    MatMult(A, x, y);
-    VecGetValues(y, local_rows, &col_indices[0], &Astates[c][0]);
-  }
-
-  VecDestroy(&x);
-  VecDestroy(&y);
-
-  return Astates;
-}
-
-// auto createDiagonalTestMatrix(smith::FiniteElementState& x)
-auto createDiagonalTestMatrix(mfem::Vector& x)
-{
-  const int local_rows = x.Size();
-  mfem::Vector one = x;
-  one = 1.0;
-  const int global_rows = smith::globalSize(x, PETSC_COMM_WORLD);
-
-  Vec b;
-  VecCreateMPI(PETSC_COMM_WORLD, local_rows, global_rows, &b);
-
-  PetscInt iStart, iEnd;
-  VecGetOwnershipRange(b, &iStart, &iEnd);
-  VecDestroy(&b);
-
-  std::vector<int> col_indices;
-  col_indices.reserve(static_cast<size_t>(local_rows));
-  for (int i = iStart; i < iEnd; ++i) {
-    col_indices.push_back(i);
-  }
-
-  std::vector<int> row_offsets(static_cast<size_t>(local_rows) + 1);
-  for (int i = 0; i < local_rows + 1; ++i) {
-    row_offsets[static_cast<size_t>(i)] = i;
-  }
-
-  Mat A;
-  MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD, local_rows, local_rows, global_rows, global_rows, &row_offsets[0],
-                            &col_indices[0], &x[0], &A);
-
-  return A;
-}
-
-TEST_F(MeshFixture, QR)
-{
-  SMITH_MARK_FUNCTION;
-
-  auto u1 = smith::StateManager::newState(smith::H1<scalar_field_order, 1>{}, "u1", MESHTAG);
-  auto u2 = smith::StateManager::newState(smith::H1<scalar_field_order, 1>{}, "u2", MESHTAG);
-  auto u3 = smith::StateManager::newState(smith::H1<scalar_field_order, 1>{}, "u3", MESHTAG);
-  auto u4 = smith::StateManager::newState(smith::H1<scalar_field_order, 1>{}, "u4", MESHTAG);
-  auto a = smith::StateManager::newState(smith::H1<scalar_field_order, 1>{}, "a", MESHTAG);
-  auto b = smith::StateManager::newState(smith::H1<scalar_field_order, 1>{}, "b", MESHTAG);
-
-  u1 = 1.0;
-  for (int i = 0; i < u2.Size(); ++i) {
-    u2[i] = i + 2;
-    u3[i] = i * i - 15.0;
-    u4[i] = -i + 0.1 * i * i * i - 1.0;
-    a[i] = 2 * i + 0.01 * i * i + 1.25;
-    b[i] = -i + 0.02 * i * i + 0.1;
-  }
-  std::vector<const mfem::Vector*> states = {&u1, &u2, &u3};  //,u4};
-
-  auto A_parallel = createDiagonalTestMatrix(a);
-  std::vector<mfem::Vector> Astates = applyLinearOperator(A_parallel, states);
-
-  std::vector<const mfem::Vector*> AstatePtrs;
-  for (size_t i = 0; i < Astates.size(); ++i) {
-    AstatePtrs.push_back(&Astates[i]);
-  }
-
-  double delta = 0.001;
-  auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblem(states, AstatePtrs, b, delta, 1);
-
-  smith::FiniteElementState smith_sol(b);
-  smith_sol = sol;
-
-  EXPECT_NEAR(std::sqrt(smith::innerProduct(smith_sol, smith_sol)), delta, 1e-12);
-
-  MatDestroy(&A_parallel);
-}
-
-int main(int argc, char* argv[])
-{
-  ::testing::InitGoogleTest(&argc, argv);
-  smith::ApplicationManager applicationManager(argc, argv);
-  return RUN_ALL_TESTS();
-}
diff --git a/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp b/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp
new file mode 100644
index 0000000000..46cbe0e39b
--- /dev/null
+++ b/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp
@@ -0,0 +1,166 @@
+// Copyright (c) Lawrence Livermore National Security, LLC and
+// other Smith Project Developers. See the top-level LICENSE file for
+// details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+
+#include <limits>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "mfem.hpp"
+
+#include "smith/infrastructure/application_manager.hpp"
+#include "smith/numerics/trust_region_solver.hpp"
+
+namespace {
+
+constexpr int test_size = 5;
+constexpr double test_delta = 1.0e-3;
+
+std::vector<mfem::Vector> applyDiagonalOperator(const mfem::Vector& diag,
+                                                const std::vector<const mfem::Vector*>& states)
+{
+  std::vector<mfem::Vector> out;
+  out.reserve(states.size());
+  for (const auto* state : states) {
+    out.emplace_back(state->Size());
+    for (int i = 0; i < state->Size(); ++i) {
+      out.back()[i] = diag[i] * (*state)[i];
+    }
+  }
+  return out;
+}
+
+std::vector<const mfem::Vector*> toPointers(const std::vector<mfem::Vector>& vectors)
+{
+  std::vector<const mfem::Vector*> ptrs;
+  ptrs.reserve(vectors.size());
+  for (const auto& v : vectors) {
+    ptrs.push_back(&v);
+  }
+  return ptrs;
+}
+
+struct DiagonalSubspaceFixture {
+  DiagonalSubspaceFixture(int size) : u1(size), u2(size), u3(size), diag(size), b(size)
+  {
+    u1 = 1.0;
+    for (int i = 0; i < size; ++i) {
+      u2[i] = i + 2.0;
+      u3[i] = i * i - 15.0;
+      diag[i] = 2.0 * i + 0.01 * i * i + 1.25;
+      b[i] = -i + 0.02 * i * i + 0.1;
+    }
+  }
+
+  mfem::Vector u1;
+  mfem::Vector u2;
+  mfem::Vector u3;
+  mfem::Vector diag;
+  mfem::Vector b;
+};
+
+}  // namespace
+
+TEST(TrustRegionSubspaceMfem, SolveHitsTrustRegionBoundary)
+{
+  DiagonalSubspaceFixture fixture(test_size);
+
+  const std::vector<const mfem::Vector*> states = {&fixture.u1, &fixture.u2, &fixture.u3};
+  const auto astates = applyDiagonalOperator(fixture.diag, states);
+  const auto astate_ptrs = toPointers(astates);
+
+  auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblem(states, astate_ptrs, fixture.b, test_delta, 1);
+
+  EXPECT_NEAR(sol.Norml2(), test_delta, 1.0e-12);
+  EXPECT_FALSE(leftvecs.empty());
+  EXPECT_EQ(leftvals.size(), 1);
+  EXPECT_LT(energy, 0.0);
+}
+
+TEST(TrustRegionSubspaceMfem, SolveHandlesZeroDirection)
+{
+  mfem::Vector u1(4);
+  mfem::Vector u2(4);
+  mfem::Vector zero(4);
+  mfem::Vector diag(4);
+  mfem::Vector b(4);
+
+  zero = 0.0;
+  for (int i = 0; i < 4; ++i) {
+    u1[i] = 1.0 + i;
+    u2[i] = 0.25 * i - 0.5;
+    diag[i] = 1.0 + i;
+    b[i] = 0.5 - 0.1 * i;
+  }
+
+  const std::vector<const mfem::Vector*> states = {&u1, &zero, &u2};
+  const auto astates = applyDiagonalOperator(diag, states);
+  const auto astate_ptrs = toPointers(astates);
+
+  auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblem(states, astate_ptrs, b, 0.25, 1);
+
+  EXPECT_LE(sol.Norml2(), 0.25 + 1.0e-12);
+  EXPECT_FALSE(leftvecs.empty());
+  EXPECT_EQ(leftvals.size(), 1);
+  EXPECT_LT(energy, 0.0);
+}
+
+TEST(TrustRegionSubspaceMfem, SolveIndefiniteHardCaseUsesShiftedNewtonPoint)
+{
+  mfem::Vector e0(2);
+  mfem::Vector e1(2);
+  mfem::Vector Ae0(2);
+  mfem::Vector Ae1(2);
+  mfem::Vector b(2);
+
+  e0 = 0.0;
+  e1 = 0.0;
+  Ae0 = 0.0;
+  Ae1 = 0.0;
+  b = 0.0;
+
+  e0[0] = 1.0;
+  e1[1] = 1.0;
+  Ae0[0] = -1.0;
+  Ae1[1] = 2.0;
+  b[1] = 1.0;
+
+  const std::vector<const mfem::Vector*> states = {&e0, &e1};
+  const std::vector<const mfem::Vector*> astates = {&Ae0, &Ae1};
+
+  auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblem(states, astates, b, 1.0, 1);
+
+  EXPECT_NEAR(sol.Norml2(), 1.0, 1.0e-12);
+  EXPECT_NEAR(std::abs(sol[0]), std::sqrt(8.0 / 9.0), 1.0e-10);
+  EXPECT_NEAR(sol[1], 1.0 / 3.0, 1.0e-10);
+  EXPECT_EQ(leftvecs.size(), 1);
+  EXPECT_EQ(leftvals.size(), 1);
+  EXPECT_NEAR(leftvals[0], -1.0, 1.0e-12);
+  EXPECT_NEAR(energy, -2.0 / 3.0, 1.0e-10);
+}
+
+TEST(TrustRegionSubspaceMfem, SolveThrowsOnNanProjection)
+{
+  mfem::Vector state(2);
+  mfem::Vector astate(2);
+  mfem::Vector b(2);
+
+  state = 1.0;
+  astate = 1.0;
+  b = 0.0;
+  astate[1] = std::numeric_limits<double>::quiet_NaN();
+
+  const std::vector<const mfem::Vector*> states = {&state};
+  const std::vector<const mfem::Vector*> astates = {&astate};
+
+  EXPECT_THROW(smith::solveSubspaceProblem(states, astates, b, 1.0, 1), smith::TrustRegionException);
+}
+
+int main(int argc, char* argv[])
+{
+  ::testing::InitGoogleTest(&argc, argv);
+  smith::ApplicationManager applicationManager(argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/src/smith/numerics/trust_region_solver.cpp b/src/smith/numerics/trust_region_solver.cpp
deleted file mode 100644
index 8d8d04a9cc..0000000000
--- a/src/smith/numerics/trust_region_solver.cpp
+++ /dev/null
@@ -1,496 +0,0 @@
-// Copyright (c) Lawrence Livermore National Security, LLC and
-// other Smith Project Developers. See the top-level LICENSE file for
-// details.
-//
-// SPDX-License-Identifier: (BSD-3-Clause)
-
-#include "smith/numerics/trust_region_solver.hpp"
-
-#ifdef SMITH_USE_SLEPC
-
-#include <iostream>
-
-#include "smith/infrastructure/profiling.hpp"
-#include "smith/numerics/dense_petsc.hpp"
-
-namespace smith {
-
-/**
- * @brief Get the global size of a mfem vector
- * @param parallel_v Vector to check global size
- * @param comm Parallel communicator
- */
-int globalSize(const mfem::Vector& parallel_v, const MPI_Comm& comm)
-{
-  int local_size = parallel_v.Size();
-  int global_size;
-  MPI_Allreduce(&local_size, &global_size, 1, MPI_INT, MPI_SUM, comm);
-  return global_size;
-}
-
-/// @brief struct which aids in moving between mfem::Vector and petsc BV
-struct BasisVectors {
-  /**
-   * @brief Construct with a representative state to set sizes
-   * @param state The state which is used to set sizes for basis vectors
-   */
-  BasisVectors(const mfem::Vector& state) : local_rows(state.Size()), global_rows(globalSize(state, PETSC_COMM_WORLD))
-  {
-    VecCreateMPI(PETSC_COMM_WORLD, local_rows, global_rows, &v);
-
-    PetscInt iStart, iEnd;
-    VecGetOwnershipRange(v, &iStart, &iEnd);
-
-    col_indices.reserve(size_t(local_rows));
-    for (int i = iStart; i < iEnd; ++i) {
-      col_indices.push_back(i);
-    }
-  }
-
-  /**
-   * @brief Destructor
-   */
-  ~BasisVectors() { VecDestroy(&v); }
-
-  /**
-   * @brief Construct petsc BV from vector of mfem::Vector
-   * @param states The states used to construct basis vectors
-   */
-  BV constructBases(const std::vector<const mfem::Vector*>& states) const
-  {
-    size_t num_cols = states.size();
-    BV Q;
-    BVCreate(PETSC_COMM_SELF, &Q);
-    BVSetType(Q, BVVECS);
-    BVSetSizesFromVec(Q, v, static_cast<int>(num_cols));
-    for (size_t c = 0; c < num_cols; ++c) {
-      VecSetValues(v, local_rows, &col_indices[0], &(*states[c])[0], INSERT_VALUES);
-      VecAssemblyBegin(v);
-      VecAssemblyEnd(v);
-      int c_int = static_cast<int>(c);
-      BVInsertVec(Q, c_int, v);
-    }
-    return Q;
-  }
-
- private:
-  const int local_rows;
-  const int global_rows;
-
-  std::vector<int> col_indices;
-  Vec v;
-};
-
-/**
- * @brief Create a petsc vector from a mfem::Vector
- * @param state The state used to create an mfem::Vector
- */
-Vec petscVec(const mfem::Vector& state)
-{
-  const int local_rows = state.Size();
-  const int global_rows = globalSize(state, PETSC_COMM_WORLD);
-
-  Vec v;
-  VecCreateMPI(PETSC_COMM_WORLD, local_rows, global_rows, &v);
-
-  PetscInt iStart, iEnd;
-  VecGetOwnershipRange(v, &iStart, &iEnd);
-
-  std::vector<int> col_indices;
-  col_indices.reserve(static_cast<size_t>(local_rows));
-  for (int i = iStart; i < iEnd; ++i) {
-    col_indices.push_back(i);
-  }
-
-  VecSetValues(v, local_rows, &col_indices[0], &state[0], INSERT_VALUES);
-
-  VecAssemblyBegin(v);
-  VecAssemblyEnd(v);
-
-  return v;
-}
-
-/**
- * @brief Copy a petsc vector to an mfem::Vector
- * @param v The petsc vector
- * @param s The mfem vector
- */
-void copy(const Vec& v, mfem::Vector& s)
-{
-  const int local_rows = s.Size();
-  PetscInt iStart, iEnd;
-  VecGetOwnershipRange(v, &iStart, &iEnd);
-
-  SLIC_ERROR_IF(local_rows != iEnd - iStart,
-                "Inconsistency between local t-dof vector size and petsc start and end indices");
-
-  std::vector<int> col_indices;
-  col_indices.reserve(static_cast<size_t>(local_rows));
-  for (int i = iStart; i < iEnd; ++i) {
-    col_indices.push_back(i);
-  }
-
-  VecGetValues(v, local_rows, &col_indices[0], &s[0]);
-}
-
-/**
- * @brief The reduced matrix in the space of {s}
- * @param s The vector of mfem::Vector of directions
- * @param As The vector of mfem::Vector of a global matrix A operated on directions
- */
-Mat dot(const std::vector<const mfem::Vector*>& s, const std::vector<const mfem::Vector*>& As)
-{
-  SLIC_ERROR_IF(s.size() != As.size(),
-                "Search directions and their linear operator result must have same number of columns");
-  size_t num_cols = s.size();
-  int num_cols_int = static_cast<int>(num_cols);
-  Mat sAs;
-  MatCreateSeqDense(PETSC_COMM_SELF, num_cols_int, num_cols_int, NULL, &sAs);
-  for (size_t i = 0; i < num_cols; ++i) {
-    for (size_t j = 0; j < num_cols; ++j) {
-      MatSetValue(sAs, static_cast<int>(i), static_cast<int>(j), mfem::InnerProduct(PETSC_COMM_WORLD, *s[i], *As[j]),
-                  INSERT_VALUES);
-    }
-  }
-  MatAssemblyBegin(sAs, MAT_FINAL_ASSEMBLY);
-  MatAssemblyEnd(sAs, MAT_FINAL_ASSEMBLY);
-  return sAs;
-}
-
-/**
- * @brief The reduced vector s.T*b
- * @param s The vector of mfem::vector of directions
- * @param b The right hand size vector to be reduced
- */
-Vec dot(const std::vector<const mfem::Vector*>& s, const mfem::Vector& b)
-{
-  size_t num_cols = s.size();
-  Vec sb;
-  VecCreateSeq(PETSC_COMM_SELF, static_cast<int>(num_cols), &sb);
-  for (size_t i = 0; i < num_cols; ++i) {
-    VecSetValue(sb, static_cast<int>(i), mfem::InnerProduct(PETSC_COMM_WORLD, *s[i], b), INSERT_VALUES);
-  }
-  return sb;
-}
-
-/**
- * @brief The qr decomposition of the state vectors
- * @param states The vector of mfem::vectors of directions
- * @return Pair of BV Q and DenseMat R
- */
-auto qr(const std::vector<const mfem::Vector*>& states)
-{
-  BasisVectors bvs(*states[0]);
-  BV Q = bvs.constructBases(states);
-
-  Mat R;
-  int num_cols = static_cast<int>(states.size());
-  MatCreateSeqDense(PETSC_COMM_SELF, num_cols, num_cols, NULL, &R);
-  auto error = BVOrthogonalize(Q, R);
-
-  if (error) throw PetscException("BVOrthogonalize failed.");
-
-  return std::make_pair(Q, DenseMat(R));
-}
-
-/**
- * @brief compute the quadratic energy from small dense matrices and vectors
- * @param A The stiffness matrix
- * @param b The rhs vector
- * @param x The current solution vector
- * @return The quadratic, linearized energy approximation
- */
-double quadraticEnergy(const DenseMat& A, const DenseVec& b, const DenseVec& x)
-{
-  DenseVec Ax = A * x;
-  double xAx = dot(x, Ax);
-  double xb = dot(x, b);
-  return 0.5 * xAx - xb;
-}
-
-/**
- * @brief compute the pnorm_squared
- * @param bvv input vector
- * @param sig eigenvectors
- */
-double pnorm_squared(const DenseVec& bvv, const DenseVec& sig)
-{
-  auto bvv_div_sig_squared = bvv / (sig * sig);
-  return sum(bvv_div_sig_squared);
-}
-
-/**
- * @brief compute the qnorm_squared
- * @param bvv input vector
- * @param sig eigenvectors
- */
-double qnorm_squared(const DenseVec& bvv, const DenseVec& sig)
-{
-  auto bvv_div_sig_cubed = bvv / (sig * sig * sig);
-  return sum(bvv_div_sig_cubed);
-  // return bvv.dot((1.0 / (sig * sig * sig)).matrix());
-}
-
-//  returns:
-//    minimum energy solution within delta
-//    N leftmost eigenvectors
-//    N smallest eigenvalue
-//    success status
-
-/**
- * @brief solve the trust region problem exactly using a variant of the Moore Sorensen algorithm
- * @param A matrix
- * @param b rhs
- * @param delta trust region radius
- * @param num_leftmost the number of leftmost eigenvector/values to output
- * returns the solution vector, a std::vector of leftmost vectors
- * a std::vector of leftmost eigenvalues and the energy change (relative to x=0)
- */
-auto exactTrustRegionSolve(DenseMat A, const DenseVec& b, double delta, int num_leftmost)
-{
-  // minimize 1/2 x^T A x - b^T x, s.t. norm(x) <= delta
-  auto [isize, jsize] = A.size();
-  auto isize2 = b.size();
-  SLIC_ERROR_IF(isize != jsize, "Exact trust region solver requires square matrices");
-  SLIC_ERROR_IF(isize != isize2,
-                "The right hand size for exact trust region solve must be consistent with the input matrix size");
-
-  auto [sigs, V] = eigh(A);
-  std::vector<DenseVec> leftmosts;
-  std::vector<double> minsigs;
-  size_t num_leftmost_possible(size_t(std::min(num_leftmost, isize)));
-  for (size_t i = 0; i < num_leftmost_possible; ++i) {
-    leftmosts.emplace_back(V[i]);
-    minsigs.emplace_back(sigs[i]);
-  }
-
-  const auto& leftMost = V[0];
-  double minSig = sigs[0];
-
-  // bv = V.T b, V has columns which are eigenvectors
-  DenseVec bv(isize);
-  for (size_t i = 0; i < size_t(isize); ++i) {
-    bv.setValue(i, dot(V[i], b));
-  }
-
-  DenseVec bvOverSigs = bv / sigs;
-  double sigScale = sum(abs(sigs)) / isize;
-  double eps = 1e-12 * sigScale;
-
-  // Check if solution is inside the trust region
-  if ((minSig >= eps) && (norm(bvOverSigs) <= delta)) {
-    return std::make_tuple(A.solve(b), leftmosts, minsigs, true);
-  }
-
-  // if we get here, the solution must be on the tr boundary
-  // consider bounding the initial guess, see More' Sorenson paper
-  double lam = minSig < eps ? -minSig + eps : 0.0;
-
-  // try to solve this for lam:
-  // (A + lam I)p = b, such that norm(p) = Delta
-  DenseVec sigsPlusLam = sigs + lam;
-
-  bvOverSigs = bv / sigsPlusLam;
-
-  // Check for the hard case
-  if ((minSig < eps) && (norm(bvOverSigs) < delta)) {
-    DenseVec p(isize);
-    p = 0.0;
-    for (int i = 0; i < isize; ++i) {
-      p.add(bv[i], V[size_t(i)]);
-    }
-
-    const auto& z = leftMost;
-    double pz = dot(p, z);
-    double pp = dot(p, p);
-    double ddmpp = std::max(delta * delta - pp, 0.0);
-
-    double tau1 = -pz + std::sqrt(pz * pz + ddmpp);
-    double tau2 = -pz - std::sqrt(pz * pz + ddmpp);
-
-    DenseVec x1(p);
-    DenseVec x2(p);
-    x1.add(tau1, z);
-    x2.add(tau2, z);
-
-    double e1 = quadraticEnergy(A, b, x1);
-    double e2 = quadraticEnergy(A, b, x2);
-
-    DenseVec x = e1 < e2 ? x1 : x2;
-
-    return std::make_tuple(x, leftmosts, minsigs, true);
-  }
-  DenseVec bvbv = bv * bv;
-  sigsPlusLam = sigs + lam;
-
-  double pNormSq = pnorm_squared(bvbv, sigsPlusLam);
-  double pNorm = std::sqrt(pNormSq);
-  double bError = (pNorm - delta) / delta;
-
-  // consider an out if it doesn't converge, or use a better initial guess, or bound the lam from below and above.
-  size_t iters = 0;
-  size_t maxIters = 30;
-  while ((std::abs(bError) > 1e-9) && (iters++ < maxIters)) {
-    double qNormSq = qnorm_squared(bvbv, sigsPlusLam);
-    lam += (pNormSq / qNormSq) * bError;
-    sigsPlusLam = sigs + lam;
-    pNormSq = pnorm_squared(bvbv, sigsPlusLam);
-    pNorm = std::sqrt(pNormSq);
-    bError = (pNorm - delta) / delta;
-  }
-
-  bool success = true;
-  if (iters >= maxIters) {
-    success = false;
-  }
-
-  bvOverSigs = bv / sigsPlusLam;
-
-  DenseVec x(isize);
-  x = 0.0;
-  for (int i = 0; i < isize; ++i) {
-    x.add(bvOverSigs[i], V[size_t(i)]);
-  }
-
-  double e1 = quadraticEnergy(A, b, x);
-  double e2 = quadraticEnergy(A, b, -x);
-
-  if (e2 < e1) {
-    x *= -delta / norm(x);
-  } else {
-    x *= delta / norm(x);
-  }
-
-  return std::make_tuple(x, leftmosts, minsigs, success);
-}
-
-/// @brief remove the vector at location j and return what is left
-std::vector<const mfem::Vector*> remove_at(const std::vector<const mfem::Vector*>& a, size_t j)
-{
-  std::vector<const mfem::Vector*> b;
-  for (size_t i = 0; i < a.size(); ++i) {
-    if (i != j) {
-      b.emplace_back(a[i]);
-    }
-  }
-  return b;
-}
-
-/// @brief returns the solution, as well as a list of the N leftmost eigenvectors
-/// and their eigenvalues, and the predicted model energy change
-std::tuple<mfem::Vector, std::vector<std::shared_ptr<mfem::Vector>>, std::vector<double>, double> solveSubspaceProblem(
-    const std::vector<const mfem::Vector*>& states, const std::vector<const mfem::Vector*>& Astates,
-    const mfem::Vector& b, double delta, int num_leftmost)
-{
-  SMITH_MARK_FUNCTION;
-  DenseMat sAs1 = dot(states, Astates);
-  DenseMat sAs = sym(sAs1);
-
-  if (sAs.hasNan()) {
-    throw PetscException("States in subspace solve contain NaNs.");
-    return std::make_tuple(b, std::vector<std::shared_ptr<mfem::Vector>>{}, std::vector<double>{}, 0);
-  }
-
-  auto [Q_parallel, R] = qr(states);
-
-  if (R.hasNan()) {
-    throw PetscException("R from qr returning with a NaN.");
-    return std::make_tuple(b, std::vector<std::shared_ptr<mfem::Vector>>{}, std::vector<double>{}, 0);
-  }
-
-  auto [rows, cols] = R.size();
-  SLIC_ERROR_IF(rows != cols, "R matrix is not square in subspace problem solve\n");
-
-  double trace_mag = 0.0;
-  for (int i = 0; i < rows; ++i) {
-    trace_mag += std::abs(R(i, i));
-  }
-
-  // remove any nearly colinear state
-  for (int i = 0; i < rows; ++i) {
-    if (R(i, i) < 1e-9 * trace_mag) {
-      // printf("removing after QR state number %d\n", i);
-      auto statesNew = remove_at(states, size_t(i));
-      auto AstatesNew = remove_at(Astates, size_t(i));
-      return solveSubspaceProblem(statesNew, AstatesNew, b, delta, num_leftmost);
-    }
-  }
-
-  auto Rinv = inverse(R);
-  DenseMat pAp = sAs.PtAP(Rinv);
-
-  Vec b_parallel = petscVec(b);
-  std::vector<double> pb_vec(states.size());
-  BVDotVec(Q_parallel, b_parallel, &pb_vec[0]);
-  DenseVec pb(pb_vec);
-
-  auto [reduced_x, leftvecs, leftvals, success] = exactTrustRegionSolve(pAp, pb, delta, num_leftmost);
-
-  double energy = quadraticEnergy(pAp, pb, reduced_x);
-
-  Vec x_parallel;
-  VecDuplicate(b_parallel, &x_parallel);
-
-  std::vector<double> reduced_x_vec = reduced_x.getValues();
-  BVMultVec(Q_parallel, 1.0, 0.0, x_parallel, &reduced_x_vec[0]);
-  mfem::Vector sol(b);
-  copy(x_parallel, sol);
-
-  std::vector<std::shared_ptr<mfem::Vector>> leftmosts;
-  for (size_t i = 0; i < leftvecs.size(); ++i) {
-    auto reduced_leftvec = leftvecs[i].getValues();
-    BVMultVec(Q_parallel, 1.0, 0.0, x_parallel, &reduced_leftvec[0]);
-    leftmosts.emplace_back(std::make_shared<mfem::Vector>(b));
-    copy(x_parallel, *leftmosts[i]);
-  }
-
-  BVDestroy(&Q_parallel);
-  VecDestroy(&b_parallel);
-  VecDestroy(&x_parallel);
-  return std::make_tuple(sol, leftmosts, leftvals, energy);
-}
-
-/// @brief Remove any obvious dependent directions, namely ones which are scaled version of previous directions
-/// The case where they are linear combinations of previous direction will be handled in the QR solver
-std::pair<std::vector<const mfem::Vector*>, std::vector<const mfem::Vector*>> removeDependentDirections(
-    std::vector<const mfem::Vector*> directions, std::vector<const mfem::Vector*> A_directions)
-{
-  SMITH_MARK_FUNCTION;
-  std::vector<double> norms;
-  size_t num_dirs = directions.size();
-
-  for (size_t i = 0; i < num_dirs; ++i) {
-    norms.push_back(std::sqrt(mfem::InnerProduct(PETSC_COMM_WORLD, *directions[i], *directions[i])));
-  }
-
-  std::vector<std::pair<const mfem::Vector*, size_t>> kepts;
-  for (size_t i = 0; i < num_dirs; ++i) {
-    bool keepi = true;
-    if (norms[i] == 0) keepi = false;
-    for (auto&& kept_and_j : kepts) {
-      size_t j = kept_and_j.second;
-      double dot_ij = mfem::InnerProduct(PETSC_COMM_WORLD, *directions[i], *kept_and_j.first);
-      if (dot_ij > 0.999 * norms[i] * norms[j]) {
-        keepi = false;
-      }
-    }
-    // if (!keepi) printf("not keeping %zu\n",i);
-    if (keepi) {
-      kepts.emplace_back(std::make_pair(directions[i], i));
-    }
-  }
-
-  std::vector<const mfem::Vector*> directions_new;
-  std::vector<const mfem::Vector*> A_directions_new;
-
-  for (auto kept_and_j : kepts) {
-    directions_new.push_back(directions[kept_and_j.second]);
-    A_directions_new.push_back(A_directions[kept_and_j.second]);
-  }
-
-  return std::make_pair(directions_new, A_directions_new);
-}
-
-}  // namespace smith
-
-#endif  // SMITH_USE_SLEPC
diff --git a/src/smith/numerics/trust_region_solver.hpp b/src/smith/numerics/trust_region_solver.hpp
index ad4b390f18..efd7f27a8b 100644
--- a/src/smith/numerics/trust_region_solver.hpp
+++ b/src/smith/numerics/trust_region_solver.hpp
@@ -14,21 +14,22 @@
 
 #include "smith/smith_config.hpp"
 
-#ifdef SMITH_USE_SLEPC
-
+#include <exception>
 #include <memory>
-#include <optional>
-#include <variant>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
 
-#include "smith/physics/state/finite_element_state.hpp"
-#include "smith/physics/state/finite_element_dual.hpp"
+#include "mfem.hpp"
 
 namespace smith {
 
-class PetscException : public std::exception {
+/// Exception type for trust-region subspace solve failures.
+class TrustRegionException : public std::exception {
  public:
   /// constructor
-  PetscException(const std::string& message) : msg(message) {}
+  TrustRegionException(const std::string& message) : msg(message) {}
 
   /// what is message
   const char* what() const noexcept override { return msg.c_str(); }
@@ -38,6 +39,32 @@ class PetscException : public std::exception {
   std::string msg;
 };
 
+/// Subspace solution, leftmost eigenvectors, leftmost eigenvalues, and predicted model energy change.
+using TrustRegionSubspaceResult =
+    std::tuple<mfem::Vector, std::vector<std::shared_ptr<mfem::Vector>>, std::vector<double>, double>;
+
+/// Cached reduced trust-region subspace data reusable across trust-region radius updates.
+struct CachedTrustRegionSubspaceProblem {
+  /// zero vector with correct size/layout for empty-subspace returns
+  mfem::Vector zero_solution;
+  /// orthonormalized physical-space basis spanning reduced subspace
+  std::vector<std::shared_ptr<mfem::Vector>> basis;
+  /// reduced Hessian in cached subspace basis
+  mfem::DenseMatrix projected_hessian;
+  /// reduced right-hand side in cached subspace basis
+  mfem::Vector projected_rhs;
+  /// eigenvalues of reduced Hessian
+  mfem::Vector eigenvalues;
+  /// eigenvectors of reduced Hessian
+  mfem::DenseMatrix eigenvectors;
+  /// reduced right-hand side projected onto reduced Hessian eigenvectors
+  mfem::Vector eigen_rhs;
+  /// cached leftmost eigenvectors lifted back to physical space
+  std::vector<std::shared_ptr<mfem::Vector>> leftmosts;
+  /// eigenvalues corresponding to cached leftmost eigenvectors
+  std::vector<double> leftvals;
+};
+
 /// @brief computes the global size of mfem::Vector
 int globalSize(const mfem::Vector& parallel_v, const MPI_Comm& comm);
 
@@ -46,13 +73,18 @@ double innerProduct(const mfem::Vector& a, const mfem::Vector& b, const MPI_Comm
 
 /// @brief returns the solution, as well as a list of the N leftmost eigenvectors
 /// and their eigenvalues, and the predicted model energy change
-std::tuple<mfem::Vector, std::vector<std::shared_ptr<mfem::Vector>>, std::vector<double>, double> solveSubspaceProblem(
-    const std::vector<const mfem::Vector*>& directions, const std::vector<const mfem::Vector*>& A_directions,
-    const mfem::Vector& b, double delta, int num_leftmost);
+TrustRegionSubspaceResult solveSubspaceProblem(const std::vector<const mfem::Vector*>& directions,
+                                               const std::vector<const mfem::Vector*>& A_directions,
+                                               const mfem::Vector& b, double delta, int num_leftmost,
+                                               MPI_Comm comm = MPI_COMM_WORLD);
 
-std::pair<std::vector<const mfem::Vector*>, std::vector<const mfem::Vector*>> removeDependentDirections(
-    std::vector<const mfem::Vector*> directions, std::vector<const mfem::Vector*> A_directions);
+/// @brief prepares reduced trust-region subspace data reusable across trust-region radius updates
+CachedTrustRegionSubspaceProblem prepareSubspaceProblem(const std::vector<const mfem::Vector*>& directions,
+                                                        const std::vector<const mfem::Vector*>& A_directions,
+                                                        const mfem::Vector& b, int num_leftmost,
+                                                        MPI_Comm comm = MPI_COMM_WORLD);
 
-}  // namespace smith
+/// @brief solves cached reduced trust-region problem for given trust-region radius
+TrustRegionSubspaceResult solvePreparedSubspaceProblem(const CachedTrustRegionSubspaceProblem& prepared, double delta);
 
-#endif  // SMITH_USE_SLEPC
+}  // namespace smith
diff --git a/src/smith/physics/solid_mechanics.hpp b/src/smith/physics/solid_mechanics.hpp
index f6867ee72b..f7d2706114 100644
--- a/src/smith/physics/solid_mechanics.hpp
+++ b/src/smith/physics/solid_mechanics.hpp
@@ -53,6 +53,7 @@
 #include "smith/physics/state/finite_element_vector.hpp"
 
 namespace smith {
+
 namespace solid_mechanics {
 
 namespace detail {
diff --git a/src/smith/physics/tests/CMakeLists.txt b/src/smith/physics/tests/CMakeLists.txt
index 81a9b32b86..911c3fe241 100644
--- a/src/smith/physics/tests/CMakeLists.txt
+++ b/src/smith/physics/tests/CMakeLists.txt
@@ -76,6 +76,7 @@ set(physics_parallel_test_sources
     dynamic_thermal_adjoint.cpp
     solid_reaction_adjoint.cpp
     thermal_nonlinear_solve.cpp
+    shallow_arch_buckling.cpp
     )
 set(physics_parallel_tribol_test_sources
     contact_patch.cpp
diff --git a/src/smith/physics/tests/contact_finite_diff.cpp b/src/smith/physics/tests/contact_finite_diff.cpp
index 531b7f2eaa..87f141330f 100644
--- a/src/smith/physics/tests/contact_finite_diff.cpp
+++ b/src/smith/physics/tests/contact_finite_diff.cpp
@@ -162,9 +162,6 @@ TEST_P(ContactFiniteDiff, patch)
     merged_sol.SetVector(u, 0);
     merged_sol.SetVector(pressure, u.Size());
     mfem::Vector f(merged_sol.Size());
-    f = 0.0;
-    oper->Mult(merged_sol, f);
-    auto* J_op = &oper->GetGradient(merged_sol);
     mfem::Vector u_dot(merged_sol.Size());
     u_dot = 0.0;
     // wiggle displacement (col = j)
@@ -174,6 +171,9 @@ TEST_P(ContactFiniteDiff, patch)
         ++dof_ct;
         continue;
       }
+      f = 0.0;
+      oper->Mult(merged_sol, f);
+      auto* J_op = &oper->GetGradient(merged_sol);
       u_dot[j] = 1.0;
       mfem::Vector J_exact(merged_sol.Size());
       J_exact = 0.0;
@@ -206,6 +206,10 @@ TEST_P(ContactFiniteDiff, patch)
     }
     std::cout << "Max diff = " << std::setprecision(15) << max_diff << std::endl;
 
+    // Restore the contact state after the finite-difference probes before advancing the timestep.
+    f = 0.0;
+    oper->Mult(merged_sol, f);
+
     solid_solver.advanceTimestep(dt);
   }
 }
diff --git a/src/smith/physics/tests/shallow_arch_buckling.cpp b/src/smith/physics/tests/shallow_arch_buckling.cpp
new file mode 100644
index 0000000000..258332731f
--- /dev/null
+++ b/src/smith/physics/tests/shallow_arch_buckling.cpp
@@ -0,0 +1,171 @@
+// Copyright (c) Lawrence Livermore National Security, LLC and
+// other Smith Project Developers. See the top-level LICENSE file for
+// details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+
+#include <format>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "mpi.h"
+#include "mfem.hpp"
+
+#include "smith/infrastructure/application_manager.hpp"
+#include "smith/infrastructure/logger.hpp"
+#include "smith/numerics/functional/domain.hpp"
+#include "smith/numerics/functional/tensor.hpp"
+#include "smith/numerics/solver_config.hpp"
+#include "smith/physics/materials/solid_material.hpp"
+#include "smith/physics/mesh.hpp"
+#include "smith/physics/solid_mechanics.hpp"
+#include "smith/physics/state/state_manager.hpp"
+
+namespace smith {
+namespace {
+
+constexpr double length = 10.0;
+constexpr double thickness = 0.025;
+constexpr double end_tol = 1.0e-8;
+constexpr double top_tol = 1.0e-8;
+std::string solver_name = "TrustRegion";
+int print_level = 2;
+int nonlinear_max_iterations = 300000;
+int trust_subspace_option = static_cast<int>(SubSpaceOptions::NEVER);
+int trust_num_leftmost = 1;
+
+NonlinearSolver selectedNonlinearSolver()
+{
+  if (solver_name == "NewtonLineSearch") {
+    return NonlinearSolver::NewtonLineSearch;
+  }
+  if (solver_name == "TrustRegion") {
+    return NonlinearSolver::TrustRegion;
+  }
+
+  throw std::runtime_error("Unknown --solver value '" + solver_name + "'. Use NewtonLineSearch or TrustRegion.");
+}
+
+void parseCommandLine(int& argc, char** argv)
+{
+  int write_arg = 1;
+  for (int read_arg = 1; read_arg < argc; ++read_arg) {
+    const std::string arg = argv[read_arg];
+    if (arg.rfind("--solver=", 0) == 0) {
+      solver_name = arg.substr(std::string("--solver=").size());
+    } else if (arg.rfind("--print-level=", 0) == 0) {
+      print_level = std::stoi(arg.substr(std::string("--print-level=").size()));
+    } else if (arg.rfind("--nonlinear-max-iterations=", 0) == 0) {
+      nonlinear_max_iterations = std::stoi(arg.substr(std::string("--nonlinear-max-iterations=").size()));
+    } else if (arg.rfind("--trust-subspace-option=", 0) == 0) {
+      trust_subspace_option = std::stoi(arg.substr(std::string("--trust-subspace-option=").size()));
+    } else if (arg.rfind("--trust-num-leftmost=", 0) == 0) {
+      trust_num_leftmost = std::stoi(arg.substr(std::string("--trust-num-leftmost=").size()));
+    } else {
+      argv[write_arg] = argv[read_arg];
+      ++write_arg;
+    }
+  }
+  argc = write_arg;
+}
+
+}  // namespace
+
+TEST(ShallowArchBuckling, CompressedThinBeamSnapThrough)
+{
+  MPI_Barrier(MPI_COMM_WORLD);
+
+  constexpr int p = 1;
+  constexpr int dim = 2;
+  constexpr int nx = 120;
+  constexpr int ny = 5;
+
+  axom::sidre::DataStore datastore;
+  smith::StateManager::initialize(datastore, "shallow_arch_buckling");
+
+  auto mesh = std::make_shared<smith::Mesh>(
+      mfem::Mesh::MakeCartesian2D(nx, ny, mfem::Element::QUADRILATERAL, true, length, thickness),
+      "compressed_beam_mesh", 0, 0);
+
+  mesh->addDomainOfBoundaryElements("left_end",
+                                    [](std::vector<vec2> vertices, int) { return average(vertices)[0] < end_tol; });
+  mesh->addDomainOfBoundaryElements(
+      "right_end", [](std::vector<vec2> vertices, int) { return average(vertices)[0] > length - end_tol; });
+  mesh->addDomainOfBoundaryElements(
+      "top_face", [](std::vector<vec2> vertices, int) { return average(vertices)[1] > thickness - top_tol; });
+  auto globalElementCount = [](int local_count) {
+    int global_count = 0;
+    MPI_Allreduce(&local_count, &global_count, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+    return global_count;
+  };
+  EXPECT_GT(globalElementCount(mesh->domain("left_end").total_elements()), 0);
+  EXPECT_GT(globalElementCount(mesh->domain("right_end").total_elements()), 0);
+  EXPECT_GT(globalElementCount(mesh->domain("top_face").total_elements()), 0);
+
+  smith::LinearSolverOptions linear_options{.linear_solver = LinearSolver::CG,
+                                            .preconditioner = Preconditioner::HypreJacobi,
+                                            .relative_tol = 1.0e-8,
+                                            .absolute_tol = 1.0e-14,
+                                            .max_iterations = 100000,
+                                            .print_level = 0};
+
+  smith::NonlinearSolverOptions nonlinear_options{
+      .nonlin_solver = selectedNonlinearSolver(),
+      .relative_tol = 1.0e-8,
+      .absolute_tol = 1.0e-10,
+      .max_iterations = nonlinear_max_iterations,
+      .print_level = print_level,
+      .subspace_option = static_cast<SubSpaceOptions>(trust_subspace_option),
+      .num_leftmost = trust_num_leftmost};
+
+  SolidMechanics<p, dim> solid(nonlinear_options, linear_options, solid_mechanics::default_quasistatic_options,
+                               "compressed_beam", mesh);
+
+  solid_mechanics::NeoHookean mat{.density = 1.0, .K = 100.0, .G = 10.0};
+  solid.setMaterial(mat, mesh->entireBody());
+  solid.setFixedBCs(mesh->domain("left_end"));
+
+  constexpr double final_compression = 0.2;
+  constexpr double seed_down_traction = 1.0e-5;
+  constexpr double final_snap_up_traction = 0.02;
+  solid.setDisplacementBCs([](auto, double t) { return vec2{{-final_compression * t, 0.0}}; },
+                           mesh->domain("right_end"), Component::X);
+  solid.setFixedBCs(mesh->domain("right_end"), Component::Y);
+  solid.setTraction(
+      [](auto, auto, double t) {
+        if (t < 0.5) {
+          return vec2{{0.0, -seed_down_traction * (t / 0.5)}};
+        }
+        const double snap_ramp = (t - 0.5) / 0.5;
+        return vec2{{0.0, -seed_down_traction * (1.0 - snap_ramp) + final_snap_up_traction * snap_ramp}};
+      },
+      mesh->domain("top_face"));
+
+  solid.completeSetup();
+  solid.outputStateToDisk("shallow_arch_buckling");
+
+  SLIC_INFO_ROOT(
+      std::format("Compressed thin beam snap-through run: solver = {}, trust_subspace_option = {}, "
+                  "trust_num_leftmost = {}",
+                  solver_name, trust_subspace_option, trust_num_leftmost));
+
+  constexpr int num_steps = 5;
+  for (int step = 0; step < num_steps; ++step) {
+    solid.advanceTimestep(1.0 / num_steps);
+    SLIC_INFO_ROOT(std::format("Load step {}/{}", step + 1, num_steps));
+    solid.outputStateToDisk("shallow_arch_buckling");
+  }
+}
+
+}  // namespace smith
+
+int main(int argc, char* argv[])
+{
+  smith::parseCommandLine(argc, argv);
+  ::testing::InitGoogleTest(&argc, argv);
+  smith::ApplicationManager applicationManager(argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/src/smith/physics/weak_form.hpp b/src/smith/physics/weak_form.hpp
index 2e766de4b3..8bd7e48e8d 100644
--- a/src/smith/physics/weak_form.hpp
+++ b/src/smith/physics/weak_form.hpp
@@ -12,6 +12,7 @@
 
 #pragma once
 
+#include <cstddef>
 #include <vector>
 #include <string>
 #include <memory>